
Commit 9500eb1

Backport Automated Repair Inside Cassandra (CEP-37)
Includes bug fixes and features:
- Improved observability in AutoRepair (CASSANDRA-20581)
- Stop repair scheduler if two major versions detected (CASSANDRA-20048)
- Safeguard Full repair against disk protection (CASSANDRA-20045)
- Stop AutoRepair monitoring thread upon shutdown (CASSANDRA-20623)
- Fix race condition in auto-repair scheduler (CASSANDRA-20265)
- Minimum repair task duration setting (CASSANDRA-20160)
- Preview_repaired auto-repair type (CASSANDRA-20046)
- Gate auto-repair behind cassandra.autorepair.enable JVM property
- Add cassandra.autorepair.check_min_version to gate minimum version enforcement
- Prevent auto-repair from running if any node is below 5.0.7
- Make system_distributed auto-repair schema conditional on feature being enabled
- Add user-friendly errors for disabled auto-repair and schema incompatibility

patch by Paulo Motta; reviewed by Andy Tolbert, Jaydeepkumar Chovatia for CASSANDRA-21138

Co-Authored-By: Andy Tolbert <andy_tolbert@apple.com>
Co-Authored-By: Chris Lohfink <clohfink@netflix.com>
Co-Authored-By: Francisco Guerrero <frankgh@apache.org>
Co-Authored-By: Himanshu Jindal <himanshj@amazon.com>
Co-Authored-By: Jaydeepkumar Chovatia <jchovati@uber.com>
Co-Authored-By: Kristijonas Zalys <kzalys@uber.com>
Co-Authored-By: jaydeepkumar1984 <chovatia.jaydeep@gmail.com>
1 parent 0807210 commit 9500eb1

98 files changed: 14,524 additions and 120 deletions


.build/run-tests.sh

Lines changed: 5 additions & 3 deletions
@@ -188,21 +188,23 @@ _build_all_dtest_jars() {
     if [ -d ${TMP_DIR}/cassandra-dtest-jars/.git ] && [ "https://github.com/apache/cassandra.git" == "$(git -C ${TMP_DIR}/cassandra-dtest-jars remote get-url origin)" ] ; then
       echo "Reusing ${TMP_DIR}/cassandra-dtest-jars for past branch dtest jars"
       if [ "x" == "x${OFFLINE}" ] ; then
-        until git -C ${TMP_DIR}/cassandra-dtest-jars fetch --quiet origin ; do echo "git -C ${TMP_DIR}/cassandra-dtest-jars fetch failed… trying again… " ; done
+        until git -C ${TMP_DIR}/cassandra-dtest-jars fetch --quiet --tags origin ; do echo "git -C ${TMP_DIR}/cassandra-dtest-jars fetch failed… trying again… " ; done
       fi
     else
       echo "Cloning cassandra to ${TMP_DIR}/cassandra-dtest-jars for past branch dtest jars"
       rm -fR ${TMP_DIR}/cassandra-dtest-jars
       pushd $TMP_DIR >/dev/null
-      until git clone --quiet --depth 1 --no-single-branch https://github.com/apache/cassandra.git cassandra-dtest-jars ; do echo "git clone failed… trying again… " ; done
+      until git clone --quiet --depth 1 --no-single-branch --tags https://github.com/apache/cassandra.git cassandra-dtest-jars ; do echo "git clone failed… trying again… " ; done
       popd >/dev/null
     fi

     # cassandra-4 branches need CASSANDRA_USE_JDK11 to allow jdk11
     [ "${java_version}" -eq 11 ] && export CASSANDRA_USE_JDK11=true

     pushd ${TMP_DIR}/cassandra-dtest-jars >/dev/null
-    for branch in cassandra-4.0 cassandra-4.1 cassandra-5.0 ; do
+    # Note: cassandra-5.0.7 tag is used instead of cassandra-5.0 branch to enable
+    # testing upgrades from 5.0.7 to the current local build for autorepair feature
+    for branch in cassandra-4.0 cassandra-4.1 cassandra-5.0.7 ; do
       git clean -qxdff && git reset --hard HEAD || echo "failed to reset/clean ${TMP_DIR}/cassandra-dtest-jars… continuing…"
       git checkout --quiet $branch
       dtest_jar_version=$(grep 'property\s*name=\"base.version\"' build.xml |sed -ne 's/.*value=\"\([^"]*\)\".*/\1/p')
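The fetch and clone calls above both rely on the same shell retry idiom: `until` re-runs a command until it exits 0, echoing a message after each failure. A minimal standalone sketch, with a hypothetical `flaky` function standing in for the real `git fetch`/`git clone` calls:

```shell
#!/bin/sh
# Sketch of the retry pattern used in run-tests.sh. `flaky` is an
# illustrative stand-in that fails twice, then succeeds on the third call.
attempts=0
flaky() {
  attempts=$((attempts + 1))
  [ "$attempts" -ge 3 ]   # nonzero exit (failure) on the first two calls
}
# `until` keeps looping while the command fails; the body reports each retry.
until flaky ; do echo "command failed… trying again… " ; done
echo "succeeded after ${attempts} attempts"
```

The same shape works for any transient-failure command; in the script above the retried command is the git network operation itself.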

CHANGES.txt

Lines changed: 10 additions & 0 deletions
@@ -1,8 +1,18 @@
 5.0.8
+ * Backport Automated Repair Inside Cassandra for CEP-37 (CASSANDRA-21138)
  * Update cassandra-stress to support TLS 1.3 by default by auto-negotiation (CASSANDRA-21007)
  * Ensure schema created before 2.1 without tableId in folder name can be loaded in SnapshotLoader (CASSANDRA-21173)
 Merged from 4.1:
 Merged from 4.0:
+Backported from 6.0:
+ * Improved observability in AutoRepair to report both expected vs. actual repair bytes and expected vs. actual keyspaces (CASSANDRA-20581)
+ * Stop repair scheduler if two major versions are detected (CASSANDRA-20048)
+ * AutoRepair: Safeguard Full repair against disk protection (CASSANDRA-20045)
+ * Stop AutoRepair monitoring thread upon Cassandra shutdown (CASSANDRA-20623)
+ * Fix race condition in auto-repair scheduler (CASSANDRA-20265)
+ * Implement minimum repair task duration setting for auto-repair scheduler (CASSANDRA-20160)
+ * Implement preview_repaired auto-repair type (CASSANDRA-20046)
+ * Automated Repair Inside Cassandra for CEP-37 (CASSANDRA-19918)


 5.0.7

NEWS.txt

Lines changed: 39 additions & 0 deletions
@@ -65,6 +65,45 @@ restore snapshots created with the previous major version using the
 'sstableloader' tool. You can upgrade the file format of your snapshots
 using the provided 'sstableupgrade' tool.

+5.0.8
+======
+
+New features
+------------
+    - CEP-37 Auto Repair is a fully automated scheduler that provides repair orchestration within Apache Cassandra. This
+      significantly reduces operational overhead by eliminating the need for operators to deploy external tools to submit
+      and manage repairs. See
+      https://cwiki.apache.org/confluence/display/CASSANDRA/CEP-37+Apache+Cassandra+Unified+Repair+Solution for more
+      details on the motivation and design.
+
+Upgrading
+---------
+    - The auto-repair feature requires enabling the JVM property `cassandra.autorepair.enable=true` (add
+      `-Dcassandra.autorepair.enable=true` to JVM options) before starting the node. This property creates the required
+      schema elements for auto-repair, including the auto_repair column in system_schema.tables and system_schema.views,
+      as well as the auto_repair_history and auto_repair_priority tables in system_distributed. After enabling this
+      property, you still need to enable auto-repair scheduling either in cassandra.yaml under the `auto_repair` section
+      or at runtime via JMX.
+
+      Users who do not intend to use auto-repair can leave this property disabled (the default) to maintain schema
+      compatibility with pre-5.0.8 nodes during rolling upgrades. This property must be set consistently across all
+      nodes before startup and cannot be changed at runtime.
+
+      WARNING: This property is non-reversible. Once enabled, it cannot be disabled. Attempting to start a node
+      with `cassandra.autorepair.enable=false` after it was previously enabled will cause the node to fail during
+      initialization due to schema incompatibility (the persisted schema contains auto-repair columns that are not
+      recognized when the property is disabled). To disable auto-repair scheduling after the property has been
+      enabled, use cassandra.yaml or JMX instead of changing the JVM property.
+
+      IMPORTANT: The `cassandra.autorepair.enable` property must be enabled consistently across all nodes in the
+      cluster before any schema changes are made. When some nodes have the property enabled and others do not, the
+      system_distributed keyspace schema generation will differ between nodes (generation 7 with auto-repair vs
+      generation 6 without), causing schema disagreement. This is similar to what happens during a major version
+      upgrade when new system tables are added. Any schema change (e.g. CREATE KEYSPACE) attempted while nodes
+      are in this inconsistent state will time out and schema versions will not converge until all nodes are
+      brought up with the same setting. Once all nodes have the property set consistently, schema will converge
+      automatically.
+
 5.0.7
 ======
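The upgrade note above boils down to adding one flag to every node's JVM options before restart. A hedged sketch of an idempotent way to do that; the stock location in recent Cassandra packages is conf/jvm-server.options, but a temp file is used here for illustration, so adjust the path for your install:

```shell
#!/bin/sh
# Sketch: append -Dcassandra.autorepair.enable=true to a node's JVM options
# file exactly once. Per NEWS.txt, once enabled this flag must never be
# removed, and it must be set consistently on every node in the cluster.
CONF=$(mktemp)   # stand-in for conf/jvm-server.options on a real node
FLAG='-Dcassandra.autorepair.enable=true'
# Only append if the flag is not already present (safe to re-run).
grep -q "^${FLAG}\$" "$CONF" || echo "$FLAG" >> "$CONF"
grep -q "^${FLAG}\$" "$CONF" || echo "$FLAG" >> "$CONF"   # second run is a no-op
```

After restarting with the flag on all nodes, scheduling still has to be turned on separately via the `auto_repair` section of cassandra.yaml or JMX, as the note says.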

conf/cassandra.yaml

Lines changed: 173 additions & 0 deletions
@@ -1951,6 +1951,13 @@ report_unconfirmed_repaired_data_mismatches: false
 # Materialized views are considered experimental and are not recommended for production use.
 materialized_views_enabled: false

+# Specify whether Materialized View mutations are replayed through the write path on streaming, e.g. repair.
+# When enabled, Materialized View data streamed to the destination node will be written into the commit log first. When set to
+# false, the streamed Materialized View data is written into SSTables just the same as normal streaming. The default is true.
+# If this is set to false, streaming will be considerably faster, however it is possible that, in extreme situations
+# (losing > quorum # nodes in a replica set), you may have data in your SSTables that never makes it to the Materialized View.
+# materialized_views_on_repair_enabled: true
+
 # Enables SASI index creation on this node.
 # SASI indexes are considered experimental and are not recommended for production use.
 sasi_indexes_enabled: false
@@ -2253,6 +2260,7 @@ drop_compact_storage_enabled: false
 # excluded_keyspaces: # comma separated list of keyspaces to exclude from the check
 # excluded_tables: # comma separated list of keyspace.table pairs to exclude from the check

+
 # This property indicates with what Cassandra major version the storage format will be compatible with.
 #
 # The chosen storage compatibility mode will determine the versions of the written sstables, commitlogs, hints, etc.
@@ -2281,3 +2289,168 @@ drop_compact_storage_enabled: false
 # compatibility mode would no longer toggle behaviors as when it was running in the UPGRADING mode.
 #
 storage_compatibility_mode: CASSANDRA_4
+
+
+# Prevents preparing a repair session or beginning a repair streaming session if the number of pending compactions is over
+# the given value. Defaults to disabled.
+# reject_repair_compaction_threshold: 1024
+
+# Ratio of disk that must be unused to run repair. It is useful to avoid disks filling up during
+# repair as anti-compaction during repair may contribute to additional space temporarily.
+# For example, setting this to 0.2 means at least 20% of disk must be unused.
+# Set to 0.0 to disable this check. Defaults to 0.0 (disabled) on 5.0 for backward-compatibility.
+# repair_disk_headroom_reject_ratio: 0.0
+
+# Configuration for the Auto Repair Scheduler.
+#
+# This feature is disabled by default.
+#
+# NOTE: The auto-repair feature requires enabling the JVM property `cassandra.autorepair.enable=true`.
+#
+# See: https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html for an overview of this
+# feature.
+#
+# auto_repair:
+#   # Enable/Disable the auto-repair scheduler.
+#   # If set to false, the scheduler thread will not be started.
+#   # If set to true, the repair scheduler thread will be created. The thread will
+#   # check for secondary configuration available for each repair type (full, incremental,
+#   # and preview_repaired), and based on that, it will schedule repairs.
+#   enabled: true
+#   repair_type_overrides:
+#     full:
+#       # Enable/Disable full auto-repair
+#       enabled: true
+#       # Minimum duration between repairing the same node again. This is useful for tiny clusters,
+#       # such as clusters with 5 nodes that finish repairs quickly. This means that if the scheduler completes one
+#       # round on all nodes in less than this duration, it will not start a new repair round on a given node until
+#       # this much time has passed since the last repair completed. Consider increasing to a larger value to reduce
+#       # the impact of repairs; however, note that one should attempt to run repairs at a smaller interval than
+#       # gc_grace_seconds to avoid potential data resurrection.
+#       min_repair_interval: 24h
+#       token_range_splitter:
+#         # Implementation of IAutoRepairTokenRangeSplitter; responsible for splitting token ranges
+#         # for repair assignments.
+#         #
+#         # Out of the box, Cassandra provides org.apache.cassandra.repair.autorepair.{RepairTokenRangeSplitter,
+#         # FixedTokenRangeSplitter}.
+#         #
+#         # - RepairTokenRangeSplitter (default) attempts to intelligently split ranges based on data size and partition
+#         #   count.
+#         # - FixedTokenRangeSplitter splits into fixed ranges based on the 'number_of_subranges' option.
+#         # class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter
+#
+#         # Optional parameters can be specified in the form of:
+#         # parameters:
+#         #   param_key1: param_value1
+#         parameters:
+#           # The target and maximum amount of compressed bytes that should be included in a repair assignment.
+#           # This scopes the amount of work involved in a repair and includes the data covering the range being
+#           # repaired.
+#           bytes_per_assignment: 50GiB
+#           # The maximum number of bytes to cover in an individual schedule. This serves as
+#           # a mechanism to throttle the work done in each repair cycle. You may reduce this
+#           # value if the impact of repairs is causing too much load on the cluster, or increase it
+#           # if writes outpace the amount of data being repaired. Alternatively, adjust the
+#           # min_repair_interval.
+#           # This is set to a large value for full repair to attempt to repair all data per repair schedule.
+#           max_bytes_per_schedule: 100000GiB
+#     incremental:
+#       enabled: false
+#       # Incremental repairs operate over unrepaired data and should finish quickly. Running incremental repair
+#       # frequently keeps the unrepaired set smaller and thus causes repairs to operate over a smaller set of data,
+#       # so a more frequent schedule such as 1h is recommended.
+#       # NOTE: Please consult
+#       # https://cassandra.apache.org/doc/latest/cassandra/managing/operating/auto_repair.html#enabling-ir
+#       # for guidance on enabling incremental repair on an existing cluster.
+#       min_repair_interval: 24h
+#       token_range_splitter:
+#         parameters:
+#           # Configured to attempt repairing 50GiB of compressed data per repair.
+#           # This throttles the amount of incremental repair and anticompaction done per schedule after incremental
+#           # repairs are turned on.
+#           bytes_per_assignment: 50GiB
+#           # Restricts the maximum number of bytes to cover in an individual schedule to the configured
+#           # max_bytes_per_schedule value (defaults to 100GiB for incremental).
+#           # Consider increasing this value if more data is written than this limit within the min_repair_interval.
+#           max_bytes_per_schedule: 100GiB
+#     preview_repaired:
+#       # Performs preview repair over repaired SSTables, useful to detect possible inconsistencies in the repaired
+#       # data set.
+#       enabled: false
+#       min_repair_interval: 24h
+#       token_range_splitter:
+#         parameters:
+#           bytes_per_assignment: 50GiB
+#           max_bytes_per_schedule: 100000GiB
+#   # Time interval between successive checks to see if ongoing repairs are complete or if it is time to schedule
+#   # repairs.
+#   repair_check_interval: 5m
+#   # Minimum duration for the execution of a single repair task. This prevents the scheduler from overwhelming
+#   # the node by scheduling too many repair tasks in a short period of time.
+#   repair_task_min_duration: 5s
+#   # The scheduler needs to adjust its order when nodes leave the ring. Deleted hosts are tracked in metadata
+#   # for a specified duration to ensure they are indeed removed before adjustments are made to the schedule.
+#   history_clear_delete_hosts_buffer_interval: 2h
+#   # By default repair is disabled if there are mixed major versions detected - which would happen
+#   # if a major version upgrade is being performed on the cluster - but a user can enable it using this flag.
+#   mixed_major_version_repair_enabled: false
+#   # NOTE: Each of the below settings can be overridden per repair type under repair_type_overrides
+#   global_settings:
+#     # If true, attempts to group tables in the same keyspace into one repair; otherwise, each table is repaired
+#     # individually.
+#     repair_by_keyspace: true
+#     # Number of threads to use for each repair job scheduled by the scheduler. Similar to the -j option in nodetool
+#     # repair.
+#     number_of_repair_threads: 1
+#     # Number of nodes running repair in parallel. If parallel_repair_percentage is set, the larger value is used.
+#     parallel_repair_count: 3
+#     # Percentage of nodes in the cluster running repair in parallel. If parallel_repair_count is set, the larger value
+#     # is used.
+#     parallel_repair_percentage: 3
+#     # Whether to allow a node to take its turn running repair while one or more of its replicas are running repair.
+#     # Defaults to false, as running repairs concurrently on replicas can increase load and also cause anticompaction
+#     # conflicts while running incremental repair.
+#     allow_parallel_replica_repair: false
+#     # An addition to allow_parallel_replica_repair that also blocks repairs when replicas (including this node itself)
+#     # are repairing in any schedule. For example, if a replica is executing full repairs, a value of false will
+#     # prevent starting incremental repairs for this node. Defaults to true and is only evaluated when
+#     # allow_parallel_replica_repair is false.
+#     allow_parallel_replica_repair_across_schedules: true
+#     # Repairs materialized views if true.
+#     materialized_view_repair_enabled: false
+#     # Delay before starting repairs after a node restarts, to avoid repairs starting immediately after a restart.
+#     initial_scheduler_delay: 5m
+#     # Timeout for retrying stuck repair sessions.
+#     repair_session_timeout: 3h
+#     # Force immediate repair on new nodes after they join the ring.
+#     force_repair_new_node: false
+#     # Threshold to skip repairing tables with too many SSTables. Defaults to 10,000 SSTables to avoid penalizing good
+#     # tables.
+#     sstable_upper_threshold: 50000
+#     # Maximum time allowed for repairing one table on a given node. If exceeded, the repair proceeds to the
+#     # next table.
+#     table_max_repair_time: 6h
+#     # Avoid running repairs in specific data centers. By default, repairs run in all data centers. Specify data
+#     # centers to exclude in this list. Note that repair sessions will still consider all replicas from excluded
+#     # data centers. Useful if you have keyspaces that are not replicated in certain data centers and you do not
+#     # want to run the repair schedule there.
+#     ignore_dcs: []
+#     # Repair only the primary ranges owned by a node. Equivalent to the -pr option in nodetool repair. Defaults
+#     # to true. General advice is to keep this true.
+#     repair_primary_token_range_only: true
+#     # Maximum number of retries for a repair session.
+#     repair_max_retries: 3
+#     # Backoff time before retrying a repair session.
+#     repair_retry_backoff: 30s
+#     token_range_splitter:
+#       # Splitter implementation to generate repair assignments. Defaults to RepairTokenRangeSplitter.
+#       class_name: org.apache.cassandra.repair.autorepair.RepairTokenRangeSplitter
+#       parameters:
+#         # Maximum number of partitions to include in a repair assignment. Used to reduce the number of partitions
+#         # present in merkle tree leaf nodes to avoid overstreaming.
+#         partitions_per_assignment: 1048576
+#         # Maximum number of tables to include in a repair assignment. This reduces the number of repairs,
+#         # especially in keyspaces with many tables. The splitter avoids batching tables together if they
+#         # would exceed other configuration parameters like bytes_per_assignment or partitions_per_assignment.
+#         max_tables_per_assignment: 64
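The yaml comments state that between `parallel_repair_count` and `parallel_repair_percentage`, the larger resulting value wins. A hedged sketch of that rule (not Cassandra source; whether the percentage is rounded up is an assumption here):

```python
# Illustrative calculation of the documented "larger value is used" rule for
# parallel repair concurrency. Function name and rounding are assumptions,
# not taken from the Cassandra codebase.
import math

def effective_parallel_repairs(cluster_size: int,
                               parallel_repair_count: int = 3,
                               parallel_repair_percentage: int = 3) -> int:
    """How many nodes may repair in parallel under the larger-value rule."""
    by_percentage = math.ceil(cluster_size * parallel_repair_percentage / 100)
    return max(parallel_repair_count, by_percentage)

print(effective_parallel_repairs(50))    # small cluster: the fixed count (3) dominates
print(effective_parallel_repairs(500))   # large cluster: 3% of 500 nodes (15) dominates
```

With the defaults above, the fixed count acts as a floor on small clusters while the percentage scales concurrency on large ones, which is presumably why both knobs exist.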
