Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,8 @@ object KafkaMicroBatchStream extends Logging {
latestAvailablePartitionOffsets: Option[PartitionOffsetMap]): ju.Map[String, String] = {
val offset = Option(latestConsumedOffset.orElse(null))

if (offset.nonEmpty && latestAvailablePartitionOffsets.isDefined) {
if (offset.nonEmpty && latestAvailablePartitionOffsets.isDefined &&
latestAvailablePartitionOffsets.get != null) {
val consumedPartitionOffsets = offset.map(KafkaSourceOffset(_)).get.partitionToOffsets
val offsetsBehindLatest = latestAvailablePartitionOffsets.get
.map(partitionOffset => partitionOffset._2 -
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.exchange.ReusedExchangeExec
import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.execution.streaming.checkpointing.OffsetSeqBase
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.execution.streaming.runtime.{MicroBatchExecution, StreamExecution, StreamingExecutionRelation}
import org.apache.spark.sql.execution.streaming.runtime.{MicroBatchExecution, StreamExecution, StreamingExecutionRelation, StreamingQueryWrapper}
import org.apache.spark.sql.execution.streaming.runtime.AsyncProgressTrackingMicroBatchExecution.{ASYNC_PROGRESS_TRACKING_CHECKPOINTING_INTERVAL_MS, ASYNC_PROGRESS_TRACKING_ENABLED}
import org.apache.spark.sql.functions.{count, expr, window}
import org.apache.spark.sql.internal.SQLConf
Expand Down Expand Up @@ -1589,6 +1589,272 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase with
q.stop()
}
}

test("sequential union: bounded Kafka then live Kafka with watermarking " +
  "and dropDuplicatesWithinWatermark") {
  import testImplicits._

  withSQLConf(
    SQLConf.STREAMING_OFFSET_LOG_FORMAT_VERSION.key -> "2",
    SQLConf.ENABLE_STREAMING_SOURCE_EVOLUTION.key -> "true") {

    withTempDir { checkpointDir =>
      withTempDir { outputDir =>
        val checkpointLocation = checkpointDir.getCanonicalPath
        val outputPath = outputDir.getCanonicalPath

        val now = System.currentTimeMillis()

        // Reads `topic` as JSON records {"key":..,"value":..,"timestamp":..} and
        // projects them to (key, value, event_time) columns. Shared by both the
        // historical and the live leg so the two pipelines cannot drift apart.
        def kafkaJsonStream(topic: String, sourceName: String): Dataset[Row] = {
          spark.readStream
            .format("kafka")
            .option("kafka.bootstrap.servers", testUtils.brokerAddress)
            .option("subscribe", topic)
            .option("startingOffsets", "earliest")
            .name(sourceName)
            .load()
            .selectExpr("CAST(value AS STRING) AS json")
            .selectExpr(
              "get_json_object(json, '$.key') as key",
              "get_json_object(json, '$.value') as value",
              "CAST(get_json_object(json, '$.timestamp') AS LONG) as event_time")
            .selectExpr("key", "value", "CAST(event_time AS TIMESTAMP) as event_time")
        }

        // Setup: first Kafka topic with historical data at older timestamps.
        val topic1 = newTopic()
        testUtils.createTopic(topic1, partitions = 3)
        val historicalData = (0 until 20).map { i =>
          s"""{"key":"key_$i","value":"key_$i:historical","timestamp":${now + i * 1000}}"""
        }
        testUtils.sendMessages(topic1, historicalData.toArray)

        // Setup: second Kafka topic with the SAME keys at strictly newer timestamps,
        // so every live record is a within-watermark duplicate of a historical one.
        val topic2 = newTopic()
        testUtils.createTopic(topic2, partitions = 3)
        val liveData = (0 until 20).map { i =>
          s"""{"key":"key_$i","value":"key_$i:live","timestamp":${now + 20000 + i * 1000}}"""
        }
        testUtils.sendMessages(topic2, liveData.toArray)

        val historicalStream = kafkaJsonStream(topic1, "historical_kafka")
        val liveStream = kafkaJsonStream(topic2, "live_kafka")

        // followedBy drains the historical source completely before starting the
        // live one; the dedup state must survive that transition for the live
        // duplicates to be dropped.
        val sequential = historicalStream
          .followedBy(liveStream)
          .withWatermark("event_time", "10 seconds")
          .dropDuplicatesWithinWatermark("key")

        val query = sequential.writeStream
          .outputMode("append")
          .format("parquet")
          .option("checkpointLocation", checkpointLocation)
          .option("path", outputPath)
          .trigger(Trigger.AvailableNow)
          .start()

        query.awaitTermination()

        val result = spark.read.parquet(outputPath)
          .select("key", "value")
          .as[(String, String)]
          .collect()
          .toSeq

        // Completeness: all 20 distinct keys (key_0 .. key_19) reached the sink.
        val uniqueKeys = result.map(_._1).toSet
        assert(uniqueKeys.size == 20,
          s"Expected 20 unique keys but got ${uniqueKeys.size}")

        // Sequential ordering + state continuity: the historical copy of every key
        // must win, i.e. every surviving value carries the ":historical" suffix.
        val liveSurvivors = result.filterNot(_._2.endsWith(":historical"))
        assert(liveSurvivors.isEmpty,
          s"Expected all values to be from historical source (first in sequence), " +
            s"but found live values: $liveSurvivors")

        // Deduplication: each key appears exactly once.
        val keyCounts = result.groupBy(_._1).view.mapValues(_.size).toMap
        keyCounts.foreach { case (key, count) =>
          assert(count == 1,
            s"Key $key appeared $count times, expected 1 (deduplication should keep first)")
        }

        // The followedBy plan must have been run by the dedicated sequential-union
        // engine rather than the plain micro-batch one. (Previously this value was
        // computed but never asserted.)
        val executionClass = query.asInstanceOf[StreamingQueryWrapper]
          .streamingQuery.getClass.getName
        assert(executionClass.contains("SequentialUnionExecution"),
          s"Expected SequentialUnionExecution but the query ran with $executionClass")
      }
    }
  }
}

test("sequential union: union child followed by single source") {
  import testImplicits._

  withSQLConf(
    SQLConf.STREAMING_OFFSET_LOG_FORMAT_VERSION.key -> "2",
    SQLConf.ENABLE_STREAMING_SOURCE_EVOLUTION.key -> "true") {

    withTempDir { checkpointDir =>
      withTempDir { outputDir =>
        val checkpointLocation = checkpointDir.getCanonicalPath
        val outputPath = outputDir.getCanonicalPath

        // Creates a 2-partition topic, publishes "<prefix>_0".."<prefix>_9" and
        // returns the topic name together with the published values.
        def makeTopic(prefix: String): (String, Seq[String]) = {
          val topic = newTopic()
          testUtils.createTopic(topic, partitions = 2)
          val data = (0 until 10).map(i => s"${prefix}_$i")
          testUtils.sendMessages(topic, data.toArray)
          (topic, data)
        }

        // Reads `topic` with a small maxOffsetsPerTrigger so the run is spread
        // across several micro-batches, making the batch ordering observable.
        def kafkaStream(topic: String, sourceName: String): Dataset[Row] = {
          spark.readStream
            .format("kafka")
            .option("kafka.bootstrap.servers", testUtils.brokerAddress)
            .option("subscribe", topic)
            .option("startingOffsets", "earliest")
            .option("maxOffsetsPerTrigger", "3")
            .name(sourceName)
            .load()
            .selectExpr("CAST(value AS STRING) AS value", "timestamp")
        }

        // topic1/topic2 feed the (concurrent) union child; topic3 feeds the source
        // that must only start once the union child is exhausted.
        val (topic1, data1) = makeTopic("topic1")
        val (topic2, data2) = makeTopic("topic2")
        val (topic3, data3) = makeTopic("topic3")

        val unionChild = kafkaStream(topic1, "union_source_1")
          .union(kafkaStream(topic2, "union_source_2"))
        val stream3 = kafkaStream(topic3, "sequential_source")

        // Sequential union: (topic1 ∪ topic2) runs to completion, then topic3.
        val sequential = unionChild.followedBy(stream3)

        // (value, batchId) pairs in arrival order. A val suffices: only the
        // ListBuffer's contents mutate, never the binding.
        val writeOrder = scala.collection.mutable.ListBuffer[(String, Long)]()

        // foreachBatch is the sink, so no format/path are set on the writer (they
        // would be ignored); the batch function records ordering and writes the
        // parquet output itself.
        val query = sequential.writeStream
          .outputMode("append")
          .option("checkpointLocation", checkpointLocation)
          .foreachBatch { (batchDF: Dataset[Row], batchId: Long) =>
            val batchValues = batchDF.select("value").as[String].collect()
            batchValues.foreach(v => writeOrder += ((v, batchId)))
            batchDF.write.mode("append").parquet(outputPath)
          }
          .trigger(Trigger.AvailableNow)
          .start()

        query.awaitTermination()

        // The followedBy plan must be executed by the sequential-union engine.
        val executionClass = query.asInstanceOf[StreamingQueryWrapper]
          .streamingQuery.getClass.getName
        assert(executionClass.contains("SequentialUnionExecution"),
          s"Expected SequentialUnionExecution but the query ran with $executionClass")

        // Completeness: exactly the 30 published values (10 per topic), no extras.
        val result = spark.read.parquet(outputPath)
          .select("value")
          .as[String]
          .collect()
          .toSet
        assert(result.size == 30,
          s"Expected 30 values but got ${result.size}")
        val expected = data1.toSet ++ data2.toSet ++ data3.toSet
        assert(result == expected,
          s"Result mismatch. Missing: ${expected.diff(result)}, Extra: ${result.diff(expected)}")

        // Ordering checks are phrased in terms of the batch ids each topic appeared in.
        def batchesOf(prefix: String): Set[Long] =
          writeOrder.filter(_._1.startsWith(prefix)).map(_._2).toSet
        val topic1Batches = batchesOf("topic1_")
        val topic2Batches = batchesOf("topic2_")
        val topic3Batches = batchesOf("topic3_")

        // 1. The two union children are processed concurrently, so they must share
        //    at least one batch.
        val unionBatches = topic1Batches ++ topic2Batches
        assert(topic1Batches.intersect(topic2Batches).nonEmpty,
          s"Expected topic1 and topic2 to be interleaved in same batches, " +
            s"but topic1 batches=$topic1Batches, topic2 batches=$topic2Batches")

        // 2. Every topic3 batch comes strictly after the last union batch, which
        //    also implies topic3 only started once both union sources were drained.
        //    (The completeness check above guarantees topic3Batches is non-empty.)
        val maxUnionBatch = if (unionBatches.nonEmpty) unionBatches.max else -1L
        assert(topic3Batches.forall(_ > maxUnionBatch),
          s"Sequential order violated: all topic3 batches should come after union " +
            s"batches. Union max: $maxUnionBatch, Topic3 batches: $topic3Batches")
      }
    }
  }
}
}

abstract class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase {
Expand Down
21 changes: 21 additions & 0 deletions sql/api/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,27 @@ abstract class Dataset[T] extends Serializable {
*/
def unionByName(other: Dataset[T], allowMissingColumns: Boolean): Dataset[T]

/**
 * Returns a Dataset containing rows from this Dataset followed sequentially by rows from
 * another Dataset. Unlike `union`, which processes both datasets concurrently, this method
 * processes this Dataset completely before starting the other Dataset.
 *
 * This is useful for scenarios like processing historical data followed by live streaming
 * data. For example:
 * {{{
 *   val historical = spark.readStream.format("parquet").load("/historical-data")
 *   val live = spark.readStream.format("kafka").option("subscribe", "events").load()
 *   val sequential = historical.followedBy(live)
 *   // Processes all historical data first, then transitions to live Kafka
 * }}}
 *
 * NOTE(review): the example and semantics are streaming-oriented; behavior for purely
 * batch Datasets is not specified here — confirm before relying on it.
 *
 * @param other Another Dataset to append after this one completes
 * @return A new Dataset with sequential union semantics
 * @group typedrel
 * @since 4.0.0
 */
def followedBy(other: Dataset[T]): Dataset[T]

/**
* Returns a new Dataset containing rows only in both this Dataset and another Dataset. This is
* equivalent to `INTERSECT` in SQL.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,12 @@ class Dataset[T] private[sql] (
}
}

/** @inheritdoc */
def followedBy(other: sql.Dataset[T]): Dataset[T] = {
  // Sequential union is not expressible in the Connect protocol yet, so fail fast
  // rather than silently degrading to a concurrent `union`.
  // TODO: implement once the protocol supports SequentialStreamingUnion.
  throw new UnsupportedOperationException(
    "followedBy is not yet supported in Spark Connect")
}

/** @inheritdoc */
def intersect(other: sql.Dataset[T]): Dataset[T] = {
buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_INTERSECT) { builder =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,12 @@ class Dataset[T] private[sql](
combineUnions(Union(logicalPlan, other.logicalPlan))
}

/** @inheritdoc */
def followedBy(other: sql.Dataset[T]): Dataset[T] = withSetOperator {
  // Build the sequential-union node over the two child plans. Resolution is
  // positional (not by-name), mirroring the plain `union` operator.
  SequentialStreamingUnion(
    Seq(logicalPlan, other.logicalPlan), byName = false, allowMissingCol = false)
}

/** @inheritdoc */
def unionByName(other: sql.Dataset[T], allowMissingColumns: Boolean): Dataset[T] = {
withSetOperator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ import org.apache.spark.annotation.Evolving
import org.apache.spark.internal.Logging
import org.apache.spark.internal.LogKeys.{CLASS_NAME, QUERY_ID, RUN_ID}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.SequentialStreamingUnion
import org.apache.spark.sql.catalyst.streaming.{WriteToStream, WriteToStreamStatement}
import org.apache.spark.sql.connector.catalog.{Identifier, SupportsWrite, Table, TableCatalog}
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.execution.streaming.runtime.{AsyncProgressTrackingMicroBatchExecution, MicroBatchExecution, StreamingQueryListenerBus, StreamingQueryWrapper}
import org.apache.spark.sql.execution.streaming.runtime.{AsyncProgressTrackingMicroBatchExecution, MicroBatchExecution, SequentialUnionExecution, StreamingQueryListenerBus, StreamingQueryWrapper}
import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.STREAMING_QUERY_LISTENERS
Expand Down Expand Up @@ -222,6 +223,12 @@ class StreamingQueryManager private[sql] (
sparkSession.sessionState.executePlan(dataStreamWritePlan).analyzed
.asInstanceOf[WriteToStream]

// Detect if the query contains a SequentialStreamingUnion
val hasSequentialUnion = analyzedStreamWritePlan.inputQuery.exists {
case _: SequentialStreamingUnion => true
case _ => false
}

(sink, trigger) match {
case (_: SupportsWrite, trigger: ContinuousTrigger) =>
new StreamingQueryWrapper(new ContinuousExecution(
Expand All @@ -231,7 +238,15 @@ class StreamingQueryManager private[sql] (
extraOptions,
analyzedStreamWritePlan))
case _ =>
val microBatchExecution = if (useAsyncProgressTracking(extraOptions)) {
val microBatchExecution = if (hasSequentialUnion) {
// Use SequentialUnionExecution for queries with sequential union
new SequentialUnionExecution(
sparkSession,
trigger,
triggerClock,
extraOptions,
analyzedStreamWritePlan)
} else if (useAsyncProgressTracking(extraOptions)) {
if (trigger.isInstanceOf[RealTimeTrigger]) {
throw new SparkIllegalArgumentException(
errorClass = "STREAMING_REAL_TIME_MODE.ASYNC_PROGRESS_TRACKING_NOT_SUPPORTED"
Expand Down
Loading