From dc92ebe282601c4ac03d67ec6ff2d79ebbdf7ff3 Mon Sep 17 00:00:00 2001 From: Zhuo Wang Date: Tue, 10 Feb 2026 17:07:26 +0800 Subject: [PATCH] feat: Add incremental scan API with IncrementalAppendScan and IncrementalChangelogScan --- src/iceberg/table.cc | 16 ++- src/iceberg/table.h | 12 +- src/iceberg/table_scan.cc | 198 +++++++++++++++++++++------- src/iceberg/table_scan.h | 122 +++++++++++++---- src/iceberg/test/table_scan_test.cc | 46 +++---- src/iceberg/type_fwd.h | 11 ++ 6 files changed, 310 insertions(+), 95 deletions(-) diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc index 0550f61d5..72190855f 100644 --- a/src/iceberg/table.cc +++ b/src/iceberg/table.cc @@ -149,8 +149,18 @@ Result> Table::location_provider() const { return LocationProvider::Make(metadata_->location, metadata_->properties); } -Result> Table::NewScan() const { - return TableScanBuilder::Make(metadata_, io_); +Result> Table::NewScan() const { + return DataTableScanBuilder::Make(metadata_, io_); +} + +Result> Table::NewIncrementalAppendScan() + const { + return IncrementalAppendScanBuilder::Make(metadata_, io_); +} + +Result> +Table::NewIncrementalChangelogScan() const { + return IncrementalChangelogScanBuilder::Make(metadata_, io_); } Result> Table::NewTransaction() { @@ -247,7 +257,7 @@ Result> StagedTable::Make( StagedTable::~StagedTable() = default; -Result> StagedTable::NewScan() const { +Result> StagedTable::NewScan() const { return NotSupported("Cannot scan a staged table"); } diff --git a/src/iceberg/table.h b/src/iceberg/table.h index 423911c21..8d8849f37 100644 --- a/src/iceberg/table.h +++ b/src/iceberg/table.h @@ -127,7 +127,15 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this { /// /// Once a table scan builder is created, it can be refined to project columns and /// filter data. - virtual Result> NewScan() const; + virtual Result> NewScan() const; + + /// \brief Create a new incremental append scan builder for this table + virtual Result> NewIncrementalAppendScan() + const; + + /// \brief Create a new incremental changelog scan builder for this table + virtual Result> + NewIncrementalChangelogScan() const; /// \brief Create a new Transaction to commit multiple table operations at once. virtual Result> NewTransaction(); @@ -196,7 +204,7 @@ class ICEBERG_EXPORT StagedTable final : public Table { Status Refresh() override { return {}; } - Result> NewScan() const override; + Result> NewScan() const override; private: using Table::Table; diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc index eeec262e9..1719e913a 100644 --- a/src/iceberg/table_scan.cc +++ b/src/iceberg/table_scan.cc @@ -210,39 +210,73 @@ Result FileScanTask::ToArrow( return MakeArrowArrayStream(std::move(reader)); } -Result> TableScanBuilder::Make( - std::shared_ptr metadata, std::shared_ptr io) { +// Template specialization for DataTableScan (default) +template <> +Result>> +TableScanBuilder::Make(std::shared_ptr metadata, + std::shared_ptr io) { ICEBERG_PRECHECK(metadata != nullptr, "Table metadata cannot be null"); ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null"); - return std::unique_ptr( - new TableScanBuilder(std::move(metadata), std::move(io))); + return std::unique_ptr>( + new TableScanBuilder(std::move(metadata), std::move(io))); } -TableScanBuilder::TableScanBuilder(std::shared_ptr table_metadata, - std::shared_ptr file_io) +// Template specialization for IncrementalAppendScan +template <> +Result>> +TableScanBuilder::Make(std::shared_ptr metadata, + std::shared_ptr io) { + ICEBERG_PRECHECK(metadata != nullptr, "Table metadata cannot be null"); + ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null"); + return std::unique_ptr>( + new TableScanBuilder(std::move(metadata), std::move(io))); +} + +// Template specialization for IncrementalChangelogScan +template <> +Result>> +TableScanBuilder::Make(std::shared_ptr metadata, + std::shared_ptr io) { + ICEBERG_PRECHECK(metadata != nullptr, "Table metadata cannot be null"); + ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null"); + return std::unique_ptr>( + new TableScanBuilder(std::move(metadata), std::move(io))); +} + +template +TableScanBuilder::TableScanBuilder( + std::shared_ptr table_metadata, std::shared_ptr file_io) : metadata_(std::move(table_metadata)), io_(std::move(file_io)) {} -TableScanBuilder& TableScanBuilder::Option(std::string key, std::string value) { +template +TableScanBuilder& TableScanBuilder::Option(std::string key, + std::string value) { context_.options[std::move(key)] = std::move(value); return *this; } -TableScanBuilder& TableScanBuilder::Project(std::shared_ptr schema) { +template +TableScanBuilder& TableScanBuilder::Project( + std::shared_ptr schema) { context_.projected_schema = std::move(schema); return *this; } -TableScanBuilder& TableScanBuilder::CaseSensitive(bool case_sensitive) { +template +TableScanBuilder& TableScanBuilder::CaseSensitive( + bool case_sensitive) { context_.case_sensitive = case_sensitive; return *this; } -TableScanBuilder& TableScanBuilder::IncludeColumnStats() { +template +TableScanBuilder& TableScanBuilder::IncludeColumnStats() { context_.return_column_stats = true; return *this; } -TableScanBuilder& TableScanBuilder::IncludeColumnStats( +template +TableScanBuilder& TableScanBuilder::IncludeColumnStats( const std::vector& requested_columns) { context_.return_column_stats = true; context_.columns_to_keep_stats.clear(); @@ -260,27 +294,35 @@ TableScanBuilder& TableScanBuilder::IncludeColumnStats( return *this; } -TableScanBuilder& TableScanBuilder::Select(const std::vector& column_names) { +template +TableScanBuilder& TableScanBuilder::Select( + const std::vector& column_names) { context_.selected_columns = column_names; return *this; } -TableScanBuilder& TableScanBuilder::Filter(std::shared_ptr filter) { +template +TableScanBuilder& TableScanBuilder::Filter( + std::shared_ptr filter) { context_.filter = std::move(filter); return *this; } -TableScanBuilder& TableScanBuilder::IgnoreResiduals() { +template +TableScanBuilder& TableScanBuilder::IgnoreResiduals() { context_.ignore_residuals = true; return *this; } -TableScanBuilder& TableScanBuilder::MinRowsRequested(int64_t num_rows) { +template +TableScanBuilder& TableScanBuilder::MinRowsRequested( + int64_t num_rows) { context_.min_rows_requested = num_rows; return *this; } -TableScanBuilder& TableScanBuilder::UseSnapshot(int64_t snapshot_id) { +template +TableScanBuilder& TableScanBuilder::UseSnapshot(int64_t snapshot_id) { ICEBERG_BUILDER_CHECK(!context_.snapshot_id.has_value(), "Cannot override snapshot, already set snapshot id={}", context_.snapshot_id.value()); @@ -289,7 +331,8 @@ TableScanBuilder& TableScanBuilder::UseSnapshot(int64_t snapshot_id) { return *this; } -TableScanBuilder& TableScanBuilder::UseRef(const std::string& ref) { +template +TableScanBuilder& TableScanBuilder::UseRef(const std::string& ref) { if (ref == SnapshotRef::kMainBranch) { snapshot_schema_ = nullptr; context_.snapshot_id.reset(); @@ -309,38 +352,61 @@ TableScanBuilder& TableScanBuilder::UseRef(const std::string& ref) { return *this; } -TableScanBuilder& TableScanBuilder::AsOfTime(int64_t timestamp_millis) { +template +TableScanBuilder& TableScanBuilder::AsOfTime( + int64_t timestamp_millis) { auto time_point_ms = TimePointMsFromUnixMs(timestamp_millis); ICEBERG_BUILDER_ASSIGN_OR_RETURN( auto snapshot_id, SnapshotUtil::SnapshotIdAsOfTime(*metadata_, time_point_ms)); return UseSnapshot(snapshot_id); } -TableScanBuilder& TableScanBuilder::FromSnapshot( - [[maybe_unused]] int64_t from_snapshot_id, [[maybe_unused]] bool inclusive) { - return AddError(NotImplemented("Incremental scan is not implemented")); +template +TableScanBuilder& TableScanBuilder::FromSnapshot( + [[maybe_unused]] int64_t from_snapshot_id, [[maybe_unused]] bool inclusive) + requires IsIncrementalScan +{ + AddError(NotImplemented("Incremental scan is not implemented")); + return *this; } -TableScanBuilder& TableScanBuilder::FromSnapshot([[maybe_unused]] const std::string& ref, - [[maybe_unused]] bool inclusive) { - return AddError(NotImplemented("Incremental scan is not implemented")); +template +TableScanBuilder& TableScanBuilder::FromSnapshot( + const std::string& ref, bool inclusive) + requires IsIncrementalScan +{ + AddError(NotImplemented("Incremental scan is not implemented")); + return *this; } -TableScanBuilder& TableScanBuilder::ToSnapshot([[maybe_unused]] int64_t to_snapshot_id) { - return AddError(NotImplemented("Incremental scan is not implemented")); +template +TableScanBuilder& TableScanBuilder::ToSnapshot(int64_t to_snapshot_id) + requires IsIncrementalScan +{ + AddError(NotImplemented("Incremental scan is not implemented")); + return *this; } -TableScanBuilder& TableScanBuilder::ToSnapshot([[maybe_unused]] const std::string& ref) { - return AddError(NotImplemented("Incremental scan is not implemented")); +template +TableScanBuilder& TableScanBuilder::ToSnapshot(const std::string& ref) + requires IsIncrementalScan +{ + AddError(NotImplemented("Incremental scan is not implemented")); + return *this; } -TableScanBuilder& TableScanBuilder::UseBranch(const std::string& branch) { +template +TableScanBuilder& TableScanBuilder::UseBranch( + const std::string& branch) + requires IsIncrementalScan +{ context_.branch = branch; return *this; } +template Result>> -TableScanBuilder::ResolveSnapshotSchema() { +TableScanBuilder::ResolveSnapshotSchema() { if (snapshot_schema_ == nullptr) { if (context_.snapshot_id.has_value()) { ICEBERG_ASSIGN_OR_RAISE(auto snapshot, @@ -355,22 +421,32 @@ TableScanBuilder::ResolveSnapshotSchema() { return snapshot_schema_; } -bool TableScanBuilder::IsIncrementalScan() const { - return context_.from_snapshot_id.has_value() || context_.to_snapshot_id.has_value(); -} - -Result> TableScanBuilder::Build() { +template <> +Result> TableScanBuilder::Build() { ICEBERG_RETURN_UNEXPECTED(CheckErrors()); ICEBERG_RETURN_UNEXPECTED(context_.Validate()); - if (IsIncrementalScan()) { - return NotImplemented("Incremental scan is not yet implemented"); - } - ICEBERG_ASSIGN_OR_RAISE(auto schema, ResolveSnapshotSchema()); return DataTableScan::Make(metadata_, schema.get(), io_, std::move(context_)); } +template <> +Result> +TableScanBuilder::Build() { + return NotImplemented("IncrementalAppendScanBuilder is not implemented"); +} + +template <> +Result> +TableScanBuilder::Build() { + return NotImplemented("IncrementalChangelogScanBuilder is not implemented"); +} + +// Explicit template instantiations +template class TableScanBuilder; +template class TableScanBuilder; +template class TableScanBuilder; + TableScan::TableScan(std::shared_ptr metadata, std::shared_ptr schema, std::shared_ptr file_io, internal::TableScanContext context) @@ -466,12 +542,6 @@ Result> DataTableScan::Make( std::move(metadata), std::move(schema), std::move(io), std::move(context))); } -DataTableScan::DataTableScan(std::shared_ptr metadata, - std::shared_ptr schema, std::shared_ptr io, - internal::TableScanContext context) - : TableScan(std::move(metadata), std::move(schema), std::move(io), - std::move(context)) {} - Result>> DataTableScan::PlanFiles() const { ICEBERG_ASSIGN_OR_RAISE(auto snapshot, this->snapshot()); if (!snapshot) { @@ -501,4 +571,42 @@ Result>> DataTableScan::PlanFiles() co return manifest_group->PlanFiles(); } +template +Result>> +IncrementalScan::PlanFiles() const { + return NotImplemented("IncrementalScan::PlanFiles is not implemented"); +} + +// IncrementalAppendScan implementation + +Result> IncrementalAppendScan::Make( + [[maybe_unused]] std::shared_ptr metadata, + [[maybe_unused]] std::shared_ptr schema, + [[maybe_unused]] std::shared_ptr io, + [[maybe_unused]] internal::TableScanContext context) { + return NotImplemented("IncrementalAppendScan is not implemented"); +} + +Result>> IncrementalAppendScan::PlanFiles( + std::optional from_snapshot_id_exclusive, + int64_t to_snapshot_id_inclusive) const { + return NotImplemented("IncrementalAppendScan::PlanFiles is not implemented"); +} + +// IncrementalChangelogScan implementation + +Result> IncrementalChangelogScan::Make( + [[maybe_unused]] std::shared_ptr metadata, + [[maybe_unused]] std::shared_ptr schema, + [[maybe_unused]] std::shared_ptr io, + [[maybe_unused]] internal::TableScanContext context) { + return NotImplemented("IncrementalChangelogScan is not implemented"); +} + +Result>> +IncrementalChangelogScan::PlanFiles(std::optional from_snapshot_id_exclusive, + int64_t to_snapshot_id_inclusive) const { + return NotImplemented("IncrementalChangelogScan::PlanFiles is not implemented"); +} + } // namespace iceberg diff --git a/src/iceberg/table_scan.h b/src/iceberg/table_scan.h index aa225ff81..49a8ffc5a 100644 --- a/src/iceberg/table_scan.h +++ b/src/iceberg/table_scan.h @@ -39,6 +39,7 @@ class ICEBERG_EXPORT ScanTask { public: enum class Kind : uint8_t { kFileScanTask, + kChangelogScanTask, }; /// \brief The kind of scan task. @@ -100,6 +101,16 @@ class ICEBERG_EXPORT FileScanTask : public ScanTask { std::shared_ptr residual_filter_; }; +/// \brief A scan task for reading changelog entries between snapshots. +class ICEBERG_EXPORT ChangelogScanTask : public ScanTask { + public: + Kind kind() const override { return Kind::kChangelogScanTask; } + // TODO(): Return actual values once member fields are implemented + int64_t size_bytes() const override { return 0; } + int32_t files_count() const override { return 0; } + int64_t estimated_row_count() const override { return 0; } +}; + namespace internal { // Internal table scan context used by different scan implementations. @@ -125,13 +136,19 @@ struct TableScanContext { } // namespace internal +// Concept to check if a type is an incremental scan +template +concept IsIncrementalScan = std::is_base_of_v, T> || + std::is_base_of_v, T>; + /// \brief Builder class for creating TableScan instances. +template class ICEBERG_EXPORT TableScanBuilder : public ErrorCollector { public: /// \brief Constructs a TableScanBuilder for the given table. /// \param metadata Current table metadata. /// \param io FileIO instance for reading manifests files. - static Result> Make( + static Result>> Make( std::shared_ptr metadata, std::shared_ptr io); /// \brief Update property that will override the table's behavior @@ -206,61 +223,69 @@ class ICEBERG_EXPORT TableScanBuilder : public ErrorCollector { /// \brief Instructs this scan to look for changes starting from a particular snapshot. /// - /// If the start snapshot is not configured, it defaults to the oldest ancestor of the - /// end snapshot (inclusive). + /// This method is only available for incremental scans. + /// If the start snapshot is not configured, it defaults to the oldest ancestor of + /// the end snapshot (inclusive). /// /// \param from_snapshot_id the start snapshot ID /// \param inclusive whether the start snapshot is inclusive, default is false /// \note InvalidArgument will be returned if the start snapshot is not an ancestor of /// the end snapshot - TableScanBuilder& FromSnapshot(int64_t from_snapshot_id, bool inclusive = false); + TableScanBuilder& FromSnapshot(int64_t from_snapshot_id, bool inclusive = false) + requires IsIncrementalScan; /// \brief Instructs this scan to look for changes starting from a particular snapshot. /// - /// If the start snapshot is not configured, it defaults to the oldest ancestor of the - /// end snapshot (inclusive). + /// This method is only available for incremental scans. + /// If the start snapshot is not configured, it defaults to the oldest ancestor of + /// the end snapshot (inclusive). /// /// \param ref the start ref name that points to a particular snapshot ID /// \param inclusive whether the start snapshot is inclusive, default is false /// \note InvalidArgument will be returned if the start snapshot is not an ancestor of /// the end snapshot - TableScanBuilder& FromSnapshot(const std::string& ref, bool inclusive = false); + TableScanBuilder& FromSnapshot(const std::string& ref, bool inclusive = false) + requires IsIncrementalScan; /// \brief Instructs this scan to look for changes up to a particular snapshot /// (inclusive). /// + /// This method is only available for incremental scans. /// If the end snapshot is not configured, it defaults to the current table snapshot /// (inclusive). /// /// \param to_snapshot_id the end snapshot ID (inclusive) - TableScanBuilder& ToSnapshot(int64_t to_snapshot_id); + TableScanBuilder& ToSnapshot(int64_t to_snapshot_id) + requires IsIncrementalScan; /// \brief Instructs this scan to look for changes up to a particular snapshot ref /// (inclusive). /// + /// This method is only available for incremental scans. /// If the end snapshot is not configured, it defaults to the current table snapshot /// (inclusive). /// /// \param ref the end snapshot Ref (inclusive) - TableScanBuilder& ToSnapshot(const std::string& ref); + TableScanBuilder& ToSnapshot(const std::string& ref) + requires IsIncrementalScan; /// \brief Use the specified branch + /// + /// This method is only available for incremental scans. /// \param branch the branch name - TableScanBuilder& UseBranch(const std::string& branch); + TableScanBuilder& UseBranch(const std::string& branch) + requires IsIncrementalScan; /// \brief Builds and returns a TableScan instance. /// \return A Result containing the TableScan or an error. - Result> Build(); + Result> Build(); - private: + protected: TableScanBuilder(std::shared_ptr metadata, std::shared_ptr io); // Return the schema bound to the specified snapshot. Result>> ResolveSnapshotSchema(); - // Return whether current configuration indicates an incremental scan mode. - bool IsIncrementalScan() const; - std::shared_ptr metadata_; std::shared_ptr io_; internal::TableScanContext context_; @@ -293,10 +318,6 @@ class ICEBERG_EXPORT TableScan { /// \brief Returns whether this scan is case-sensitive. bool is_case_sensitive() const; - /// \brief Plans the scan tasks by resolving manifests and data files. - /// \return A Result containing scan tasks or an error. - virtual Result>> PlanFiles() const = 0; - protected: TableScan(std::shared_ptr metadata, std::shared_ptr schema, std::shared_ptr io, internal::TableScanContext context); @@ -316,6 +337,8 @@ class ICEBERG_EXPORT TableScan { /// \brief A scan that reads data files and applies delete files to filter rows. class ICEBERG_EXPORT DataTableScan : public TableScan { public: + ~DataTableScan() override = default; + /// \brief Constructs a DataTableScan instance. static Result> Make( std::shared_ptr metadata, std::shared_ptr schema, @@ -323,11 +346,66 @@ class ICEBERG_EXPORT DataTableScan : public TableScan { /// \brief Plans the scan tasks by resolving manifests and data files. /// \return A Result containing scan tasks or an error. - Result>> PlanFiles() const override; + Result>> PlanFiles() const; + + protected: + using TableScan::TableScan; +}; + +/// \brief A base template class for incremental scans that read changes between +/// snapshots, and return scan tasks of the specified type. +template +class ICEBERG_EXPORT IncrementalScan : public TableScan { + public: + ~IncrementalScan() override = default; + + /// \brief Plans the scan tasks by resolving manifests and data files. + /// \return A Result containing scan tasks or an error. + Result>> PlanFiles() const; protected: - DataTableScan(std::shared_ptr metadata, std::shared_ptr schema, - std::shared_ptr io, internal::TableScanContext context); + virtual Result>> PlanFiles( + std::optional from_snapshot_id_exclusive, + int64_t to_snapshot_id_inclusive) const = 0; + + using TableScan::TableScan; +}; + +/// \brief A scan that reads data files added between snapshots (incremental appends). +class ICEBERG_EXPORT IncrementalAppendScan : public IncrementalScan { + public: + /// \brief Constructs an IncrementalAppendScan instance. + static Result> Make( + std::shared_ptr metadata, std::shared_ptr schema, + std::shared_ptr io, internal::TableScanContext context); + + ~IncrementalAppendScan() override = default; + + protected: + Result>> PlanFiles( + std::optional from_snapshot_id_exclusive, + int64_t to_snapshot_id_inclusive) const override; + + using IncrementalScan::IncrementalScan; +}; + +/// \brief A scan that reads changelog entries between snapshots. +class ICEBERG_EXPORT IncrementalChangelogScan + : public IncrementalScan { + public: + /// \brief Constructs an IncrementalChangelogScan instance. + static Result> Make( + std::shared_ptr metadata, std::shared_ptr schema, + std::shared_ptr io, internal::TableScanContext context); + + ~IncrementalChangelogScan() override = default; + + protected: + Result>> PlanFiles( + std::optional from_snapshot_id_exclusive, + int64_t to_snapshot_id_inclusive) const override; + + using IncrementalScan::IncrementalScan; }; } // namespace iceberg diff --git a/src/iceberg/test/table_scan_test.cc b/src/iceberg/test/table_scan_test.cc index 83e41ddd9..d9fc1e927 100644 --- a/src/iceberg/test/table_scan_test.cc +++ b/src/iceberg/test/table_scan_test.cc @@ -266,7 +266,8 @@ class TableScanTest : public testing::TestWithParam { TEST_P(TableScanTest, TableScanBuilderOptions) { // Test basic scan creation and default values - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(table_metadata_, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, + DataTableScanBuilder::Make(table_metadata_, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto basic_scan, builder->Build()); EXPECT_NE(basic_scan, nullptr); EXPECT_EQ(basic_scan->metadata(), table_metadata_); @@ -279,10 +280,9 @@ TEST_P(TableScanTest, TableScanBuilderOptions) { auto filter = Expressions::Equal("id", Literal::Int(42)); constexpr int64_t kMinRows = 1000; constexpr int64_t kSnapshotId = 1000L; - const std::string branch_name = "test-branch"; ICEBERG_UNWRAP_OR_FAIL(auto builder2, - TableScanBuilder::Make(table_metadata_, file_io_)); + DataTableScanBuilder::Make(table_metadata_, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder2->Option("key1", "value1") .Option("key2", "value2") .CaseSensitive(false) @@ -292,7 +292,6 @@ TEST_P(TableScanTest, TableScanBuilderOptions) { .IgnoreResiduals() .MinRowsRequested(kMinRows) .UseSnapshot(kSnapshotId) - .UseBranch(branch_name) .Build()); // Verify all options were set correctly @@ -313,11 +312,10 @@ TEST_P(TableScanTest, TableScanBuilderOptions) { EXPECT_EQ(context.min_rows_requested.value(), kMinRows); EXPECT_TRUE(context.snapshot_id.has_value()); EXPECT_EQ(context.snapshot_id.value(), kSnapshotId); - EXPECT_EQ(context.branch, branch_name); // Test UseRef separately ICEBERG_UNWRAP_OR_FAIL(auto builder3, - TableScanBuilder::Make(table_metadata_, file_io_)); + DataTableScanBuilder::Make(table_metadata_, file_io_)); builder3->UseRef("main"); ICEBERG_UNWRAP_OR_FAIL(auto ref_scan, builder3->Build()); ICEBERG_UNWRAP_OR_FAIL(auto snapshot, ref_scan->snapshot()); @@ -326,26 +324,27 @@ TEST_P(TableScanTest, TableScanBuilderOptions) { TEST_P(TableScanTest, TableScanBuilderValidationErrors) { // Test negative min rows - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(table_metadata_, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, + DataTableScanBuilder::Make(table_metadata_, file_io_)); builder->MinRowsRequested(-1); EXPECT_THAT(builder->Build(), IsError(ErrorKind::kValidationFailed)); // Test invalid snapshot ID ICEBERG_UNWRAP_OR_FAIL(auto builder2, - TableScanBuilder::Make(table_metadata_, file_io_)); + DataTableScanBuilder::Make(table_metadata_, file_io_)); builder2->UseSnapshot(9999L); EXPECT_THAT(builder2->Build(), IsError(ErrorKind::kValidationFailed)); // Test invalid ref ICEBERG_UNWRAP_OR_FAIL(auto builder3, - TableScanBuilder::Make(table_metadata_, file_io_)); + DataTableScanBuilder::Make(table_metadata_, file_io_)); builder3->UseRef("non-existent-ref"); EXPECT_THAT(builder3->Build(), IsError(ErrorKind::kValidationFailed)); // Test null inputs - EXPECT_THAT(TableScanBuilder::Make(nullptr, file_io_), + EXPECT_THAT(DataTableScanBuilder::Make(nullptr, file_io_), IsError(ErrorKind::kInvalidArgument)); - EXPECT_THAT(TableScanBuilder::Make(table_metadata_, nullptr), + EXPECT_THAT(DataTableScanBuilder::Make(table_metadata_, nullptr), IsError(ErrorKind::kInvalidArgument)); } @@ -360,7 +359,8 @@ TEST_P(TableScanTest, DataTableScanPlanFilesEmpty) { .snapshots = {}, .refs = {}}); - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(empty_metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, + DataTableScanBuilder::Make(empty_metadata, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); EXPECT_TRUE(tasks.empty()); @@ -418,7 +418,7 @@ TEST_P(TableScanTest, PlanFilesWithDataManifests) { })}}}); ICEBERG_UNWRAP_OR_FAIL(auto builder, - TableScanBuilder::Make(metadata_with_manifest, file_io_)); + DataTableScanBuilder::Make(metadata_with_manifest, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); ASSERT_EQ(tasks.size(), 2); @@ -485,7 +485,7 @@ TEST_P(TableScanTest, PlanFilesWithMultipleManifests) { })}}}); ICEBERG_UNWRAP_OR_FAIL(auto builder, - TableScanBuilder::Make(metadata_with_manifests, file_io_)); + DataTableScanBuilder::Make(metadata_with_manifests, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); ASSERT_EQ(tasks.size(), 2); @@ -547,7 +547,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) { // Test 1: Filter matches only data1.parquet (id=25 is in range [1, 50]) { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Filter(Expressions::Equal("id", Literal::Int(25))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); @@ -557,7 +557,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) { // Test 2: Filter matches only data2.parquet (id=75 is in range [51, 100]) { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Filter(Expressions::Equal("id", Literal::Int(75))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); @@ -567,7 +567,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) { // Test 3: Filter matches both files (id > 0 covers both ranges) { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Filter(Expressions::GreaterThan("id", Literal::Int(0))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); @@ -578,7 +578,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) { // Test 4: Filter matches no files (id=200 is outside both ranges) { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Filter(Expressions::Equal("id", Literal::Int(200))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); @@ -654,7 +654,7 @@ TEST_P(TableScanTest, PlanFilesWithDeleteFiles) { })}}}); ICEBERG_UNWRAP_OR_FAIL(auto builder, - TableScanBuilder::Make(metadata_with_manifests, file_io_)); + DataTableScanBuilder::Make(metadata_with_manifests, file_io_)); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto tasks, scan->PlanFiles()); ASSERT_EQ(tasks.size(), 2); @@ -704,7 +704,7 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) { // Select "data" column, filter on "id" column { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Select({"data"}).Filter(Expressions::Equal("id", Literal::Int(42))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto projected_schema, scan->schema()); @@ -722,7 +722,7 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) { // Select "id" and "value", filter on "data" { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Select({"id", "value"}) .Filter(Expressions::Equal("data", Literal::String("test"))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); @@ -742,7 +742,7 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) { // Select "id", filter on "id" - should only have "id" once { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Select({"id"}).Filter(Expressions::Equal("id", Literal::Int(42))); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto projected_schema, scan->schema()); @@ -756,7 +756,7 @@ TEST_P(TableScanTest, SchemaWithSelectedColumnsAndFilter) { // Select columns without filter { - ICEBERG_UNWRAP_OR_FAIL(auto builder, TableScanBuilder::Make(metadata, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto builder, DataTableScanBuilder::Make(metadata, file_io_)); builder->Select({"data"}); ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build()); ICEBERG_UNWRAP_OR_FAIL(auto projected_schema, scan->schema()); diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 7a3f50df2..5c476d4da 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -140,12 +140,23 @@ class ResidualEvaluator; class StrictMetricsEvaluator; /// \brief Scan. +class ChangelogScanTask; class DataTableScan; class FileScanTask; +template +class IncrementalScan; +class IncrementalAppendScan; +class IncrementalChangelogScan; class ScanTask; class TableScan; +template class TableScanBuilder; +// Type aliases for incremental scan builders +using DataTableScanBuilder = TableScanBuilder; +using IncrementalAppendScanBuilder = TableScanBuilder; +using IncrementalChangelogScanBuilder = TableScanBuilder; + /// \brief Manifest. enum class ManifestContent; struct DataFile;