From e8e889730b0a76ca81813e82ce3090a127d5f85c Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 16:46:00 +0000 Subject: [PATCH 01/10] Added snapshot validation --- include/storage/vacuumer.hh | 23 ++++-- include/test/file_system_check.hh | 33 +++++++- src/storage/vacuumer.cc | 6 +- src/sys_tbl_mgr/CMakeLists.txt | 1 + src/sys_tbl_mgr/test/file_system_check.cc | 15 +++- src/test/file_system_check.cc | 92 +++++++++++++++++++---- 6 files changed, 141 insertions(+), 29 deletions(-) diff --git a/include/storage/vacuumer.hh b/include/storage/vacuumer.hh index cc3061811..2b7689029 100644 --- a/include/storage/vacuumer.hh +++ b/include/storage/vacuumer.hh @@ -91,6 +91,21 @@ public: _global_file_size_threshold = size; } + /** + * @brief Get vacuum-safe XID for a DB + * + * @param db_id Database ID + * @return XID until which vacuum can run + */ + uint64_t get_vacuum_cutoff_xid(uint64_t db_id); + + /** + * @brief Return whether the vacuumer is enabled + * + * @return true - enabled + * @return false - disabled + */ + bool is_enabled() { return _vacuum_start_enabled; } protected: /** * @brief Constructor, that inits the vacuumer thread @@ -209,14 +224,6 @@ private: std::vector hole_punch_file(const std::string& file, const std::vector& input_extents); - /** - * @brief Get vacuum-safe XID for a DB - * - * @param db_id Database ID - * @return XID until which vacuum can run - */ - uint64_t _get_vacuum_cutoff_xid(uint64_t db_id); - /** * @brief Returns db_id following a keyword in a filesystem path. 
* diff --git a/include/test/file_system_check.hh b/include/test/file_system_check.hh index 876859a28..0a2d28cc6 100644 --- a/include/test/file_system_check.hh +++ b/include/test/file_system_check.hh @@ -10,7 +10,7 @@ namespace springtail::test { class FSCheck { public: - FSCheck(); + FSCheck(uint64_t max_xid = constant::LATEST_XID); ~FSCheck() = default; /** @@ -40,7 +40,12 @@ namespace springtail::test { private: std::map _databases; ///< map of database id to database name std::filesystem::path _table_base; ///< directory where all the tables are stored + uint64_t _max_xid; ///< maximum xid + /** + * @brief Storage for namespace data + * + */ struct FSNamespace { uint64_t ns_id; @@ -50,6 +55,10 @@ namespace springtail::test { bool exists; }; + /** + * @brief Storage for root data + * + */ struct FSRoot { uint64_t xid; @@ -57,6 +66,10 @@ namespace springtail::test { uint64_t snapshot_xid; }; + /** + * @brief Storage for stats data + * + */ struct FSStats { uint64_t xid; @@ -64,6 +77,10 @@ namespace springtail::test { uint64_t end_offset; }; + /** + * @brief Storage for index data + * + */ struct FSIndex { uint64_t xid; @@ -71,7 +88,10 @@ namespace springtail::test { Index index; }; - + /** + * @brief Storage for table data + * + */ struct FSTable { uint64_t ns_id; @@ -86,7 +106,16 @@ namespace springtail::test { std::map xid_to_stats; }; + /** + * @brief Map of database id and namespace id namespace data + * + */ std::map, FSNamespace> _db_ns_id_map; + + /** + * @brief Map of database id and table id to table data + * + */ std::map, FSTable> _db_tbl_id_map; /** diff --git a/src/storage/vacuumer.cc b/src/storage/vacuumer.cc index 852091fb9..10fc33c22 100644 --- a/src/storage/vacuumer.cc +++ b/src/storage/vacuumer.cc @@ -355,7 +355,7 @@ Vacuumer::hole_punch_file(const std::string& file, } uint64_t -Vacuumer::_get_vacuum_cutoff_xid(uint64_t db_id) +Vacuumer::get_vacuum_cutoff_xid(uint64_t db_id) { RedisDDL _redis_ddl; @@ -694,7 +694,7 @@ Vacuumer::_do_vacuum_run() 
const std::string& file = file_it->first; auto& xid_map = file_it->second; auto db_id = _get_db_id_from_path(file); - auto cutoff_xid = _get_vacuum_cutoff_xid(db_id); // Get safest XID to vacuum till that point + auto cutoff_xid = get_vacuum_cutoff_xid(db_id); // Get safest XID to vacuum till that point IntervalTree itree; std::vector xids_to_process; @@ -771,7 +771,7 @@ Vacuumer::_do_vacuum_run() /* --------------- Snapshot deletion flow -----------------------------------------------------------*/ // expire snapshots through the min XID for (auto db_it = expired_snapshots_map.begin(); db_it != expired_snapshots_map.end(); ) { - uint64_t cutoff_xid = _get_vacuum_cutoff_xid(db_it->first); // Get safest XID to vacuum till that point + uint64_t cutoff_xid = get_vacuum_cutoff_xid(db_it->first); // Get safest XID to vacuum till that point auto& xid_map = db_it->second; for (auto xid_it = xid_map.begin(); xid_it != xid_map.end(); ) { diff --git a/src/sys_tbl_mgr/CMakeLists.txt b/src/sys_tbl_mgr/CMakeLists.txt index 065babe0d..e449e4acd 100644 --- a/src/sys_tbl_mgr/CMakeLists.txt +++ b/src/sys_tbl_mgr/CMakeLists.txt @@ -37,6 +37,7 @@ add_executable(file_system_check test/file_system_check.cc) target_include_directories(file_system_check PRIVATE ${CMAKE_SOURCE_DIR}/include ${VCPKG_INCLUDE_PATH}) target_link_libraries(file_system_check PUBLIC springtail_test_check + PRIVATE Boost::program_options ) add_dsymutil_command(file_system_check) diff --git a/src/sys_tbl_mgr/test/file_system_check.cc b/src/sys_tbl_mgr/test/file_system_check.cc index 9c3aa817c..e677c8798 100644 --- a/src/sys_tbl_mgr/test/file_system_check.cc +++ b/src/sys_tbl_mgr/test/file_system_check.cc @@ -1,15 +1,28 @@ +#include + #include #include + using namespace springtail; +namespace po = boost::program_options; int main(int argc, char *argv[]) { + uint64_t max_xid = 0; + + po::options_description desc("Options"); + desc.add_options()("max_xid,mx", po::value(&max_xid)->default_value(constant::LATEST_XID), 
"Maximum xid, default is the latest"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + // no logging springtail_init(false, std::nullopt, LOG_NONE); - std::shared_ptr fs_check = std::make_shared(); + std::shared_ptr fs_check = std::make_shared(max_xid); fs_check->check_dbs(); fs_check.reset(); diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index 28590817b..c4a12e439 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -7,7 +8,7 @@ using namespace springtail; using namespace springtail::test; -FSCheck::FSCheck() +FSCheck::FSCheck(uint64_t max_xid) : _max_xid(max_xid) { // get all database ids _databases = Properties::get_databases(); @@ -16,12 +17,24 @@ FSCheck::FSCheck() nlohmann::json json = Properties::get(Properties::STORAGE_CONFIG); Json::get_to(json, "table_dir", _table_base); _table_base = Properties::make_absolute_path(_table_base); - LOG_INFO("Verifying tables at table_base = {}", _table_base.string()); - - for (auto [db_id, db_name]: _databases) { - _read_namespaces(db_id); - _read_tables(db_id); + LOG_INFO("Verifying tables at table_base = {}, max_xid = {}", _table_base.string(), _max_xid); + + for (auto it = _databases.begin(); it != _databases.end(); ) { + auto db_id = it->first; + auto db_name = it->second; + uint64_t cutoff_xid = Vacuumer::get_instance()->get_vacuum_cutoff_xid(db_id); + bool vacuumer_enabled = Vacuumer::get_instance()->is_enabled(); + LOG_INFO("\tDatabase {}, cutoff xid {}", db_id, cutoff_xid); + if (!vacuumer_enabled || _max_xid >= cutoff_xid) { + _read_namespaces(db_id); + _read_tables(db_id); + ++it; + } else { + LOG_INFO("\tDatabase {} - skipping validation, cutoff xid {} > max xid {}", db_id, cutoff_xid, max_xid); + it = _databases.erase(it); + } } + } template @@ -83,6 +96,10 @@ FSCheck::_read_namespaces(uint64_t db_id) bool exists = 
fields->at(sys_tbl::NamespaceNames::Data::EXISTS)->get_bool(&row); LOG_INFO("\tNamespace {}:{}", ns_id, name); + if (xid > _max_xid) { + break; + } + if (!exists) { _db_ns_id_map.erase(std::make_pair(db_id, ns_id)); } else { @@ -106,12 +123,18 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t xid = fields->at(sys_tbl::TableNames::Data::XID)->get_uint64(&row); uint64_t lsn = fields->at(sys_tbl::TableNames::Data::LSN)->get_uint64(&row); bool exists = fields->at(sys_tbl::TableNames::Data::EXISTS)->get_bool(&row); + + if (xid > _max_xid) { + break; + } + if (!exists) { _db_tbl_id_map.erase(std::pair(db_id, table_id)); + LOG_INFO("Removed table: db_id {}, namespace_id {}, table_id {}, xid {}", db_id, ns_id, table_id, xid); } else { FSTable table_data{ns_id, name, table_id, xid, lsn, exists}; _db_tbl_id_map[std::make_pair(db_id, table_id)] = table_data; - LOG_INFO("Added table: db_id {}, namespace_id {}, table_id {}", db_id, ns_id, table_id); + LOG_INFO("Added table: db_id {}, namespace_id {}, table_id {}, xid {}", db_id, ns_id, table_id, xid); } LOG_INFO("Table {}:{}", table_id, name); } @@ -120,9 +143,11 @@ FSCheck::_read_tables(uint64_t db_id) for (auto it = _db_tbl_id_map.lower_bound(std::make_pair(db_id, 0)); it != _db_tbl_id_map.end() && it->first.first == db_id; ++it) { uint64_t table_id_key = it->second.table_id; + uint64_t table_xid = it->second.xid; // 3. 
read all columns per table and filter out existing columns { + LOG_INFO("Getting columns for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); std::map pos_to_column; auto [table, fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::Schemas::Primary::key_tuple(table_id_key, 0, 0, 0); @@ -146,14 +171,21 @@ FSCheck::_read_tables(uint64_t db_id) default_value = fields->at(sys_tbl::Schemas::Data::DEFAULT)->get_text(&row); } uint8_t update_type = fields->at(sys_tbl::Schemas::Data::UPDATE_TYPE)->get_uint8(&row); + + if (xid < table_xid || xid > _max_xid) { + continue; + } + if (!exists) { pos_to_column.erase(position); + LOG_INFO("\tRemoved column: db_id {}, table_id {}, xid {}, name '{}'", db_id, table_id, xid, name); } else { std::optional pk_position; SchemaColumn column(xid, lsn, name, position, static_cast(type), pg_type, exists, nullable, pk_position, default_value); column.update_type = static_cast(update_type); pos_to_column[position] = column; + LOG_INFO("\tAdded column: db_id {}, table_id {}, xid {}, name '{}'", db_id, table_id, xid, name); } } @@ -163,6 +195,7 @@ FSCheck::_read_tables(uint64_t db_id) // 5. 
read all indexes in READY state for this table and filter out columns { + LOG_INFO("Getting indexes for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); std::map id_to_index; auto [table, fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::IndexNames::Primary::key_tuple(table_id_key, 0, 0, 0); @@ -180,9 +213,16 @@ FSCheck::_read_tables(uint64_t db_id) std::string name(fields->at(sys_tbl::IndexNames::Data::NAME)->get_text(&row)); uint8_t state = fields->at(sys_tbl::IndexNames::Data::STATE)->get_uint8(&row); bool is_unique = fields->at(sys_tbl::IndexNames::Data::IS_UNIQUE)->get_bool(&row); + + if (xid < table_xid || xid > _max_xid) { + continue; + } + if (static_cast(state) != sys_tbl::IndexNames::State::READY) { id_to_index.erase(index_id); + LOG_INFO("\tRemoved index: index_id {}, xid {}, name '{}', state {}", index_id, xid, name, state); } else { + LOG_INFO("\tAdding index: index_id {}, xid {}, name '{}'", index_id, xid, name); std::string schema_name = _db_ns_id_map.at(std::make_pair(db_id, namespace_id)).ns_name; FSIndex fs_index{xid, lsn, {index_id, schema_name, name, table_id, is_unique, state}}; @@ -204,12 +244,14 @@ FSCheck::_read_tables(uint64_t db_id) uint32_t position = idx_fields->at(sys_tbl::Indexes::Data::POSITION)->get_uint32(&idx_row); uint32_t column_id = idx_fields->at(sys_tbl::Indexes::Data::COLUMN_ID)->get_uint32(&idx_row); columns[position] = column_id; + LOG_INFO("\t\tAdding index column: index_id {}, index_xid {}, position = {}, column_id {}", index_id, idx_xid, position, column_id); } for (auto col_iter: columns) { Index::Column column{col_iter.first, col_iter.second}; fs_index.index.columns.push_back(column); } id_to_index[index_id] = fs_index; + LOG_INFO("\tAdded index: index_id {}, xid {}, name '{}'", index_id, xid, name); } } @@ -217,9 +259,11 @@ FSCheck::_read_tables(uint64_t db_id) _db_tbl_id_map.at(std::make_pair(db_id, table_id_key)).id_to_index = id_to_index; // 7. 
read all roots per index and set primary key positions + LOG_INFO("Getting roots for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); for (auto idx_iter: id_to_index) { uint64_t index_id = idx_iter.first; FSIndex fs_index = idx_iter.second; + LOG_INFO("\tGetting roots for index {}", index_id); // 8. set primary key positions in the table if (index_id == constant::INDEX_PRIMARY) { @@ -243,8 +287,18 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t root_xid = root_fields->at(sys_tbl::TableRoots::Data::XID)->get_uint64(&root_row); uint64_t root_extent_id = root_fields->at(sys_tbl::TableRoots::Data::EXTENT_ID)->get_uint64(&root_row); uint64_t root_snapshot_xid = root_fields->at(sys_tbl::TableRoots::Data::SNAPSHOT_XID)->get_uint64(&root_row); + + if (root_xid < table_xid) { + continue; + } + + if (root_xid > _max_xid) { + break; + } + FSRoot root{root_xid, root_extent_id, root_snapshot_xid}; roots.push_back(root); + LOG_INFO("\t\tAdded root for root_xid {}, extent_id {}, snapshot_xid {}", root_xid, root_extent_id, root_snapshot_xid); } // 10. set the roots in the table @@ -256,6 +310,7 @@ FSCheck::_read_tables(uint64_t db_id) // 10. 
read all stats for this table { + LOG_INFO("Getting stats for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); std::map xid_to_stats; auto [table, fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::TableStats::Primary::key_tuple(table_id_key, 0); @@ -269,8 +324,18 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t xid = fields->at(sys_tbl::TableStats::Data::XID)->get_uint64(&row); uint64_t row_count = fields->at(sys_tbl::TableStats::Data::ROW_COUNT)->get_uint64(&row); uint64_t end_offset = fields->at(sys_tbl::TableStats::Data::END_OFFSET)->get_uint64(&row); + + if (xid < table_xid) { + continue; + } + + if (xid > _max_xid) { + break; + } + FSStats stats{xid, row_count, end_offset}; xid_to_stats[xid] = stats; + LOG_INFO("\tAdded stats for xid {}, row_count {}, end_offser {}", xid, row_count, end_offset); } _db_tbl_id_map.at(std::make_pair(db_id, table_id_key)).xid_to_stats = xid_to_stats; } @@ -356,7 +421,7 @@ FSCheck::_validate_primary_extent(std::shared_ptr table, ExtentSchemaPtr while(btree_iter != table_btree->end()) { const Extent::Row &btree_row = *btree_iter; uint64_t extent_id = extent_id_field->get_uint64(&btree_row); - LOG_INFO("\tVerifying extent_id = {}", extent_id); + LOG_INFO("\t\tVerifying extent_id = {}", extent_id); StorageCache::SafePagePtr page = table->read_page(extent_id); StorageCache::Page::Iterator page_iter = page->last(); @@ -476,9 +541,9 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab ++idx_root_it ) { const struct FSRoot &root = idx_root_it->second; last_root = &(idx_root_it->second); + LOG_INFO("\t\tVerifying roots: root.xid = {}, fs_table.xid = {}", root.xid, fs_table.xid); CHECK(root.xid >= fs_table.xid); if (root.extent_id != constant::UNKNOWN_EXTENT) { - // LOG_INFO("Looking for stats for root with data: {}:{}:{}", root.xid, root.extent_id, root.snapshot_xid); auto stat_it = fs_table.xid_to_stats.find(root.xid); CHECK(stat_it != fs_table.xid_to_stats.end()); 
last_stat = &(stat_it->second); @@ -511,15 +576,12 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab auto table = std::make_shared
(db_id, fs_table.table_id, fs_table.xid, _table_base, schema->get_sort_keys(), secondary_indexes, *tbl_meta, schema); - LOG_INFO("\tTable dir: {}, row_count: {}, end_offset: {}, sxid: {}", - table->get_dir_path().c_str(), row_count, end_offset, root_sxid); + LOG_INFO("\tValidata Table indexes for table {}, dir: {}, row_count: {}, end_offset: {}, sxid: {}", + table->id(), table->get_dir_path().c_str(), row_count, end_offset, root_sxid); - // 6. Validate primary index extent + // 6. Validate primary index extent _validate_primary_extent(table, schema); // 7. Validate secondary index extent _validate_secondary_extents(table, schema); - - // TODO: add validation of previous xid snapshot - } From db2e242fda10b2bc585eee4f67a5b48fa52177be Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 17:07:04 +0000 Subject: [PATCH 02/10] code quality improvement --- src/test/file_system_check.cc | 113 ++++++++++++++++------------------ 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index c4a12e439..f8078e9df 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -149,28 +149,28 @@ FSCheck::_read_tables(uint64_t db_id) { LOG_INFO("Getting columns for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); std::map pos_to_column; - auto [table, fields] = _get_table_and_fields(db_id); + auto [schema_table, schema_fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::Schemas::Primary::key_tuple(table_id_key, 0, 0, 0); - auto table_iter = table->lower_bound(search_key); - for (; table_iter != table->end(); ++table_iter) { + auto table_iter = schema_table->lower_bound(search_key); + for (; table_iter != schema_table->end(); ++table_iter) { auto &row = *table_iter; uint64_t table_id = fields->at(sys_tbl::Schemas::Data::TABLE_ID)->get_uint64(&row); if (table_id != table_id_key) { break; } - uint32_t position = 
fields->at(sys_tbl::Schemas::Data::POSITION)->get_uint32(&row); - uint64_t xid = fields->at(sys_tbl::Schemas::Data::XID)->get_uint64(&row); - uint64_t lsn = fields->at(sys_tbl::Schemas::Data::LSN)->get_uint64(&row); - bool exists = fields->at(sys_tbl::Schemas::Data::EXISTS)->get_bool(&row); - std::string name(fields->at(sys_tbl::Schemas::Data::NAME)->get_text(&row)); - uint8_t type = fields->at(sys_tbl::Schemas::Data::TYPE)->get_uint8(&row); - uint32_t pg_type = fields->at(sys_tbl::Schemas::Data::PG_TYPE)->get_int32(&row); - bool nullable = fields->at(sys_tbl::Schemas::Data::NULLABLE)->get_bool(&row); + uint32_t position = schema_fields->at(sys_tbl::Schemas::Data::POSITION)->get_uint32(&row); + uint64_t xid = schema_fields->at(sys_tbl::Schemas::Data::XID)->get_uint64(&row); + uint64_t lsn = schema_fields->at(sys_tbl::Schemas::Data::LSN)->get_uint64(&row); + bool exists = schema_fields->at(sys_tbl::Schemas::Data::EXISTS)->get_bool(&row); + std::string name(schema_fields->at(sys_tbl::Schemas::Data::NAME)->get_text(&row)); + uint8_t type = schema_fields->at(sys_tbl::Schemas::Data::TYPE)->get_uint8(&row); + uint32_t pg_type = schema_fields->at(sys_tbl::Schemas::Data::PG_TYPE)->get_int32(&row); + bool nullable = schema_fields->at(sys_tbl::Schemas::Data::NULLABLE)->get_bool(&row); std::optional default_value; - if (!fields->at(sys_tbl::Schemas::Data::DEFAULT)->is_null(&row)) { - default_value = fields->at(sys_tbl::Schemas::Data::DEFAULT)->get_text(&row); + if (!schema_fields->at(sys_tbl::Schemas::Data::DEFAULT)->is_null(&row)) { + default_value = schema_fields->at(sys_tbl::Schemas::Data::DEFAULT)->get_text(&row); } - uint8_t update_type = fields->at(sys_tbl::Schemas::Data::UPDATE_TYPE)->get_uint8(&row); + uint8_t update_type = schema_fields->at(sys_tbl::Schemas::Data::UPDATE_TYPE)->get_uint8(&row); if (xid < table_xid || xid > _max_xid) { continue; @@ -197,22 +197,22 @@ FSCheck::_read_tables(uint64_t db_id) { LOG_INFO("Getting indexes for table {}:{}, xid {}", 
table_id_key, it->second.name, it->second.xid); std::map id_to_index; - auto [table, fields] = _get_table_and_fields(db_id); + auto [index_table, index_fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::IndexNames::Primary::key_tuple(table_id_key, 0, 0, 0); - auto table_iter = table->lower_bound(search_key); - for (; table_iter != table->end(); ++table_iter) { + auto table_iter = index_table->lower_bound(search_key); + for (; table_iter != index_table->end(); ++table_iter) { auto &row = *table_iter; - uint64_t table_id = fields->at(sys_tbl::IndexNames::Data::TABLE_ID)->get_uint64(&row); + uint64_t table_id = index_fields->at(sys_tbl::IndexNames::Data::TABLE_ID)->get_uint64(&row); if (table_id != table_id_key) { break; } - uint64_t index_id = fields->at(sys_tbl::IndexNames::Data::INDEX_ID)->get_uint64(&row); - uint64_t xid = fields->at(sys_tbl::IndexNames::Data::XID)->get_uint64(&row); - uint64_t lsn = fields->at(sys_tbl::IndexNames::Data::LSN)->get_uint64(&row); - uint64_t namespace_id = fields->at(sys_tbl::IndexNames::Data::NAMESPACE_ID)->get_uint64(&row); - std::string name(fields->at(sys_tbl::IndexNames::Data::NAME)->get_text(&row)); - uint8_t state = fields->at(sys_tbl::IndexNames::Data::STATE)->get_uint8(&row); - bool is_unique = fields->at(sys_tbl::IndexNames::Data::IS_UNIQUE)->get_bool(&row); + uint64_t index_id = index_fields->at(sys_tbl::IndexNames::Data::INDEX_ID)->get_uint64(&row); + uint64_t xid = index_fields->at(sys_tbl::IndexNames::Data::XID)->get_uint64(&row); + uint64_t lsn = index_fields->at(sys_tbl::IndexNames::Data::LSN)->get_uint64(&row); + uint64_t namespace_id = index_fields->at(sys_tbl::IndexNames::Data::NAMESPACE_ID)->get_uint64(&row); + std::string name(index_fields->at(sys_tbl::IndexNames::Data::NAME)->get_text(&row)); + uint8_t state = index_fields->at(sys_tbl::IndexNames::Data::STATE)->get_uint8(&row); + bool is_unique = index_fields->at(sys_tbl::IndexNames::Data::IS_UNIQUE)->get_bool(&row); if (xid < table_xid || xid > 
_max_xid) { continue; @@ -246,8 +246,8 @@ FSCheck::_read_tables(uint64_t db_id) columns[position] = column_id; LOG_INFO("\t\tAdding index column: index_id {}, index_xid {}, position = {}, column_id {}", index_id, idx_xid, position, column_id); } - for (auto col_iter: columns) { - Index::Column column{col_iter.first, col_iter.second}; + for (const auto &[idx_position, col_position]: columns) { + Index::Column column{idx_position, col_position}; fs_index.index.columns.push_back(column); } id_to_index[index_id] = fs_index; @@ -260,9 +260,7 @@ FSCheck::_read_tables(uint64_t db_id) // 7. read all roots per index and set primary key positions LOG_INFO("Getting roots for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); - for (auto idx_iter: id_to_index) { - uint64_t index_id = idx_iter.first; - FSIndex fs_index = idx_iter.second; + for (const auto &[index_id, fs_index]: id_to_index) { LOG_INFO("\tGetting roots for index {}", index_id); // 8. set primary key positions in the table @@ -303,27 +301,30 @@ FSCheck::_read_tables(uint64_t db_id) // 10. set the roots in the table for (auto root: roots) { - _db_tbl_id_map.at(std::make_pair(db_id, table_id_key)).index_xid_to_root.insert(std::make_pair(std::make_pair(index_id, root.xid), root)); + _db_tbl_id_map.at(std::make_pair(db_id, table_id_key)) + .index_xid_to_root + .try_emplace({index_id, root.xid}, root); } } } - // 10. read all stats for this table + // 11. 
read all stats for this table { LOG_INFO("Getting stats for table {}:{}, xid {}", table_id_key, it->second.name, it->second.xid); std::map xid_to_stats; - auto [table, fields] = _get_table_and_fields(db_id); + + auto [stats_table, stats_fields] = _get_table_and_fields(db_id); auto search_key = sys_tbl::TableStats::Primary::key_tuple(table_id_key, 0); - auto table_iter = table->lower_bound(search_key); - for (; table_iter != table->end(); ++table_iter) { + auto table_iter = stats_table->lower_bound(search_key); + for (; table_iter != stats_table->end(); ++table_iter) { auto &row = *table_iter; - uint64_t table_id = fields->at(sys_tbl::TableStats::Data::TABLE_ID)->get_uint64(&row); + uint64_t table_id = stats_fields->at(sys_tbl::TableStats::Data::TABLE_ID)->get_uint64(&row); if (table_id != table_id_key) { break; } - uint64_t xid = fields->at(sys_tbl::TableStats::Data::XID)->get_uint64(&row); - uint64_t row_count = fields->at(sys_tbl::TableStats::Data::ROW_COUNT)->get_uint64(&row); - uint64_t end_offset = fields->at(sys_tbl::TableStats::Data::END_OFFSET)->get_uint64(&row); + uint64_t xid = stats_fields->at(sys_tbl::TableStats::Data::XID)->get_uint64(&row); + uint64_t row_count = stats_fields->at(sys_tbl::TableStats::Data::ROW_COUNT)->get_uint64(&row); + uint64_t end_offset = stats_fields->at(sys_tbl::TableStats::Data::END_OFFSET)->get_uint64(&row); if (xid < table_xid) { continue; @@ -346,10 +347,7 @@ void FSCheck::check_dbs() { // iterate over databases - for (const auto &db_id_name: _databases) { - uint64_t db_id = db_id_name.first; - const std::string &db_name = db_id_name.second; - + for (const auto &[db_id, db_name]: _databases) { LOG_INFO("Verifying database {}:{}", db_id, db_name); _check_db(db_id, db_name); } @@ -417,7 +415,7 @@ FSCheck::_validate_primary_extent(std::shared_ptr
table, ExtentSchemaPtr } // Verify extents for the primary key - BTree::Iterator btree_iter = table_btree->begin(); + auto btree_iter = table_btree->begin(); while(btree_iter != table_btree->end()) { const Extent::Row &btree_row = *btree_iter; uint64_t extent_id = extent_id_field->get_uint64(&btree_row); @@ -456,9 +454,9 @@ FSCheck::_validate_secondary_extents(std::shared_ptr
table, ExtentSchemaP FieldArrayPtr key_fields = index_btree_schema->get_sort_fields(); LOG_INFO("\tSecondary index: schema size {}, fields size {}", index_btree_schema->get_sort_keys().size(), key_fields->size()); - // Verify extents for the primary key + // Verify extents for the secondary key std::set extent_set; - BTree::Iterator btree_iter = table_btree->begin(); + auto btree_iter = table_btree->begin(); while(btree_iter != table_btree->end()) { const Extent::Row &btree_row = *btree_iter; uint64_t extent_id = extent_id_field->get_uint64(&btree_row); @@ -466,14 +464,14 @@ FSCheck::_validate_secondary_extents(std::shared_ptr
table, ExtentSchemaP extent_set.insert(extent_id); StorageCache::SafePagePtr page = table->read_page(extent_id); - StorageCache::Page::Iterator page_iter = page->begin(); + auto page_iter = page->begin(); page_iter += row_id; Extent::Row table_extent_row = *(page_iter); - std::shared_ptr key_tuple = std::make_shared(key_fields, &btree_row); + auto key_tuple = std::make_shared(key_fields, &btree_row); auto btree_keys = index_btree_schema->tuple_subset(key_tuple, index_table_cols); - std::shared_ptr table_extent_row_tuple = std::make_shared(table_fields, &table_extent_row); + auto table_extent_row_tuple = std::make_shared(table_fields, &table_extent_row); auto table_keys = table_schema->tuple_subset(table_extent_row_tuple, index_table_cols); CHECK(btree_keys->size() == table_keys->size()); CHECK(table_keys->equal_prefix(*btree_keys)); @@ -500,8 +498,7 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab // 2. Verify column xids std::vector columns; - for (auto col_it: fs_table.pos_to_column) { - const struct SchemaColumn column = col_it.second; + for (const auto &[pos, column]: fs_table.pos_to_column) { LOG_INFO("\tVerifying Column {}:{}, type {}, pg_type {}, nullable {}, pkey_position {}, default: {}", column.position, column.name, to_string(column.type), column.pg_type, column.nullable, (column.pkey_position.has_value())? column.pkey_position.value(): -1, @@ -509,18 +506,16 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab ); CHECK(column.exists); CHECK(column.xid >= fs_table.xid); - columns.push_back(col_it.second); + columns.push_back(column); } // 3. 
Verify indexes xids and roots std::vector roots; - uint64_t root_sxid; - uint64_t row_count; - uint64_t end_offset; + uint64_t root_sxid = constant::INVALID_XID; + uint64_t row_count = 0; + uint64_t end_offset = 0; std::vector secondary_indexes; - for (auto idx_it: fs_table.id_to_index) { - uint64_t index_id = idx_it.first; - const struct FSIndex fs_index = idx_it.second; + for (const auto &[index_id, fs_index]: fs_table.id_to_index) { const struct Index index = fs_index.index; LOG_INFO("\tVerifying Index {}:{}:{}, is_unique {}, state {}", index.schema, index.id, index.name, index.is_unique, index.state @@ -565,7 +560,7 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab } // 5. Create table - ExtentSchemaPtr schema = std::make_shared(columns); + auto schema = std::make_shared(columns); auto tbl_meta = std::make_shared(); tbl_meta->roots = roots; From e6b2c8630c0573d7ccaa5026651c7414d7b322ed Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 18:08:34 +0000 Subject: [PATCH 03/10] fixed snapshot xid --- src/test/file_system_check.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index f8078e9df..c37bf3bd9 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -511,7 +511,7 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab // 3. Verify indexes xids and roots std::vector roots; - uint64_t root_sxid = constant::INVALID_XID; + uint64_t root_sxid = constant::LATEST_XID; uint64_t row_count = 0; uint64_t end_offset = 0; std::vector secondary_indexes; @@ -559,6 +559,8 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab secondary_indexes.push_back(index); } + CHECK(root_sxid != constant::LATEST_XID); + // 5. 
Create table auto schema = std::make_shared(columns); auto tbl_meta = std::make_shared(); From 3722f76889edc7c667b59cef4ee0d9f1693b19a6 Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 18:09:38 +0000 Subject: [PATCH 04/10] added explicit --- include/test/file_system_check.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/test/file_system_check.hh b/include/test/file_system_check.hh index 0a2d28cc6..b7a974127 100644 --- a/include/test/file_system_check.hh +++ b/include/test/file_system_check.hh @@ -10,7 +10,7 @@ namespace springtail::test { class FSCheck { public: - FSCheck(uint64_t max_xid = constant::LATEST_XID); + explicit FSCheck(uint64_t max_xid = constant::LATEST_XID); ~FSCheck() = default; /** From 78f7683500441a0be5fa5472b3141ec773c72ae1 Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 18:41:42 +0000 Subject: [PATCH 05/10] fixed some sonar errors --- src/sys_tbl_mgr/test/file_system_check.cc | 2 +- src/test/file_system_check.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sys_tbl_mgr/test/file_system_check.cc b/src/sys_tbl_mgr/test/file_system_check.cc index e677c8798..0187173b7 100644 --- a/src/sys_tbl_mgr/test/file_system_check.cc +++ b/src/sys_tbl_mgr/test/file_system_check.cc @@ -22,7 +22,7 @@ main(int argc, char *argv[]) // no logging springtail_init(false, std::nullopt, LOG_NONE); - std::shared_ptr fs_check = std::make_shared(max_xid); + auto fs_check = std::make_shared(max_xid); fs_check->check_dbs(); fs_check.reset(); diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index c37bf3bd9..6cf4bfb98 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -423,7 +423,7 @@ FSCheck::_validate_primary_extent(std::shared_ptr
table, ExtentSchemaPtr StorageCache::SafePagePtr page = table->read_page(extent_id); StorageCache::Page::Iterator page_iter = page->last(); - Extent::Row table_extent_last_row = *(page_iter); + Extent::Row table_extent_last_row = *page_iter; if (table->has_primary()) { FieldTuple key_tuple(key_fields, &btree_row); FieldTuple table_extent_last_row_tuple(table_key_fields, &table_extent_last_row); @@ -466,7 +466,7 @@ FSCheck::_validate_secondary_extents(std::shared_ptr
table, ExtentSchemaP StorageCache::SafePagePtr page = table->read_page(extent_id); auto page_iter = page->begin(); page_iter += row_id; - Extent::Row table_extent_row = *(page_iter); + Extent::Row table_extent_row = *page_iter; auto key_tuple = std::make_shared(key_fields, &btree_row); auto btree_keys = index_btree_schema->tuple_subset(key_tuple, index_table_cols); From e95bdb449db46c6f345d1f4c0b6d7e3a90d8c4d6 Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 7 Aug 2025 18:45:52 +0000 Subject: [PATCH 06/10] another sonar fix --- src/test/file_system_check.cc | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index 6cf4bfb98..bfee3f5ce 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -19,22 +19,23 @@ FSCheck::FSCheck(uint64_t max_xid) : _max_xid(max_xid) _table_base = Properties::make_absolute_path(_table_base); LOG_INFO("Verifying tables at table_base = {}, max_xid = {}", _table_base.string(), _max_xid); - for (auto it = _databases.begin(); it != _databases.end(); ) { - auto db_id = it->first; - auto db_name = it->second; + std::erase_if(_databases, [this](auto& pair) { + auto db_id = pair.first; + auto db_name = pair.second; uint64_t cutoff_xid = Vacuumer::get_instance()->get_vacuum_cutoff_xid(db_id); bool vacuumer_enabled = Vacuumer::get_instance()->is_enabled(); LOG_INFO("\tDatabase {}, cutoff xid {}", db_id, cutoff_xid); if (!vacuumer_enabled || _max_xid >= cutoff_xid) { _read_namespaces(db_id); _read_tables(db_id); - ++it; + // keep database + return false; } else { - LOG_INFO("\tDatabase {} - skipping validation, cutoff xid {} > max xid {}", db_id, cutoff_xid, max_xid); - it = _databases.erase(it); + LOG_INFO("\tDatabase {} - skipping validation, cutoff xid {} > max xid {}", db_id, cutoff_xid, _max_xid); + // erase database + return true; } - } - + }); } template From dc1309e4e0d93580154cb2a93431a8678b1f5497 Mon Sep 17 00:00:00 
2001 From: Ella Baron Date: Mon, 11 Aug 2025 17:42:11 +0000 Subject: [PATCH 07/10] added the ability to verify file system for every xid up to maximum given xid --- include/test/file_system_check.hh | 41 ++++-- src/sys_tbl_mgr/test/file_system_check.cc | 4 +- src/test/file_system_check.cc | 155 +++++++++++++++------- 3 files changed, 142 insertions(+), 58 deletions(-) diff --git a/include/test/file_system_check.hh b/include/test/file_system_check.hh index b7a974127..7882d3fb7 100644 --- a/include/test/file_system_check.hh +++ b/include/test/file_system_check.hh @@ -16,31 +16,36 @@ namespace springtail::test { /** * @brief Check all databases * + * @param all_xids - check all available xids */ void - check_dbs(); + check_dbs(bool all_xids = false); /** * @brief Check given database * * @param db_id - database id + * @param all_xids - check all available xids */ void - check_db(uint64_t db_id); + check_db(uint64_t db_id, bool all_xids = false); /** * @brief Check given table in the given database * * @param db_id - database id * @param table_id - table id + * @param all_xids - check all available xids */ void - check_db_table(uint64_t db_id, uint64_t table_id); + check_db_table(uint64_t db_id, uint64_t table_id, bool all_xids = false); private: std::map _databases; ///< map of database id to database name + std::map _db_id_to_cutoff_xid; ///< cuttoff xid per database std::filesystem::path _table_base; ///< directory where all the tables are stored uint64_t _max_xid; ///< maximum xid + uint64_t _max_recorded_xid{0}; /** * @brief Storage for namespace data @@ -122,17 +127,28 @@ namespace springtail::test { * @brief Read all namespaces for the database * * @param db_id - database id + * @param max_xid - maximum xid that limits data scan */ void - _read_namespaces(uint64_t db_id); + _read_namespaces(uint64_t db_id, uint64_t max_xid); /** * @brief Read all table data for the given database * - * @param db_id + * @param db_id - database id + * @param max_xid - maximum xid 
that limits data scan + */ + void + _read_tables(uint64_t db_id, uint64_t max_xid); + + /** + * @brief Read all information for the given database from the system tables. + * + * @param db_id - database id + * @param max_xid - maximum xid that limits data scan */ void - _read_tables(uint64_t db_id); + _read_database_info(uint64_t db_id, uint64_t max_xid); /** * @brief Validate primary key index @@ -156,10 +172,11 @@ namespace springtail::test { * @brief Internal function for checking specific database. * * @param db_id - database id - * @param db_name - database name + * @param first_xid - first xid + * @param cutoff_xid - cutoff xid */ void - _check_db(uint64_t db_id, const std::string &db_name); + _check_db(uint64_t db_id, uint64_t first_xid, uint64_t cutoff_xid); /** * @brief Internal function to reading specific database table @@ -183,6 +200,14 @@ namespace springtail::test { template std::pair>> _get_table_and_fields(uint64_t db_id); + + /** + * @brief Save the next max recorded xid if applicable + * + * @param xid - xid value + */ + void + _record_max_xid(uint64_t xid); }; } // springtail::test \ No newline at end of file diff --git a/src/sys_tbl_mgr/test/file_system_check.cc b/src/sys_tbl_mgr/test/file_system_check.cc index 0187173b7..e22e7b3b8 100644 --- a/src/sys_tbl_mgr/test/file_system_check.cc +++ b/src/sys_tbl_mgr/test/file_system_check.cc @@ -11,9 +11,11 @@ int main(int argc, char *argv[]) { uint64_t max_xid = 0; + bool all_xids = false; po::options_description desc("Options"); desc.add_options()("max_xid,mx", po::value(&max_xid)->default_value(constant::LATEST_XID), "Maximum xid, default is the latest"); + desc.add_options()("all_xids,ax", po::value(&all_xids)->default_value(false), "Flag to check all xids, default is the false"); po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); @@ -23,7 +25,7 @@ main(int argc, char *argv[]) springtail_init(false, std::nullopt, LOG_NONE); auto fs_check = std::make_shared(max_xid); -
fs_check->check_dbs(); + fs_check->check_dbs(all_xids); fs_check.reset(); springtail_shutdown(); diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index bfee3f5ce..7fa6099fd 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -19,23 +19,16 @@ FSCheck::FSCheck(uint64_t max_xid) : _max_xid(max_xid) _table_base = Properties::make_absolute_path(_table_base); LOG_INFO("Verifying tables at table_base = {}, max_xid = {}", _table_base.string(), _max_xid); - std::erase_if(_databases, [this](auto& pair) { - auto db_id = pair.first; - auto db_name = pair.second; - uint64_t cutoff_xid = Vacuumer::get_instance()->get_vacuum_cutoff_xid(db_id); - bool vacuumer_enabled = Vacuumer::get_instance()->is_enabled(); - LOG_INFO("\tDatabase {}, cutoff xid {}", db_id, cutoff_xid); + bool vacuumer_enabled = Vacuumer::get_instance()->is_enabled(); + for (const auto &[db_id, db_name]: _databases) { + uint64_t cutoff_xid = 0; + if (vacuumer_enabled) { + cutoff_xid = Vacuumer::get_instance()->get_vacuum_cutoff_xid(db_id); + } if (!vacuumer_enabled || _max_xid >= cutoff_xid) { - _read_namespaces(db_id); - _read_tables(db_id); - // keep database - return false; - } else { - LOG_INFO("\tDatabase {} - skipping validation, cutoff xid {} > max xid {}", db_id, cutoff_xid, _max_xid); - // erase database - return true; + _db_id_to_cutoff_xid.insert(std::make_pair(db_id, cutoff_xid)); } - }); + } } template @@ -84,7 +77,7 @@ FSCheck::_get_table_and_fields(uint64_t db_id) } void -FSCheck::_read_namespaces(uint64_t db_id) +FSCheck::_read_namespaces(uint64_t db_id, uint64_t max_xid) { LOG_INFO("Namespaces for db {}", db_id); auto [table, fields] = _get_table_and_fields(db_id); @@ -96,8 +89,9 @@ FSCheck::_read_namespaces(uint64_t db_id) uint64_t lsn = fields->at(sys_tbl::NamespaceNames::Data::LSN)->get_uint64(&row); bool exists = fields->at(sys_tbl::NamespaceNames::Data::EXISTS)->get_bool(&row); LOG_INFO("\tNamespace {}:{}", ns_id, name); + 
_record_max_xid(xid); - if (xid > _max_xid) { + if (xid > max_xid) { break; } @@ -111,7 +105,7 @@ FSCheck::_read_namespaces(uint64_t db_id) } void -FSCheck::_read_tables(uint64_t db_id) +FSCheck::_read_tables(uint64_t db_id, uint64_t max_xid) { LOG_INFO("Tables for db {}", db_id); auto [table, fields] = _get_table_and_fields(db_id); @@ -124,8 +118,9 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t xid = fields->at(sys_tbl::TableNames::Data::XID)->get_uint64(&row); uint64_t lsn = fields->at(sys_tbl::TableNames::Data::LSN)->get_uint64(&row); bool exists = fields->at(sys_tbl::TableNames::Data::EXISTS)->get_bool(&row); + _record_max_xid(xid); - if (xid > _max_xid) { + if (xid > max_xid) { break; } @@ -172,8 +167,9 @@ FSCheck::_read_tables(uint64_t db_id) default_value = schema_fields->at(sys_tbl::Schemas::Data::DEFAULT)->get_text(&row); } uint8_t update_type = schema_fields->at(sys_tbl::Schemas::Data::UPDATE_TYPE)->get_uint8(&row); + _record_max_xid(xid); - if (xid < table_xid || xid > _max_xid) { + if (xid < table_xid || xid > max_xid) { continue; } @@ -214,8 +210,9 @@ FSCheck::_read_tables(uint64_t db_id) std::string name(index_fields->at(sys_tbl::IndexNames::Data::NAME)->get_text(&row)); uint8_t state = index_fields->at(sys_tbl::IndexNames::Data::STATE)->get_uint8(&row); bool is_unique = index_fields->at(sys_tbl::IndexNames::Data::IS_UNIQUE)->get_bool(&row); + _record_max_xid(xid); - if (xid < table_xid || xid > _max_xid) { + if (xid < table_xid || xid > max_xid) { continue; } @@ -286,12 +283,14 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t root_xid = root_fields->at(sys_tbl::TableRoots::Data::XID)->get_uint64(&root_row); uint64_t root_extent_id = root_fields->at(sys_tbl::TableRoots::Data::EXTENT_ID)->get_uint64(&root_row); uint64_t root_snapshot_xid = root_fields->at(sys_tbl::TableRoots::Data::SNAPSHOT_XID)->get_uint64(&root_row); + _record_max_xid(root_xid); + _record_max_xid(root_snapshot_xid); if (root_xid < table_xid) { continue; } - if (root_xid > 
_max_xid) { + if (root_xid > max_xid) { break; } @@ -326,12 +325,13 @@ FSCheck::_read_tables(uint64_t db_id) uint64_t xid = stats_fields->at(sys_tbl::TableStats::Data::XID)->get_uint64(&row); uint64_t row_count = stats_fields->at(sys_tbl::TableStats::Data::ROW_COUNT)->get_uint64(&row); uint64_t end_offset = stats_fields->at(sys_tbl::TableStats::Data::END_OFFSET)->get_uint64(&row); + _record_max_xid(xid); if (xid < table_xid) { continue; } - if (xid > _max_xid) { + if (xid > max_xid) { break; } @@ -345,46 +345,105 @@ FSCheck::_read_tables(uint64_t db_id) } void -FSCheck::check_dbs() +FSCheck::_record_max_xid(uint64_t xid) +{ + if (xid > _max_recorded_xid) { + _max_recorded_xid = xid; + } +} + +void +FSCheck::_read_database_info(uint64_t db_id, uint64_t max_xid) { + _max_recorded_xid = 0; + _db_ns_id_map.clear(); + _db_tbl_id_map.clear(); + _read_namespaces(db_id, max_xid); + _read_tables(db_id, max_xid); +} + +void +FSCheck::check_dbs(bool all_xids) +{ + uint64_t first_xid = 1; + if (!all_xids) { + first_xid = _max_xid; + } + // iterate over databases - for (const auto &[db_id, db_name]: _databases) { - LOG_INFO("Verifying database {}:{}", db_id, db_name); - _check_db(db_id, db_name); + for (const auto &[db_id, cutoff_xid]: _db_id_to_cutoff_xid) { + _check_db(db_id, first_xid, cutoff_xid); } } void -FSCheck::check_db(uint64_t db_id) +FSCheck::check_db(uint64_t db_id, bool all_xids) { - const std::string &db_name = _databases.at(db_id); - LOG_INFO("Verifying database {}:{}", db_id, db_name); - _check_db(db_id, db_name); + uint64_t first_xid = 1; + if (!all_xids) { + first_xid = _max_xid; + } + + uint64_t cutoff_xid = _db_id_to_cutoff_xid.at(db_id); + _check_db(db_id, first_xid, cutoff_xid); } void -FSCheck::_check_db(uint64_t db_id, const std::string &db_name) +FSCheck::_check_db(uint64_t db_id, uint64_t first_xid, uint64_t cutoff_xid) { - for (auto it = _db_tbl_id_map.lower_bound(std::make_pair(db_id, 0)); - it != _db_tbl_id_map.end() && it->first.first == db_id; 
++it) { - LOG_INFO("Verifying table {}:{}:{}", it->first.first, it->first.second, it->second.name); - _check_db_table(db_id, db_name, it->second); + const std::string &db_name = _databases.at(db_id); + LOG_INFO("Verifying database {}:{} with first_xid = {} and cuttoff_xid = {}", + db_id, db_name, first_xid, cutoff_xid); + + uint64_t start_xid = (first_xid < cutoff_xid)? cutoff_xid : first_xid; + for (uint64_t max_xid = start_xid; max_xid <= _max_xid; ++max_xid) { + LOG_INFO("Verifying database {}:{} iteration max_xid = {}", + db_id, db_name, max_xid); + _read_database_info(db_id, max_xid); + if (_max_recorded_xid < max_xid) { + break; + } + + for (auto it = _db_tbl_id_map.lower_bound(std::make_pair(db_id, 0)); + it != _db_tbl_id_map.end() && it->first.first == db_id; ++it) { + LOG_INFO("Verifying table {}:{}:{}", it->first.first, it->first.second, it->second.name); + _check_db_table(db_id, db_name, it->second); + } + if (max_xid == _max_xid) { + break; + } } } void -FSCheck::check_db_table(uint64_t db_id, uint64_t table_id) +FSCheck::check_db_table(uint64_t db_id, uint64_t table_id, bool all_xids) { + uint64_t first_xid = 1; + if (!all_xids) { + first_xid = _max_xid; + } + + uint64_t cutoff_xid = _db_id_to_cutoff_xid.at(db_id); const std::string &db_name = _databases.at(db_id); - LOG_INFO("Verifying database {}:{} for table {}", db_id, db_name, table_id); + LOG_INFO("Verifying database {}:{} table {} with first_xid = {} and cuttoff_xid = {}", + db_id, db_name, table_id, first_xid, cutoff_xid); + uint64_t start_xid = (first_xid < cutoff_xid)? 
cutoff_xid : first_xid; + for (uint64_t max_xid = start_xid; max_xid < _max_xid; ++max_xid) { + LOG_INFO("Verifying database {}:{} table {} iteration max_xid = {}", + db_id, db_name, table_id, max_xid); + _read_database_info(db_id, max_xid); + if (_max_recorded_xid < max_xid) { + break; + } - std::pair key = std::make_pair(db_id, table_id); - auto it = _db_tbl_id_map.lower_bound(key); - if (it == _db_tbl_id_map.end() || it->first != key) { - LOG_ERROR("Database {}:{}: table {} is not found", db_id, db_name, table_id); - CHECK(false); + std::pair key = std::make_pair(db_id, table_id); + auto it = _db_tbl_id_map.lower_bound(key); + if (it == _db_tbl_id_map.end() || it->first != key) { + LOG_ERROR("Database {}:{}: table {} is not found", db_id, db_name, table_id); + CHECK(false); + } + _check_db_table(db_id, db_name, it->second); } - _check_db_table(db_id, db_name, it->second); } void @@ -539,11 +598,9 @@ FSCheck::_check_db_table(uint64_t db_id, const std::string &db_name, const FSTab last_root = &(idx_root_it->second); LOG_INFO("\t\tVerifying roots: root.xid = {}, fs_table.xid = {}", root.xid, fs_table.xid); CHECK(root.xid >= fs_table.xid); - if (root.extent_id != constant::UNKNOWN_EXTENT) { - auto stat_it = fs_table.xid_to_stats.find(root.xid); - CHECK(stat_it != fs_table.xid_to_stats.end()); - last_stat = &(stat_it->second); - } + auto stat_it = fs_table.xid_to_stats.find(root.xid); + CHECK(stat_it != fs_table.xid_to_stats.end()); + last_stat = &(stat_it->second); } if (last_root != nullptr) { From 0496e126967c7c7077c5aa56b74bbe230a5e174b Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Thu, 14 Aug 2025 14:28:30 +0000 Subject: [PATCH 08/10] made function constant --- include/storage/vacuumer.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/storage/vacuumer.hh b/include/storage/vacuumer.hh index 2b7689029..492c8c74a 100644 --- a/include/storage/vacuumer.hh +++ b/include/storage/vacuumer.hh @@ -105,7 +105,7 @@ public: * @return true - 
enabled * @return false - disabled */ - bool is_enabled() { return _vacuum_start_enabled; } + bool is_enabled() const { return _vacuum_start_enabled; } protected: /** * @brief Constructor, that inits the vacuumer thread From c6a4a7293bad3d3b1daac5dac32b5f1a9dee65e3 Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Fri, 22 Aug 2025 10:33:52 -0700 Subject: [PATCH 09/10] removed function call --- include/storage/vacuumer.hh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/storage/vacuumer.hh b/include/storage/vacuumer.hh index 20b93860f..ec47770ba 100644 --- a/include/storage/vacuumer.hh +++ b/include/storage/vacuumer.hh @@ -92,14 +92,6 @@ public: _global_file_size_threshold = size; } - /** - * @brief Get vacuum-safe XID for a DB - * - * @param db_id Database ID - * @return XID until which vacuum can run - */ - uint64_t get_vacuum_cutoff_xid(uint64_t db_id); - /** * @brief Return whether the vacuumer is enabled * From 03e1e515c4c0ac45a0b710fa6805e0be73f4cc68 Mon Sep 17 00:00:00 2001 From: Ella Baron Date: Mon, 25 Aug 2025 15:19:00 +0000 Subject: [PATCH 10/10] bug fix --- include/test/file_system_check.hh | 11 +++++------ src/sys_tbl_mgr/test/file_system_check.cc | 4 ++-- src/test/file_system_check.cc | 15 ++++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/test/file_system_check.hh b/include/test/file_system_check.hh index 7882d3fb7..e17ec454e 100644 --- a/include/test/file_system_check.hh +++ b/include/test/file_system_check.hh @@ -10,25 +10,23 @@ namespace springtail::test { class FSCheck { public: - explicit FSCheck(uint64_t max_xid = constant::LATEST_XID); + explicit FSCheck(uint64_t max_xid = constant::LATEST_XID, bool all_xids = false); ~FSCheck() = default; /** * @brief Check all databases * - * @param all_xids - check all available xids */ void - check_dbs(bool all_xids = false); + check_dbs(); /** * @brief Check given database * * @param db_id - database id - * @param all_xids - check all available xids */ 
void - check_db(uint64_t db_id, bool all_xids = false); + check_db(uint64_t db_id); /** * @brief Check given table in the given database @@ -45,7 +43,8 @@ namespace springtail::test { std::map _db_id_to_cutoff_xid; ///< cuttoff xid per database std::filesystem::path _table_base; ///< directory where all the tables are stored uint64_t _max_xid; ///< maximum xid - uint64_t _max_recorded_xid{0}; + uint64_t _max_recorded_xid{0}; ///< maximum xid found in system tables + bool _all_xids; ///< iterate over all xids /** * @brief Storage for namespace data diff --git a/src/sys_tbl_mgr/test/file_system_check.cc b/src/sys_tbl_mgr/test/file_system_check.cc index e22e7b3b8..e62088d6b 100644 --- a/src/sys_tbl_mgr/test/file_system_check.cc +++ b/src/sys_tbl_mgr/test/file_system_check.cc @@ -24,8 +24,8 @@ main(int argc, char *argv[]) // no logging springtail_init(false, std::nullopt, LOG_NONE); - auto fs_check = std::make_shared(max_xid); - fs_check->check_dbs(all_xids); + auto fs_check = std::make_shared(max_xid, all_xids); + fs_check->check_dbs(); fs_check.reset(); springtail_shutdown(); diff --git a/src/test/file_system_check.cc b/src/test/file_system_check.cc index b0c0d2f40..4aa035233 100644 --- a/src/test/file_system_check.cc +++ b/src/test/file_system_check.cc @@ -9,7 +9,7 @@ using namespace springtail; using namespace springtail::test; -FSCheck::FSCheck(uint64_t max_xid) : _max_xid(max_xid) +FSCheck::FSCheck(uint64_t max_xid, bool all_xids) : _max_xid(max_xid), _all_xids(all_xids) { // get all database ids _databases = Properties::get_databases(); @@ -364,10 +364,10 @@ FSCheck::_read_database_info(uint64_t db_id, uint64_t max_xid) } void -FSCheck::check_dbs(bool all_xids) +FSCheck::check_dbs() { uint64_t first_xid = 1; - if (!all_xids) { + if (!_all_xids) { first_xid = _max_xid; } @@ -378,10 +378,10 @@ FSCheck::check_dbs(bool all_xids) } void -FSCheck::check_db(uint64_t db_id, bool all_xids) +FSCheck::check_db(uint64_t db_id) { uint64_t first_xid = 1; - if (!all_xids) { + 
if (!_all_xids) { first_xid = _max_xid; } @@ -401,7 +401,7 @@ FSCheck::_check_db(uint64_t db_id, uint64_t first_xid, uint64_t cutoff_xid) LOG_INFO("Verifying database {}:{} iteration max_xid = {}", db_id, db_name, max_xid); _read_database_info(db_id, max_xid); - if (_max_recorded_xid < max_xid) { + if (_all_xids && _max_recorded_xid < max_xid) { break; } @@ -410,7 +410,8 @@ FSCheck::_check_db(uint64_t db_id, uint64_t first_xid, uint64_t cutoff_xid) LOG_INFO("Verifying table {}:{}:{}", it->first.first, it->first.second, it->second.name); _check_db_table(db_id, db_name, it->second); } - if (max_xid == _max_xid) { + + if (!_all_xids) { break; } }