From d4ccf296099f0ba47245658f6678e8a4fe5ac893 Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Sun, 9 Nov 2025 12:26:00 -0500 Subject: [PATCH 01/14] Did the first draft of concurrent implementation according to concurrent ART paper --- concurrent/ConcurrentART.h | 297 ++++++++++++++++++++++++++++++++++ concurrent/VersionControl.cpp | 85 ++++++++++ concurrent/VersionControl.h | 41 +++++ 3 files changed, 423 insertions(+) create mode 100644 concurrent/ConcurrentART.h create mode 100644 concurrent/VersionControl.cpp create mode 100644 concurrent/VersionControl.h diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h new file mode 100644 index 0000000..b04cef0 --- /dev/null +++ b/concurrent/ConcurrentART.h @@ -0,0 +1,297 @@ +#pragma once + +#include "../ART.h" +#include "../ArtNode.h" +#include "VersionControl.h" +#include +#include + + +namespace ART { + +class ConcurrentART : public ART { + public: + ConcurrentART() : ART() {} + + void insert(uint8_t key[], uintptr_t value) { + ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); + } + private: + void insert(ConcurrentART* tree, ArtNode* node, ArtNode** nodeRef, uint8_t key[], int depth, uintptr_t value, int maxKeyLength, ArtNode* parent, int parentVersion) { + // Insert the leaf value into the tree + + uint64_t version = readLockOrRestart(node); + + // Handle prefix of inner node + if (node->prefixLength) { + unsigned mismatchPos = + prefixMismatch(node, key, depth, maxKeyLength); + if (mismatchPos != node->prefixLength) { + // Prefix differs, create new node + + upgradeToWriteLockOrRestart(parent, parentVersion); + upgradeToWriteLockOrRestart(node, version, parent); + + Node4* newNode = new Node4(); + *nodeRef = newNode; + newNode->prefixLength = mismatchPos; + memcpy(newNode->prefix, node->prefix, + min(mismatchPos, maxPrefixLength)); + // Break up prefix + if (node->prefixLength < maxPrefixLength) { + newNode->insertNode4(this, nodeRef, + node->prefix[mismatchPos], node); + node->prefixLength -= (mismatchPos + 1); + memmove(node->prefix, node->prefix + mismatchPos + 1, + min(node->prefixLength, maxPrefixLength)); + } else { + node->prefixLength -= (mismatchPos + 1); + uint8_t minKey[maxKeyLength]; + loadKey(getLeafValue(minimum(node)), minKey); + newNode->insertNode4(this, nodeRef, + minKey[depth + mismatchPos], node); + memmove(node->prefix, minKey + depth + mismatchPos + 1, + min(node->prefixLength, maxPrefixLength)); + } + newNode->insertNode4(this, nodeRef, key[depth + mismatchPos], + makeLeaf(value)); + + writeUnlock(node); + writeUnlock(parent); + + return; + } + depth += node->prefixLength; + } + + // Recurse + ArtNode** child = findChild(node, key[depth]); + + checkOrRestart(node, version); + + if (!child) { + // Insert leaf into inner node + ArtNode* newNode = makeLeaf(value); + switch (node->type) { + case NodeType4: { + // Cast node to Node4 to access its members + Node4* node4 = static_cast(node); + uint8_t keyByte = key[depth]; + // Insert leaf into inner node + if (node4->count < 4) { + + upgradeToWriteLockOrRestart(node, version); + readUnlockOrRestart(parent, parentVersion, node); + + // Insert element + unsigned pos; + for (pos = 0; (pos < node4->count) && (node4->key[pos] < keyByte); pos++) + ; + // Shift keys and children to the right to make space for the new + // key/child. This preserves the sorted order of keys in the node. + memmove(node4->key + pos + 1, node4->key + pos, node4->count - pos); + memmove(node4->child + pos + 1, node4->child + pos, + (node4->count - pos) * sizeof(uintptr_t)); + node4->key[pos] = keyByte; + node4->child[pos] = newNode; // Use newNode, not child + node4->count++; + + writeUnlock(node); + + } else { + + upgradeToWriteLockOrRestart(parent, parentVersion); + upgradeToWriteLockOrRestart(node, version, parent); + + // Grow to Node16 + Node16* newNode16 = new Node16(); + *nodeRef = newNode16; + newNode16->count = 4; + copyPrefix(node4, newNode16); + for (unsigned i = 0; i < 4; i++) + newNode16->key[i] = flipSign(node4->key[i]); + memcpy(newNode16->child, node4->child, node4->count * sizeof(uintptr_t)); + + writeUnlockObsolete(node); + writeUnlock(parent); + + delete node4; + return newNode16->insertNode16(tree, nodeRef, keyByte, newNode); + } + break; + } + case NodeType16: { + // Cast node to Node16 to access its members + Node16* node16 = static_cast(node); + uint8_t keyByte = key[depth]; + + // Insert leaf into inner node + if (node16->count < 16) { + // Insert element + + upgradeToWriteLockOrRestart(node, version); + readUnlockOrRestart(parent, parentVersion, node); + + // Flip the sign bit of the key byte for correct ordering in signed + // comparisons + uint8_t keyByteFlipped = flipSign(keyByte); + + // SIMD: Compare keyByteFlipped with all keys in the node in parallel + // _mm_set1_epi8 sets all 16 bytes of an SSE register to keyByteFlipped + // _mm_loadu_si128 loads the node's keys into an SSE register + // _mm_cmplt_epi8 does a signed comparison of each byte + __m128i cmp = _mm_cmplt_epi8( + _mm_set1_epi8(keyByteFlipped), + _mm_loadu_si128(reinterpret_cast<__m128i*>(node16->key))); + + // _mm_movemask_epi8 creates a 16-bit mask from the comparison results + // Only consider the bits for the active keys (node16->count) + uint16_t bitfield = + _mm_movemask_epi8(cmp) & (0xFFFF >> (16 - node16->count)); + + // Find the position of the first set bit (i.e., where keyByteFlipped < + // key[i]) + unsigned pos = bitfield ? ctz(bitfield) : node16->count; + + // Shift keys and children to the right to make space for the new + // key/child. This preserves the sorted order of keys in the node. + memmove(node16->key + pos + 1, node16->key + pos, node16->count - pos); + memmove(node16->child + pos + 1, node16->child + pos, + (node16->count - pos) * sizeof(uintptr_t)); + node16->key[pos] = keyByteFlipped; + node16->child[pos] = newNode; + node16->count++; + + writeUnlock(node); + + } else { + + upgradeToWriteLockOrRestart(parent, parentVersion); + upgradeToWriteLockOrRestart(node, version, parent); + + // Grow to Node48 + Node48* newNode = new Node48(); + *nodeRef = newNode; + memcpy(newNode->child, node16->child, node16->count * sizeof(uintptr_t)); + for (unsigned i = 0; i < node16->count; i++) + newNode->childIndex[flipSign(node16->key[i])] = i; + copyPrefix(node16, newNode); + newNode->count = node16->count; + + writeUnlockObsolete(node); + writeUnlock(parent); + + delete node16; + return newNode->insertNode48(tree, nodeRef, keyByte, newNode); + } + break; + } + case NodeType48: { + // Cast node to Node48 to access its members + Node48* node48 = static_cast(node); + uint8_t keyByte = key[depth]; + + // Insert leaf into inner node + if (node48->count < 48) { + + upgradeToWriteLockOrRestart(node, version); + readUnlockOrRestart(parent, parentVersion, node); + + // Insert element + unsigned pos = node48->count; + if (node48->child[pos]) + for (pos = 0; node48->child[pos] != NULL; pos++) + ; + // No memmove needed here because Node48 uses a mapping (childIndex) and + // a dense array. + node48->child[pos] = newNode; + node48->childIndex[keyByte] = pos; + node48->count++; + + writeUnlock(node); + + } else { + // Grow to Node256 + + upgradeToWriteLockOrRestart(parent, parentVersion); + upgradeToWriteLockOrRestart(node, version, parent); + + Node256* newNode = new Node256(); + for (unsigned i = 0; i < 256; i++) + if (node48->childIndex[i] != 48) + newNode->child[i] = node48->child[node48->childIndex[i]]; + newNode->count = node48->count; + copyPrefix(node48, newNode); + *nodeRef = newNode; + + writeUnlockObsolete(node); + writeUnlock(parent); + + delete node48; + return newNode->insertNode256(tree, nodeRef, keyByte, newNode); + } + break; + } + case NodeType256: { + // Cast node to Node256 to access its members + Node256* node256 = static_cast(node); + uint8_t keyByte = key[depth]; + + upgradeToWriteLockOrRestart(node, version); + readUnlockOrRestart(parent, parentVersion, node); + + // Insert leaf into inner node + // No memmove needed here because Node256 uses a direct mapping for all + // possible keys. + node256->count++; + node256->child[keyByte] = newNode; + + writeUnlock(node); + + break; + } + } + return; + } + + if (parent != nullptr) { + readUnlockOrRestart(parent, parentVersion); + } + + if (isLeaf(*child)) { + + upgradeToWriteLockOrRestart(node, version); + + depth++; + // Replace leaf with Node4 and store both leaves in it + uint8_t existingKey[maxKeyLength]; + loadKey(getLeafValue(*child), existingKey); + unsigned newPrefixLength = 0; + while (existingKey[depth + newPrefixLength] == + key[depth + newPrefixLength]) + newPrefixLength++; + + Node4* newNode = new Node4(); + newNode->prefixLength = newPrefixLength; + memcpy(newNode->prefix, key + depth, + min(newPrefixLength, maxPrefixLength)); + + ArtNode* oldLeaf = *child; + *child = newNode; + + newNode->insertNode4(this, child, + existingKey[depth + newPrefixLength], oldLeaf); + newNode->insertNode4(this, child, key[depth + newPrefixLength], + makeLeaf(value)); + + writeUnlock(node); + + return; + } + + insert(tree, *child, child, key, depth + 1, value, maxKeyLength, node, version); + return; + } +}; + +} // namespace ART \ No newline at end of file diff --git a/concurrent/VersionControl.cpp b/concurrent/VersionControl.cpp new file mode 100644 index 0000000..603752b --- /dev/null +++ b/concurrent/VersionControl.cpp @@ -0,0 +1,85 @@ +#include "VersionControl.h" + +namespace ART { + +uint64_t readLockOrRestart(ArtNode* node) { + uint64_t version = awaitNodeUnlocked(node); + if (isObsolete(version)) { + throw RestartException(); + } + return version; +} + +void checkOrRestart(ArtNode* node, uint64_t version) { + readUnlockOrRestart(node, version); +} + +void readUnlockOrRestart(ArtNode* node, uint64_t version) { + if (version != node->version.load()) { + throw RestartException(); + } +} + +void readUnlockOrRestart(ArtNode* node, uint64_t version, ArtNode* lockedNode) { + if (version != node->version.load()) { + writeUnlock(lockedNode); + throw RestartException(); + } +} + +void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version) { + uint64_t expected = version; + if (!node->version.compare_exchange_strong(expected, setLockedBit(version))) { + throw RestartException(); + } +} + +void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version, ArtNode* lockedNode) { + uint64_t expected = version; + if (!node->version.compare_exchange_strong(expected, setLockedBit(version))) { + writeUnlock(lockedNode); + throw RestartException(); + } +} + +void writeLockOrRestart(ArtNode* node) { + uint64_t version; + do { + version = readLockOrRestart(node); + try { + upgradeToWriteLockOrRestart(node, version); + break; // Successfully acquired write lock + } catch (const RestartException&) { + // Continue loop to retry + } + } while (true); +} + +void writeUnlock(ArtNode* node) { + // reset locked bit and overflow into version + node->version.fetch_add(2); +} + +void writeUnlockObsolete(ArtNode* node) { + // set obsolete, reset locked, overflow into version + node->version.fetch_add(3); +} + +uint64_t awaitNodeUnlocked(ArtNode* node) { + uint64_t version = node->version.load(); + while ((version & 2) == 2) { // spinlock while locked + PAUSE(); + version = node->version.load(); + } + return version; +} + +uint64_t setLockedBit(uint64_t version) { + return version + 2; +} + +bool isObsolete(uint64_t version) { + return (version & 1) == 1; +} + +} // namespace ART \ No newline at end of file diff --git a/concurrent/VersionControl.h b/concurrent/VersionControl.h new file mode 100644 index 0000000..e9b6155 --- /dev/null +++ b/concurrent/VersionControl.h @@ -0,0 +1,41 @@ +#pragma once + +#include "../ArtNode.h" +#include +#include +#include + +#ifdef _MSC_VER +#include +#define PAUSE() _mm_pause() +#else +#include +#define PAUSE() _mm_pause() +#endif + +namespace ART { + +class RestartException : public std::exception { +public: + const char* what() const noexcept override { + return "Operation needs to restart"; + } +}; + +// Version control function declarations - matching your implementation +uint64_t readLockOrRestart(ArtNode* node); +void checkOrRestart(ArtNode* node, uint64_t version); +void readUnlockOrRestart(ArtNode* node, uint64_t version); +void readUnlockOrRestart(ArtNode* node, uint64_t version, ArtNode* lockedNode); +void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version); +void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version, ArtNode* lockedNode); +void writeLockOrRestart(ArtNode* node); +void writeUnlock(ArtNode* node); +void writeUnlockObsolete(ArtNode* node); + +// Helper functions +uint64_t awaitNodeUnlocked(ArtNode* node); +uint64_t setLockedBit(uint64_t version); +bool isObsolete(uint64_t version); + +} // namespace ART \ No newline at end of file From 2aa73afb628b80b6415414bd6b8448334ee9bf36 Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Sun, 9 Nov 2025 12:51:36 -0500 Subject: [PATCH 02/14] Implemented a benchmark, having compile issues --- .vscode/settings.json | 4 +- ArtNode.h | 5 + CMakeLists.txt | 1 + concurrent/ConcurrentART.h | 26 ++- concurrent/run_cc.cpp | 387 +++++++++++++++++++++++++++++++++++++ run_bin.cpp | 376 +++++++++++++++++++++++++++++++++++ 6 files changed, 796 insertions(+), 3 deletions(-) create mode 100644 concurrent/run_cc.cpp create mode 100644 run_bin.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json index 636e8a1..2bb6935 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -79,6 +79,8 @@ "cassert": "cpp", "csignal": "cpp", "regex": "cpp", - "*.ipp": "cpp" + "*.ipp": "cpp", + "barrier": "cpp", + "text_encoding": "cpp" }, } \ No newline at end of file diff --git a/ArtNode.h b/ArtNode.h index d7a19ba..c68c2e5 100644 --- a/ArtNode.h +++ b/ArtNode.h @@ -25,6 +25,8 @@ #include #include +#include // for concurrency + #include "Helper.h" namespace ART { @@ -51,6 +53,9 @@ struct ArtNode { // compressed path (prefix) uint8_t prefix[maxPrefixLength]; + // version for concurrency + std::atomic version{0}; + ArtNode(int8_t type) : prefixLength(0), count(0), type(type) {} }; diff --git a/CMakeLists.txt b/CMakeLists.txt index 3315bfd..6f34d8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,3 +24,4 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") add_executable(main main.cpp) add_executable(insert_profiling insert_profiling.cpp) add_executable(run run.cpp) +add_executable(run_cc concurrent/run_cc.cpp) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index b04cef0..d678410 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -11,12 +11,34 @@ namespace ART { class ConcurrentART : public ART { public: - ConcurrentART() : ART() {} + ConcurrentART() : ART() , concurrent_mode{false} {} void insert(uint8_t key[], uintptr_t value) { - ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); + if (!concurrent_mode) { + // Use sequential insert for the first insertions + ART::insert(key, value); + } else { + // Use concurrent insert after switching to concurrent mode + bool restart; + do { + restart = false; + try { + ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); + } catch (const RestartException&) { + restart = true; + } + } while (restart); + } + } + + void enableConcurrentMode() { + concurrent_mode = true; } + private: + + bool concurrent_mode; + void insert(ConcurrentART* tree, ArtNode* node, ArtNode** nodeRef, uint8_t key[], int depth, uintptr_t value, int maxKeyLength, ArtNode* parent, int parentVersion) { // Insert the leaf value into the tree diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp new file mode 100644 index 0000000..e3c0f97 --- /dev/null +++ b/concurrent/run_cc.cpp @@ -0,0 +1,387 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../ART.h" +#include "../ArtNode.h" +#include "../Chain.h" +#include "../Helper.h" +#include "ConcurrentART.h" + +using namespace std; + +struct Config { + bool verbose = false; + int N = 500000000; + string input_file; + int num_threads = 1; + int query_threads = 1; + int runs = 1; + string results_csv = "results.csv"; +}; + +template +std::vector read_bin(const char* filename) { + std::ifstream inputFile(filename, std::ios::binary); + inputFile.seekg(0, std::ios::end); + const std::streampos fileSize = inputFile.tellg(); + inputFile.seekg(0, std::ios::beg); + std::vector data(fileSize / sizeof(key_type)); + inputFile.read(reinterpret_cast(data.data()), fileSize); + return data; +} + +namespace utils { +namespace executor { + +template +class Workload { +private: + Tree& tree; + const Config& config; + +public: + Workload(Tree& t, const Config& conf) : tree(t), config(conf) {} + + struct WorkerStats { + long long insertion_time = 0; + long long query_time = 0; + size_t keys_processed = 0; + size_t queries_processed = 0; + }; + + // Insert operation for a single thread + void insert_worker(const std::vector& keys, + size_t start_idx, size_t end_idx, + std::barrier<>& sync_point, + WorkerStats& stats) { + + // Wait for all threads to be ready + sync_point.arrive_and_wait(); + + auto start_time = chrono::high_resolution_clock::now(); + + for (size_t i = start_idx; i < end_idx; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + + auto op_start = chrono::high_resolution_clock::now(); + tree.insert(key, keys[i]); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + stats.insertion_time += duration.count(); + stats.keys_processed++; + } + + auto end_time = chrono::high_resolution_clock::now(); + if (config.verbose) { + auto total_time = chrono::duration_cast(end_time - start_time); + cout << "Thread processed " << stats.keys_processed + << " insertions in " << total_time.count() << " ns" << endl; + } + } + + // Query operation for a single thread + void query_worker(const std::vector& keys, + const std::vector& random_indices, + size_t start_idx, size_t end_idx, + std::barrier<>& sync_point, + WorkerStats& stats) { + + // Wait for all threads to be ready + sync_point.arrive_and_wait(); + + auto start_time = chrono::high_resolution_clock::now(); + + for (size_t i = start_idx; i < end_idx; i++) { + int random = random_indices[i]; + uint8_t key[4]; + ART::loadKey(keys[random], key); + + auto op_start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree.lookup(key); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + stats.query_time += duration.count(); + stats.queries_processed++; + + assert(ART::isLeaf(leaf) && ART::getLeafValue(leaf) == keys[random]); + } + + auto end_time = chrono::high_resolution_clock::now(); + if (config.verbose) { + auto total_time = chrono::duration_cast(end_time - start_time); + cout << "Thread processed " << stats.queries_processed + << " queries in " << total_time.count() << " ns" << endl; + } + } + + // Run insertion workload + pair run_insertions(const std::vector& keys) { + WorkerStats total_stats; + + // HYBRID MODE: Always insert first 2 keys sequentially + auto seq_start = chrono::high_resolution_clock::now(); + for (size_t i = 0; i < 2; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + + auto op_start = chrono::high_resolution_clock::now(); + tree.insert(key, keys[i]); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + total_stats.insertion_time += duration.count(); + total_stats.keys_processed++; + } + auto seq_stop = chrono::high_resolution_clock::now(); + + if (config.verbose) { + auto seq_time = chrono::duration_cast(seq_stop - seq_start); + cout << "Sequential phase: " << seq_time.count() << " ns" << endl; + } + + // Enable concurrent mode and continue with remaining keys + if (keys.size() > 2) { + tree.enableConcurrentMode(); + + if (config.num_threads == 1) { + // Single-threaded for remaining keys + for (size_t i = 2; i < keys.size(); i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + + auto op_start = chrono::high_resolution_clock::now(); + tree.insert(key, keys[i]); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + total_stats.insertion_time += duration.count(); + total_stats.keys_processed++; + } + } else { + // Multi-threaded for remaining keys + vector threads; + vector worker_stats(config.num_threads); + barrier sync_point(config.num_threads); + + size_t remaining_keys = keys.size() - 2; + size_t keys_per_thread = remaining_keys / config.num_threads; + + auto concurrent_start = chrono::high_resolution_clock::now(); + + for (int t = 0; t < config.num_threads; t++) { + size_t start_idx = 2 + t * keys_per_thread; + size_t end_idx = (t == config.num_threads - 1) ? keys.size() : (2 + (t + 1) * keys_per_thread); + + threads.emplace_back(&Workload::insert_worker, this, + ref(keys), start_idx, end_idx, + ref(sync_point), ref(worker_stats[t])); + } + + for (auto& t : threads) { + t.join(); + } + + auto concurrent_stop = chrono::high_resolution_clock::now(); + auto concurrent_wall_time = chrono::duration_cast(concurrent_stop - concurrent_start); + + // Aggregate worker stats + for (const auto& stats : worker_stats) { + total_stats.keys_processed += stats.keys_processed; + } + + total_stats.insertion_time += concurrent_wall_time.count(); + + if (config.verbose) { + cout << "Concurrent phase wall time: " << concurrent_wall_time.count() << " ns" << endl; + + long long total_cpu_time = 0; + for (const auto& stats : worker_stats) { + total_cpu_time += stats.insertion_time; + } + cout << "Concurrent phase CPU time: " << total_cpu_time << " ns" << endl; + } + } + } + + return {total_stats.insertion_time, total_stats}; + } + + // Run query workload + pair run_queries(const std::vector& keys) { + WorkerStats total_stats; + size_t num_queries = keys.size() / 100; + + // Pre-generate random indices + srand(time(0)); + vector random_indices(num_queries); + for (size_t i = 0; i < num_queries; i++) { + random_indices[i] = rand() % keys.size(); + } + + if (config.query_threads == 1) { + // Single-threaded queries + auto start_time = chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < num_queries; i++) { + int random = random_indices[i]; + uint8_t key[4]; + ART::loadKey(keys[random], key); + + auto op_start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree.lookup(key); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + total_stats.query_time += duration.count(); + total_stats.queries_processed++; + + assert(ART::isLeaf(leaf) && ART::getLeafValue(leaf) == keys[random]); + } + + auto end_time = chrono::high_resolution_clock::now(); + auto wall_time = chrono::duration_cast(end_time - start_time); + + return {wall_time.count(), total_stats}; + } else { + // Multi-threaded queries + vector threads; + vector worker_stats(config.query_threads); + barrier sync_point(config.query_threads); + + size_t queries_per_thread = num_queries / config.query_threads; + + auto concurrent_start = chrono::high_resolution_clock::now(); + + for (int t = 0; t < config.query_threads; t++) { + size_t start_idx = t * queries_per_thread; + size_t end_idx = (t == config.query_threads - 1) ? num_queries : (t + 1) * queries_per_thread; + + threads.emplace_back(&Workload::query_worker, this, + ref(keys), ref(random_indices), + start_idx, end_idx, + ref(sync_point), ref(worker_stats[t])); + } + + for (auto& t : threads) { + t.join(); + } + + auto concurrent_stop = chrono::high_resolution_clock::now(); + auto wall_time = chrono::duration_cast(concurrent_stop - concurrent_start); + + // Aggregate stats + for (const auto& stats : worker_stats) { + total_stats.queries_processed += stats.queries_processed; + } + + if (config.verbose) { + long long total_cpu_time = 0; + for (const auto& stats : worker_stats) { + total_cpu_time += stats.query_time; + } + cout << "Query wall time: " << wall_time.count() << " ns" << endl; + cout << "Query CPU time: " << total_cpu_time << " ns" << endl; + } + + return {wall_time.count(), total_stats}; + } + } + + void run_all(const std::vector& keys) { + if (config.verbose) { + cout << "Running workload with " << config.num_threads + << " insertion threads, " << config.query_threads << " query threads" << endl; + } + + // Run insertions + auto [insertion_time, insert_stats] = run_insertions(keys); + + // Run queries + auto [query_time, query_stats] = run_queries(keys); + + if (config.verbose) { + cout << "Total insertion time: " << insertion_time << " ns" << endl; + cout << "Total query time: " << query_time << " ns" << endl; + + double insertion_throughput = (double)insert_stats.keys_processed / (insertion_time / 1e9); + double query_throughput = (double)query_stats.queries_processed / (query_time / 1e9); + + cout << "Insertion throughput: " << insertion_throughput << " ops/sec" << endl; + cout << "Query throughput: " << query_throughput << " ops/sec" << endl; + } + + // Output CSV format + cout << config.num_threads << "," << config.query_threads << "," + << insertion_time << "," << query_time << "," + << insert_stats.keys_processed << "," << query_stats.queries_processed << endl; + } +}; + +} // namespace executor +} // namespace utils + +int main(int argc, char** argv) { + Config config; + + // Parse arguments + for (int i = 1; i < argc;) { + if (string(argv[i]) == "-v") { + config.verbose = true; + i++; + } else if (string(argv[i]) == "-N") { + config.N = atoi(argv[i + 1]); + i += 2; + } else if (string(argv[i]) == "-f") { + config.input_file = argv[i + 1]; + i += 2; + } else if (string(argv[i]) == "-it") { + config.num_threads = atoi(argv[i + 1]); + i += 2; + } else if (string(argv[i]) == "-qt") { + config.query_threads = atoi(argv[i + 1]); + i += 2; + } else if (string(argv[i]) == "-runs") { + config.runs = atoi(argv[i + 1]); + i += 2; + } else { + i++; + } + } + + // Load data + auto keys = read_bin(config.input_file.c_str()); + config.N = keys.size(); + + if (config.verbose) { + cout << "Loaded " << keys.size() << " keys" << endl; + cout << "Configuration:" << endl; + cout << " Insertion threads: " << config.num_threads << endl; + cout << " Query threads: " << config.query_threads << endl; + cout << " Runs: " << config.runs << endl; + } + + cout << "insertion_threads,query_threads,insertion_time_ns,query_time_ns,keys_inserted,queries_processed" << endl; + + // Run multiple iterations + for (int run = 0; run < config.runs; run++) { + if (config.verbose && config.runs > 1) { + cout << "Run " << (run + 1) << "/" << config.runs << endl; + } + + ART::ConcurrentART tree; + utils::executor::Workload workload(tree, config); + workload.run_all(keys); + } + + return 0; +} \ No newline at end of file diff --git a/run_bin.cpp b/run_bin.cpp new file mode 100644 index 0000000..03fae44 --- /dev/null +++ b/run_bin.cpp @@ -0,0 +1,376 @@ +#include +#include +#include +#include +#include + +#include "ART.h" +#include "ArtNode.h" +#include "Chain.h" +#include "Helper.h" +#include "trees/QuART_lil.h" +#include "trees/QuART_tail.h" +#include "trees/QuART_stail.h" +#include "trees/QuART_lil_can.h" +#include "trees/QuART_stail_reset.h"s + +using namespace std; + +template +std::vector read_bin(const char* filename) { + std::ifstream inputFile(filename, std::ios::binary); + inputFile.seekg(0, std::ios::end); + const std::streampos fileSize = inputFile.tellg(); + inputFile.seekg(0, std::ios::beg); + std::vector data(fileSize / sizeof(key_type)); + inputFile.read(reinterpret_cast(data.data()), fileSize); + return data; +} + +int main(int argc, char** argv) { + bool verbose = false; // optional argument + int N = 500000000; // optional argument + string input_file; // required argument + string tree_type = "ART"; // default tree type + + + // Query 1% of entries + uint64_t minval = 0; + + // Parse arguments; make sure to increment i by 2 if you consume an argument + for (int i = 1; i < argc;) { + if (string(argv[i]) == "-v") { + verbose = true; + i++; + } else if (string(argv[i]) == "-N") { + N = atoi(argv[i + 1]); + i += 2; + } else if (string(argv[i]) == "-f") { + input_file = argv[i + 1]; + i += 2; + } else if (string(argv[i]) == "-t") { + tree_type = argv[i + 1]; + i += 2; + } else { + i++; + } + } + + // read data + auto keys = read_bin(input_file.c_str()); + + cout << "keys size: " << keys.size() << endl; + + // Use the actual number of keys loaded, not the default N + N = keys.size(); + + // Update maxval to match the actual data size + uint64_t maxval = N - 1; + + if (tree_type == "ART") { + ART::ART* tree = new ART::ART(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_tail") { + ART::QuART_tail* tree = new ART::QuART_tail(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_lil") { + ART::QuART_lil* tree = new ART::QuART_lil(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_stail") { + ART::QuART_stail* tree = new ART::QuART_stail(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_stail_vdebug") { + ART::QuART_stail_vdebug* tree = new ART::QuART_stail_vdebug(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_lil_can") { + ART::QuART_lil_can* tree = new ART::QuART_lil_can(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } else if (tree_type == "QuART_stail_reset") { + ART::QuART_stail_reset* tree = new ART::QuART_stail_reset(); + long long insertion_time = 0; + for (uint64_t i = 0; i < N; i++) { + uint8_t key[4]; + ART::loadKey(keys[i], key); + auto start = chrono::high_resolution_clock::now(); + tree->insert(key, keys[i]); + + // cout << "fp value is " << ART::getLeafValue(tree->fp_leaf) << " after inserting key " + //<< keys[i] << endl; + + + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + insertion_time += duration.count(); + } + + if (verbose) { + cout << "Tree type: " << tree_type << endl; + cout << "Insertion time: " << insertion_time << " ns" << endl; + } + + srand(time(0)); + + long long query_time = 0; + for (uint64_t i = 0; i < (N / 100); i++) { + int random = rand() % (maxval - minval + 1) + minval; + uint8_t key[4]; + ART::loadKey(keys[random], key); + auto start = chrono::high_resolution_clock::now(); + ART::ArtNode* leaf = tree->lookup(key); + auto stop = chrono::high_resolution_clock::now(); + auto duration = + chrono::duration_cast(stop - start); + query_time += duration.count(); + assert(ART::isLeaf(leaf) && + ART::getLeafValue(leaf) == keys[random]); + } + + if (verbose) { + cout << "Query time: " << query_time << " ns" << endl; + } + + // Output the times in csv format, including tree type + cout << insertion_time << "," << query_time << endl; + } + else { + cerr << "Unknown tree type: " << tree_type << endl; + return 1; + } + return 0; +} From 500ad15ccfee72f6c7b04f425897e4b67c6815bc Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Mon, 10 Nov 2025 00:28:10 -0500 Subject: [PATCH 03/14] Benchmark builds correctly --- CMakeLists.txt | 4 +++- concurrent/ConcurrentART.h | 1 + concurrent/run_cc.cpp | 17 +++-------------- run.cpp | 2 +- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f34d8b..dfa9757 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,9 @@ project(ART VERSION 1.0 DESCRIPTION "DISC fork of Adaptive Radix Tree" LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) # Default to Release only if no build type is set (and not a multi-config generator) if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index d678410..d842d8e 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -3,6 +3,7 @@ #include "../ART.h" #include "../ArtNode.h" #include "VersionControl.h" +#include "VersionControl.cpp" #include #include diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index e3c0f97..4cedcfe 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -22,7 +22,6 @@ struct Config { string input_file; int num_threads = 1; int query_threads = 1; - int runs = 1; string results_csv = "results.csv"; }; @@ -350,9 +349,6 @@ int main(int argc, char** argv) { } else if (string(argv[i]) == "-qt") { config.query_threads = atoi(argv[i + 1]); i += 2; - } else if (string(argv[i]) == "-runs") { - config.runs = atoi(argv[i + 1]); - i += 2; } else { i++; } @@ -367,21 +363,14 @@ int main(int argc, char** argv) { cout << "Configuration:" << endl; cout << " Insertion threads: " << config.num_threads << endl; cout << " Query threads: " << config.query_threads << endl; - cout << " Runs: " << config.runs << endl; } cout << "insertion_threads,query_threads,insertion_time_ns,query_time_ns,keys_inserted,queries_processed" << endl; - // Run multiple iterations - for (int run = 0; run < config.runs; run++) { - if (config.verbose && config.runs > 1) { - cout << "Run " << (run + 1) << "/" << config.runs << endl; - } - ART::ConcurrentART tree; - utils::executor::Workload workload(tree, config); - workload.run_all(keys); - } + ART::ConcurrentART tree; + utils::executor::Workload workload(tree, config); + workload.run_all(keys); return 0; } \ No newline at end of file diff --git a/run.cpp b/run.cpp index 2f73b08..6e87156 100644 --- a/run.cpp +++ b/run.cpp @@ -47,7 +47,7 @@ int main(int argc, char** argv) { N = atoi(argv[i + 1]); i += 2; } else if (string(argv[i]) == "-f") { - input_file = argv[i + 1]; + i += 2; } else if (string(argv[i]) == "-t") { tree_type = argv[i + 1]; From ba457d840fd818738a175f0bcded293185c63b2f Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Mon, 10 Nov 2025 01:47:23 -0500 Subject: [PATCH 04/14] Current issue: parent can be null, for example for readUnlockOrRestart --- concurrent/ConcurrentART.h | 24 +++++------------------- concurrent/run_cc.cpp | 19 ++++++++++--------- run.cpp | 2 +- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index d842d8e..6b14d5e 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -12,28 +12,14 @@ namespace ART { class ConcurrentART : public ART { public: - ConcurrentART() : ART() , concurrent_mode{false} {} + ConcurrentART() : ART() {} void insert(uint8_t key[], uintptr_t value) { - if (!concurrent_mode) { - // Use sequential insert for the first insertions - ART::insert(key, value); - } else { - // Use concurrent insert after switching to concurrent mode - bool restart; - do { - restart = false; - try { - ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); - } catch (const RestartException&) { - restart = true; - } - } while (restart); - } + ART::insert(key, value); } - void enableConcurrentMode() { - concurrent_mode = true; + void insertCC(uint8_t key[], uintptr_t value) { + ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); } private: @@ -92,7 +78,7 @@ class ConcurrentART : public ART { checkOrRestart(node, version); - if (!child) { + if (!*child) { // Insert leaf into inner node ArtNode* newNode = makeLeaf(value); switch (node->type) { diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 4cedcfe..7212688 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -71,7 +71,7 @@ class Workload { ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); - tree.insert(key, keys[i]); + tree.insertCC(key, keys[i]); auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); @@ -134,7 +134,7 @@ class Workload { ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); - tree.insert(key, keys[i]); + tree.insert(key, keys[i]); // we should use ART's insert here auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); @@ -150,7 +150,6 @@ class Workload { // Enable concurrent mode and continue with remaining keys if (keys.size() > 2) { - tree.enableConcurrentMode(); if (config.num_threads == 1) { // Single-threaded for remaining keys @@ -159,7 +158,7 @@ class Workload { ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); - tree.insert(key, keys[i]); + tree.insertCC(key, keys[i]); auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); @@ -332,6 +331,8 @@ class Workload { int main(int argc, char** argv) { Config config; + string input_f; + // Parse arguments for (int i = 1; i < argc;) { if (string(argv[i]) == "-v") { @@ -342,6 +343,7 @@ int main(int argc, char** argv) { i += 2; } else if (string(argv[i]) == "-f") { config.input_file = argv[i + 1]; + input_f = argv[i + 1]; i += 2; } else if (string(argv[i]) == "-it") { config.num_threads = atoi(argv[i + 1]); @@ -355,8 +357,10 @@ int main(int argc, char** argv) { } // Load data - auto keys = read_bin(config.input_file.c_str()); - config.N = keys.size(); + auto keys = read_bin(input_f.c_str()); + + cout << "Succesfully loaded " << keys.size() << " keys from " + << config.input_file << endl; if (config.verbose) { cout << "Loaded " << keys.size() << " keys" << endl; @@ -364,9 +368,6 @@ int main(int argc, char** argv) { cout << " Insertion threads: " << config.num_threads << endl; cout << " Query threads: " << config.query_threads << endl; } - - cout << "insertion_threads,query_threads,insertion_time_ns,query_time_ns,keys_inserted,queries_processed" << endl; - ART::ConcurrentART tree; utils::executor::Workload workload(tree, config); diff --git a/run.cpp b/run.cpp index 6e87156..2f73b08 100644 --- a/run.cpp +++ b/run.cpp @@ -47,7 +47,7 @@ int main(int argc, char** argv) { N = atoi(argv[i + 1]); i += 2; } else if (string(argv[i]) == "-f") { - + input_file = argv[i + 1]; i += 2; } else if (string(argv[i]) == "-t") { tree_type = argv[i + 1]; From 6541a627c5d042cf95224307957fdbea1d259b86 Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Mon, 10 Nov 2025 02:11:09 -0500 Subject: [PATCH 05/14] Current issue: calling default insertNodeX functions. They should all be implemented in the main body --- concurrent/ConcurrentART.h | 34 +++++++++++++++++++++------------- concurrent/VersionControl.cpp | 2 +- concurrent/run_cc.cpp | 5 +++-- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index 6b14d5e..ade2e31 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -38,7 +38,9 @@ class ConcurrentART : public ART { if (mismatchPos != node->prefixLength) { // Prefix differs, create new node - upgradeToWriteLockOrRestart(parent, parentVersion); + if (parent) { + upgradeToWriteLockOrRestart(parent, parentVersion); + } upgradeToWriteLockOrRestart(node, version, parent); Node4* newNode = new Node4(); @@ -66,7 +68,7 @@ class ConcurrentART : public ART { makeLeaf(value)); writeUnlock(node); - writeUnlock(parent); + if (parent) { writeUnlock(parent); } return; } @@ -90,7 +92,7 @@ class ConcurrentART : public ART { if (node4->count < 4) { upgradeToWriteLockOrRestart(node, version); - readUnlockOrRestart(parent, parentVersion, node); + if (parent) { readUnlockOrRestart(parent, parentVersion, node); } // Insert element unsigned pos; @@ -109,7 +111,9 @@ class ConcurrentART : public ART { } else { - upgradeToWriteLockOrRestart(parent, parentVersion); + if (parent) { + upgradeToWriteLockOrRestart(parent, parentVersion); + } upgradeToWriteLockOrRestart(node, version, parent); // Grow to Node16 @@ -122,7 +126,7 @@ class ConcurrentART : public ART { memcpy(newNode16->child, node4->child, node4->count * sizeof(uintptr_t)); writeUnlockObsolete(node); - writeUnlock(parent); + if (parent) { writeUnlock(parent); } delete node4; return newNode16->insertNode16(tree, nodeRef, keyByte, newNode); @@ -139,7 +143,7 @@ class ConcurrentART : public ART { // Insert element upgradeToWriteLockOrRestart(node, version); - readUnlockOrRestart(parent, parentVersion, node); + if (parent) { readUnlockOrRestart(parent, parentVersion, node); } // Flip the sign bit of the key byte for correct ordering in signed // comparisons @@ -175,7 +179,9 @@ class ConcurrentART : public ART { } else { - upgradeToWriteLockOrRestart(parent, parentVersion); + if (parent) { + upgradeToWriteLockOrRestart(parent, parentVersion); + } upgradeToWriteLockOrRestart(node, version, parent); // Grow to Node48 @@ -188,7 +194,7 @@ class ConcurrentART : public ART { newNode->count = node16->count; writeUnlockObsolete(node); - writeUnlock(parent); + if (parent) { writeUnlock(parent); } delete node16; return newNode->insertNode48(tree, nodeRef, keyByte, newNode); @@ -204,7 +210,7 @@ class ConcurrentART : public ART { if (node48->count < 48) { upgradeToWriteLockOrRestart(node, version); - readUnlockOrRestart(parent, parentVersion, node); + if (parent) { readUnlockOrRestart(parent, parentVersion, node); } // Insert element unsigned pos = node48->count; @@ -222,7 +228,9 @@ class ConcurrentART : public ART { } else { // Grow to Node256 - upgradeToWriteLockOrRestart(parent, parentVersion); + if (parent) { + upgradeToWriteLockOrRestart(parent, parentVersion); + } upgradeToWriteLockOrRestart(node, version, parent); Node256* newNode = new Node256(); @@ -234,7 +242,7 @@ class ConcurrentART : public ART { *nodeRef = newNode; writeUnlockObsolete(node); - writeUnlock(parent); + if (parent) { writeUnlock(parent); } delete node48; return newNode->insertNode256(tree, nodeRef, keyByte, newNode); @@ -247,7 +255,7 @@ class ConcurrentART : public ART { uint8_t keyByte = key[depth]; upgradeToWriteLockOrRestart(node, version); - readUnlockOrRestart(parent, parentVersion, node); + if (parent) { readUnlockOrRestart(parent, parentVersion, node); } // Insert leaf into inner node // No memmove needed here because Node256 uses a direct mapping for all @@ -263,7 +271,7 @@ class ConcurrentART : public ART { return; } - if (parent != nullptr) { + if (parent) { readUnlockOrRestart(parent, parentVersion); } diff --git a/concurrent/VersionControl.cpp b/concurrent/VersionControl.cpp index 603752b..0006b21 100644 --- a/concurrent/VersionControl.cpp +++ b/concurrent/VersionControl.cpp @@ -37,7 +37,7 @@ void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version) { void upgradeToWriteLockOrRestart(ArtNode* node, uint64_t version, ArtNode* lockedNode) { uint64_t expected = version; if (!node->version.compare_exchange_strong(expected, setLockedBit(version))) { - writeUnlock(lockedNode); + if (lockedNode) { writeUnlock(lockedNode); } throw RestartException(); } } diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 7212688..02c99f0 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -149,16 +149,17 @@ class Workload { } // Enable concurrent mode and continue with remaining keys - if (keys.size() > 2) { + if (config.N > 2) { if (config.num_threads == 1) { // Single-threaded for remaining keys - for (size_t i = 2; i < keys.size(); i++) { + for (size_t i = 2; i < config.N; i++) { uint8_t key[4]; ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); tree.insertCC(key, keys[i]); + tree.printTree(); auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); From f2e843744b8904397acf5904f4e6df7ca10ba3ee Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Mon, 10 Nov 2025 12:05:36 -0500 Subject: [PATCH 06/14] Implemented concurrent ART --- concurrent/ConcurrentART.h | 112 ++++++++++++++++++++++++++++--------- concurrent/run_cc.cpp | 9 ++- 2 files changed, 95 insertions(+), 26 deletions(-) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index ade2e31..bc36c6a 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -19,7 +19,15 @@ class ConcurrentART : public ART { } void insertCC(uint8_t key[], uintptr_t value) { - ConcurrentART::insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); + while (true) { // Retry loop + try { + insert(this, root, &root, key, 0, value, maxPrefixLength, nullptr, 0); + return; // Success - exit retry loop + } catch (const RestartException&) { + // Conflict detected - loop will retry from beginning + continue; + } + } } private: @@ -83,11 +91,12 @@ class ConcurrentART : public ART { if (!*child) { // Insert leaf into inner node ArtNode* newNode = makeLeaf(value); + uint8_t keyByte = key[depth]; + switch (node->type) { case NodeType4: { // Cast node to Node4 to access its members Node4* node4 = static_cast(node); - uint8_t keyByte = key[depth]; // Insert leaf into inner node if (node4->count < 4) { @@ -125,18 +134,50 @@ class ConcurrentART : public ART { newNode16->key[i] = flipSign(node4->key[i]); memcpy(newNode16->child, node4->child, node4->count * sizeof(uintptr_t)); + // Shouldn't be deleted, other threads might still be reading it + // delete node; + + // INSERT NODE 16 PART + + // Flip the sign bit of the key byte for correct ordering in signed + // comparisons + uint8_t keyByteFlipped = flipSign(keyByte); + + // SIMD: Compare keyByteFlipped with all keys in the node in parallel + // _mm_set1_epi8 sets all 16 bytes of an SSE register to keyByteFlipped + // _mm_loadu_si128 loads the node's keys into an SSE register + // _mm_cmplt_epi8 does a signed comparison of each byte + __m128i cmp = _mm_cmplt_epi8( + _mm_set1_epi8(keyByteFlipped), + _mm_loadu_si128(reinterpret_cast<__m128i*>(newNode16->key))); + + // _mm_movemask_epi8 creates a 16-bit mask from the comparison results + // Only consider the bits for the active keys (node16->count) + uint16_t bitfield = + _mm_movemask_epi8(cmp) & (0xFFFF >> (16 - newNode16->count)); + + // Find the position of the first set bit (i.e., where keyByteFlipped < + // key[i]) + unsigned pos = bitfield ? ctz(bitfield) : newNode16->count; + + // Shift keys and children to the right to make space for the new + // key/child. This preserves the sorted order of keys in the node. + memmove(newNode16->key + pos + 1, newNode16->key + pos, newNode16->count - pos); + memmove(newNode16->child + pos + 1, newNode16->child + pos, + (newNode16->count - pos) * sizeof(uintptr_t)); + newNode16->key[pos] = keyByteFlipped; + newNode16->child[pos] = newNode; + newNode16->count++; + writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } - - delete node4; - return newNode16->insertNode16(tree, nodeRef, keyByte, newNode); + } break; } case NodeType16: { // Cast node to Node16 to access its members Node16* node16 = static_cast(node); - uint8_t keyByte = key[depth]; // Insert leaf into inner node if (node16->count < 16) { @@ -185,26 +226,38 @@ class ConcurrentART : public ART { upgradeToWriteLockOrRestart(node, version, parent); // Grow to Node48 - Node48* newNode = new Node48(); - *nodeRef = newNode; - memcpy(newNode->child, node16->child, node16->count * sizeof(uintptr_t)); + Node48* newNode48 = new Node48(); + *nodeRef = newNode48; + memcpy(newNode48->child, node16->child, node16->count * sizeof(uintptr_t)); for (unsigned i = 0; i < node16->count; i++) - newNode->childIndex[flipSign(node16->key[i])] = i; - copyPrefix(node16, newNode); - newNode->count = node16->count; + newNode48->childIndex[flipSign(node16->key[i])] = i; + copyPrefix(node16, newNode48); + newNode48->count = node16->count; + + // Shouldn't be deleted, other threads might still be reading it + // delete node; + + // INSERT NODE 48 PART + + // Insert element + unsigned pos = newNode48->count; + if (newNode48->child[pos]) + for (pos = 0; newNode48->child[pos] != NULL; pos++) + ; + // No memmove needed here because Node48 uses a mapping (childIndex) and + // a dense array. + newNode48->child[pos] = newNode; + newNode48->childIndex[keyByte] = pos; + newNode48->count++; writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } - - delete node16; - return newNode->insertNode48(tree, nodeRef, keyByte, newNode); } break; } case NodeType48: { // Cast node to Node48 to access its members Node48* node48 = static_cast(node); - uint8_t keyByte = key[depth]; // Insert leaf into inner node if (node48->count < 48) { @@ -233,26 +286,35 @@ class ConcurrentART : public ART { } upgradeToWriteLockOrRestart(node, version, parent); - Node256* newNode = new Node256(); + Node256* newNode256 = new Node256(); for (unsigned i = 0; i < 256; i++) if (node48->childIndex[i] != 48) - newNode->child[i] = node48->child[node48->childIndex[i]]; - newNode->count = node48->count; - copyPrefix(node48, newNode); - *nodeRef = newNode; + newNode256->child[i] = node48->child[node48->childIndex[i]]; + newNode256->count = node48->count; + copyPrefix(node48, newNode256); + *nodeRef = newNode256; + + // tree->printTree(); + + // Shouldn't be deleted, other threads might still be reading it + // delete node; + + // INSERT NODE 256 PART + + // Insert leaf into inner node + // No memmove needed here because Node256 uses a direct mapping for all + // possible keys. + newNode256->count++; + newNode256->child[keyByte] = newNode; writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } - - delete node48; - return newNode->insertNode256(tree, nodeRef, keyByte, newNode); } break; } case NodeType256: { // Cast node to Node256 to access its members Node256* node256 = static_cast(node); - uint8_t keyByte = key[depth]; upgradeToWriteLockOrRestart(node, version); if (parent) { readUnlockOrRestart(parent, parentVersion, node); } diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 02c99f0..6627628 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -159,13 +159,20 @@ class Workload { auto op_start = chrono::high_resolution_clock::now(); tree.insertCC(key, keys[i]); - tree.printTree(); + cout << i << endl; auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); total_stats.insertion_time += duration.count(); total_stats.keys_processed++; } + cout << "insert ended" << endl; + /* + uint8_t key[4]; + ART::loadKey(keys[433373], key); + tree.insertCC(key, keys[433373]); + */ + } else { // Multi-threaded for remaining keys vector threads; From 95a962c9dfd3af51bc7b0264788a72b18e63392d Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Mon, 10 Nov 2025 14:50:59 -0500 Subject: [PATCH 07/14] Changed insert order, queries all the values in one qt succesfully --- concurrent/run_cc.cpp | 109 ++++++++++++++++++++++++++++-------------- 1 file changed, 74 insertions(+), 35 deletions(-) diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 6627628..5522517 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -89,7 +89,6 @@ class Workload { // Query operation for a single thread void query_worker(const std::vector& keys, - const std::vector& random_indices, size_t start_idx, size_t end_idx, std::barrier<>& sync_point, WorkerStats& stats) { @@ -100,9 +99,8 @@ class Workload { auto start_time = chrono::high_resolution_clock::now(); for (size_t i = start_idx; i < end_idx; i++) { - int random = random_indices[i]; uint8_t key[4]; - ART::loadKey(keys[random], key); + ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); ART::ArtNode* leaf = tree.lookup(key); @@ -112,7 +110,23 @@ class Workload { stats.query_time += duration.count(); stats.queries_processed++; - assert(ART::isLeaf(leaf) && ART::getLeafValue(leaf) == keys[random]); + if (!leaf) { + cerr << "Thread query " << i << " failed: key " << keys[i] + << " (index " << i << ") returned NULL" << endl; + abort(); + } + if (!ART::isLeaf(leaf)) { + cerr << "Thread query " << i << " failed: key " << keys[i] + << " (index " << i << ") returned non-leaf node" << endl; + abort(); + } + uint64_t found_value = ART::getLeafValue(leaf); + if (found_value != keys[i]) { + cerr << "Thread query " << i << " failed: key " << keys[i] + << " (index " << i << ") expected " << keys[i] + << " but got " << found_value << endl; + abort(); + } } auto end_time = chrono::high_resolution_clock::now(); @@ -159,38 +173,56 @@ class Workload { auto op_start = chrono::high_resolution_clock::now(); tree.insertCC(key, keys[i]); - cout << i << endl; + // cout << i << endl; auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); total_stats.insertion_time += duration.count(); total_stats.keys_processed++; } - cout << "insert ended" << endl; - /* - uint8_t key[4]; - ART::loadKey(keys[433373], key); - tree.insertCC(key, keys[433373]); - */ + cout << "Insert has ended" << endl; } else { - // Multi-threaded for remaining keys + // Multi-threaded for remaining keys - round-robin assignment vector threads; vector worker_stats(config.num_threads); barrier sync_point(config.num_threads); - size_t remaining_keys = keys.size() - 2; - size_t keys_per_thread = remaining_keys / config.num_threads; + // Create per-thread key lists (round-robin distribution) + vector> thread_keys(config.num_threads); + for (size_t i = 2; i < config.N; i++) { + int thread_id = (i - 2) % config.num_threads; + thread_keys[thread_id].push_back(i); + } auto concurrent_start = chrono::high_resolution_clock::now(); for (int t = 0; t < config.num_threads; t++) { - size_t start_idx = 2 + t * keys_per_thread; - size_t end_idx = (t == config.num_threads - 1) ? keys.size() : (2 + (t + 1) * keys_per_thread); - - threads.emplace_back(&Workload::insert_worker, this, - ref(keys), start_idx, end_idx, - ref(sync_point), ref(worker_stats[t])); + threads.emplace_back([this, &keys, &thread_keys, t, &sync_point, &worker_stats]() { + sync_point.arrive_and_wait(); + + auto start_time = chrono::high_resolution_clock::now(); + + for (size_t idx : thread_keys[t]) { + uint8_t key[4]; + ART::loadKey(keys[idx], key); + + auto op_start = chrono::high_resolution_clock::now(); + tree.insertCC(key, keys[idx]); + auto op_stop = chrono::high_resolution_clock::now(); + + auto duration = chrono::duration_cast(op_stop - op_start); + worker_stats[t].insertion_time += duration.count(); + worker_stats[t].keys_processed++; + } + + auto end_time = chrono::high_resolution_clock::now(); + if (config.verbose) { + auto total_time = chrono::duration_cast(end_time - start_time); + cout << "Thread " << t << " processed " << worker_stats[t].keys_processed + << " insertions in " << total_time.count() << " ns" << endl; + } + }); } for (auto& t : threads) { @@ -199,6 +231,7 @@ class Workload { auto concurrent_stop = chrono::high_resolution_clock::now(); auto concurrent_wall_time = chrono::duration_cast(concurrent_stop - concurrent_start); + cout << "Insert has ended" << endl; // Aggregate worker stats for (const auto& stats : worker_stats) { @@ -208,6 +241,7 @@ class Workload { total_stats.insertion_time += concurrent_wall_time.count(); if (config.verbose) { + cout << "Inserted " << total_stats.keys_processed << " keys" << endl; cout << "Concurrent phase wall time: " << concurrent_wall_time.count() << " ns" << endl; long long total_cpu_time = 0; @@ -225,23 +259,15 @@ class Workload { // Run query workload pair run_queries(const std::vector& keys) { WorkerStats total_stats; - size_t num_queries = keys.size() / 100; - - // Pre-generate random indices - srand(time(0)); - vector random_indices(num_queries); - for (size_t i = 0; i < num_queries; i++) { - random_indices[i] = rand() % keys.size(); - } + size_t num_queries = config.N; if (config.query_threads == 1) { // Single-threaded queries auto start_time = chrono::high_resolution_clock::now(); for (size_t i = 0; i < num_queries; i++) { - int random = random_indices[i]; uint8_t key[4]; - ART::loadKey(keys[random], key); + ART::loadKey(keys[i], key); auto op_start = chrono::high_resolution_clock::now(); ART::ArtNode* leaf = tree.lookup(key); @@ -251,7 +277,23 @@ class Workload { total_stats.query_time += duration.count(); total_stats.queries_processed++; - assert(ART::isLeaf(leaf) && ART::getLeafValue(leaf) == keys[random]); + if (!leaf) { + cerr << "Query " << i << " failed: key " << keys[i] + << " (index " << i << ") returned NULL" << endl; + abort(); + } + if (!ART::isLeaf(leaf)) { + cerr << "Query " << i << " failed: key " << keys[i] + << " (index " << i << ") returned non-leaf node" << endl; + abort(); + } + uint64_t found_value = ART::getLeafValue(leaf); + if (found_value != keys[i]) { + cerr << "Query " << i << " failed: key " << keys[i] + << " (index " << i << ") expected value " << keys[i] + << " but got " << found_value << endl; + abort(); + } } auto end_time = chrono::high_resolution_clock::now(); @@ -273,7 +315,7 @@ class Workload { size_t end_idx = (t == config.query_threads - 1) ? num_queries : (t + 1) * queries_per_thread; threads.emplace_back(&Workload::query_worker, this, - ref(keys), ref(random_indices), + ref(keys), start_idx, end_idx, ref(sync_point), ref(worker_stats[t])); } @@ -367,9 +409,6 @@ int main(int argc, char** argv) { // Load data auto keys = read_bin(input_f.c_str()); - cout << "Succesfully loaded " << keys.size() << " keys from " - << config.input_file << endl; - if (config.verbose) { cout << "Loaded " << keys.size() << " keys" << endl; cout << "Configuration:" << endl; From 07cda9ce857f206478ae89bbc11e2b7646f111cf Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Tue, 11 Nov 2025 00:56:25 -0500 Subject: [PATCH 08/14] Changed query to 0.01 of N --- concurrent/ConcurrentART.h | 14 +++++++------- concurrent/run_cc.cpp | 24 +++++++++++++++--------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index bc36c6a..1c269cd 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -52,7 +52,6 @@ class ConcurrentART : public ART { upgradeToWriteLockOrRestart(node, version, parent); Node4* newNode = new Node4(); - *nodeRef = newNode; newNode->prefixLength = mismatchPos; memcpy(newNode->prefix, node->prefix, min(mismatchPos, maxPrefixLength)); @@ -74,7 +73,7 @@ class ConcurrentART : public ART { } newNode->insertNode4(this, nodeRef, key[depth + mismatchPos], makeLeaf(value)); - + *nodeRef = newNode; writeUnlock(node); if (parent) { writeUnlock(parent); } @@ -127,7 +126,6 @@ class ConcurrentART : public ART { // Grow to Node16 Node16* newNode16 = new Node16(); - *nodeRef = newNode16; newNode16->count = 4; copyPrefix(node4, newNode16); for (unsigned i = 0; i < 4; i++) @@ -168,6 +166,7 @@ class ConcurrentART : public ART { newNode16->key[pos] = keyByteFlipped; newNode16->child[pos] = newNode; newNode16->count++; + *nodeRef = newNode16; writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } @@ -227,7 +226,7 @@ class ConcurrentART : public ART { // Grow to Node48 Node48* newNode48 = new Node48(); - *nodeRef = newNode48; + memcpy(newNode48->child, node16->child, node16->count * sizeof(uintptr_t)); for (unsigned i = 0; i < node16->count; i++) newNode48->childIndex[flipSign(node16->key[i])] = i; @@ -249,6 +248,7 @@ class ConcurrentART : public ART { newNode48->child[pos] = newNode; newNode48->childIndex[keyByte] = pos; newNode48->count++; + *nodeRef = newNode48; writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } @@ -292,7 +292,6 @@ class ConcurrentART : public ART { newNode256->child[i] = node48->child[node48->childIndex[i]]; newNode256->count = node48->count; copyPrefix(node48, newNode256); - *nodeRef = newNode256; // tree->printTree(); @@ -306,6 +305,7 @@ class ConcurrentART : public ART { // possible keys. newNode256->count++; newNode256->child[keyByte] = newNode; + *nodeRef = newNode256; writeUnlockObsolete(node); if (parent) { writeUnlock(parent); } @@ -356,13 +356,13 @@ class ConcurrentART : public ART { min(newPrefixLength, maxPrefixLength)); ArtNode* oldLeaf = *child; - *child = newNode; newNode->insertNode4(this, child, existingKey[depth + newPrefixLength], oldLeaf); newNode->insertNode4(this, child, key[depth + newPrefixLength], makeLeaf(value)); - + + *child = newNode; writeUnlock(node); return; diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 5522517..3399f50 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -232,6 +232,8 @@ class Workload { auto concurrent_stop = chrono::high_resolution_clock::now(); auto concurrent_wall_time = chrono::duration_cast(concurrent_stop - concurrent_start); cout << "Insert has ended" << endl; + + // tree.printTree(); // Aggregate worker stats for (const auto& stats : worker_stats) { @@ -259,15 +261,19 @@ class Workload { // Run query workload pair run_queries(const std::vector& keys) { WorkerStats total_stats; - size_t num_queries = config.N; + size_t num_queries = config.N / 100; + + uint64_t minval = 0; + uint64_t maxval = config.N-1; if (config.query_threads == 1) { // Single-threaded queries auto start_time = chrono::high_resolution_clock::now(); for (size_t i = 0; i < num_queries; i++) { + int random = rand() % (maxval - minval + 1) + minval; uint8_t key[4]; - ART::loadKey(keys[i], key); + ART::loadKey(keys[random], key); auto op_start = chrono::high_resolution_clock::now(); ART::ArtNode* leaf = tree.lookup(key); @@ -278,19 +284,19 @@ class Workload { total_stats.queries_processed++; if (!leaf) { - cerr << "Query " << i << " failed: key " << keys[i] - << " (index " << i << ") returned NULL" << endl; + cerr << "Query " << random << " failed: key " << keys[random] + << " (index " << random << ") returned NULL" << endl; abort(); } if (!ART::isLeaf(leaf)) { - cerr << "Query " << i << " failed: key " << keys[i] - << " (index " << i << ") returned non-leaf node" << endl; + cerr << "Query " << random << " failed: key " << keys[random] + << " (index " << random << ") returned non-leaf node" << endl; abort(); } uint64_t found_value = ART::getLeafValue(leaf); - if (found_value != keys[i]) { - cerr << "Query " << i << " failed: key " << keys[i] - << " (index " << i << ") expected value " << keys[i] + if (found_value != keys[random]) { + cerr << "Query " << random << " failed: key " << keys[random] + << " (index " << random << ") expected value " << keys[random] << " but got " << found_value << endl; abort(); } From 96c1824b185624bf9c9d1b87ece02fcbcf2dce1d Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Tue, 11 Nov 2025 21:42:50 -0500 Subject: [PATCH 09/14] Implemented run for txt and bin --- CMakeLists.txt | 3 +- run_bin.cpp | 66 ++++-------------------------------------- run.cpp => run_txt.cpp | 43 ++++++++++++++++++--------- 3 files changed, 37 insertions(+), 75 deletions(-) rename run.cpp => run_txt.cpp (93%) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfa9757..cac1d6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,5 +25,6 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") # Targets add_executable(main main.cpp) add_executable(insert_profiling insert_profiling.cpp) -add_executable(run run.cpp) +add_executable(run_bin run_bin.cpp) +add_executable(run_txt run_txt.cpp) add_executable(run_cc concurrent/run_cc.cpp) diff --git a/run_bin.cpp b/run_bin.cpp index 03fae44..7957dd0 100644 --- a/run_bin.cpp +++ b/run_bin.cpp @@ -12,7 +12,7 @@ #include "trees/QuART_tail.h" #include "trees/QuART_stail.h" #include "trees/QuART_lil_can.h" -#include "trees/QuART_stail_reset.h"s +#include "trees/QuART_stail_reset.h" using namespace std; @@ -33,10 +33,6 @@ int main(int argc, char** argv) { string input_file; // required argument string tree_type = "ART"; // default tree type - - // Query 1% of entries - uint64_t minval = 0; - // Parse arguments; make sure to increment i by 2 if you consume an argument for (int i = 1; i < argc;) { if (string(argv[i]) == "-v") { @@ -56,17 +52,13 @@ int main(int argc, char** argv) { } } + // Used during querying + uint64_t minval = 0; + uint64_t maxval = N - 1; + // read data auto keys = read_bin(input_file.c_str()); - cout << "keys size: " << keys.size() << endl; - - // Use the actual number of keys loaded, not the default N - N = keys.size(); - - // Update maxval to match the actual data size - uint64_t maxval = N - 1; - if (tree_type == "ART") { ART::ART* tree = new ART::ART(); long long insertion_time = 0; @@ -233,49 +225,6 @@ int main(int argc, char** argv) { cout << "Query time: " << query_time << " ns" << endl; } - // Output the times in csv format, including tree type - cout << insertion_time << "," << query_time << endl; - } else if (tree_type == "QuART_stail_vdebug") { - ART::QuART_stail_vdebug* tree = new ART::QuART_stail_vdebug(); - long long insertion_time = 0; - for (uint64_t i = 0; i < N; i++) { - uint8_t key[4]; - ART::loadKey(keys[i], key); - auto start = chrono::high_resolution_clock::now(); - tree->insert(key, keys[i]); - - auto stop = chrono::high_resolution_clock::now(); - auto duration = - chrono::duration_cast(stop - start); - insertion_time += duration.count(); - } - - if (verbose) { - cout << "Tree type: " << tree_type << endl; - cout << "Insertion time: " << insertion_time << " ns" << endl; - } - - srand(time(0)); - - long long query_time = 0; - for (uint64_t i = 0; i < (N / 100); i++) { - int random = rand() % (maxval - minval + 1) + minval; - uint8_t key[4]; - ART::loadKey(keys[random], key); - auto start = chrono::high_resolution_clock::now(); - ART::ArtNode* leaf = tree->lookup(key); - auto stop = chrono::high_resolution_clock::now(); - auto duration = - chrono::duration_cast(stop - start); - query_time += duration.count(); - assert(ART::isLeaf(leaf) && - ART::getLeafValue(leaf) == keys[random]); - } - - if (verbose) { - cout << "Query time: " << query_time << " ns" << endl; - } - // Output the times in csv format, including tree type cout << insertion_time << "," << query_time << endl; } else if (tree_type == "QuART_lil_can") { @@ -328,11 +277,6 @@ int main(int argc, char** argv) { ART::loadKey(keys[i], key); auto start = chrono::high_resolution_clock::now(); tree->insert(key, keys[i]); - - // cout << "fp value is " << ART::getLeafValue(tree->fp_leaf) << " after inserting key " - //<< keys[i] << endl; - - auto stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(stop - start); diff --git a/run.cpp b/run_txt.cpp similarity index 93% rename from run.cpp rename to run_txt.cpp index 2f73b08..a3e5d9d 100644 --- a/run.cpp +++ b/run_txt.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include "ART.h" #include "ArtNode.h" @@ -17,13 +19,29 @@ using namespace std; template -std::vector read_bin(const char* filename) { - std::ifstream inputFile(filename, std::ios::binary); - inputFile.seekg(0, std::ios::end); - const std::streampos fileSize = inputFile.tellg(); - inputFile.seekg(0, std::ios::beg); - std::vector data(fileSize / sizeof(key_type)); - inputFile.read(reinterpret_cast(data.data()), fileSize); +std::vector read_txt(const char* filename) { + std::ifstream inputFile(filename); + if (!inputFile) { + std::cerr << "Failed to open file: " << filename << std::endl; + exit(1); + } + + std::vector data; + std::string line; + + while (std::getline(inputFile, line)) { + // Skip empty lines + if (line.empty()) continue; + + // Parse the number from the line + std::stringstream ss(line); + key_type key; + ss >> key; + + data.push_back(key); + } + + inputFile.close(); return data; } @@ -33,11 +51,6 @@ int main(int argc, char** argv) { string input_file; // required argument string tree_type = "ART"; // default tree type - - // Query 1% of entries - uint64_t minval = 0; - uint64_t maxval = N-1; - // Parse arguments; make sure to increment i by 2 if you consume an argument for (int i = 1; i < argc;) { if (string(argv[i]) == "-v") { @@ -57,8 +70,12 @@ int main(int argc, char** argv) { } } + // Used during querying + uint64_t minval = 0; + uint64_t maxval = N - 1; + // read data - auto keys = read_bin(input_file.c_str()); + auto keys = read_txt(input_file.c_str()); if (tree_type == "ART") { ART::ART* tree = new ART::ART(); From e2f5b46b36c2da7fdd2344590a84f5038826a111 Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Tue, 11 Nov 2025 21:47:47 -0500 Subject: [PATCH 10/14] Added comments to concurrency files --- concurrent/ConcurrentART.h | 7 +++++++ concurrent/VersionControl.cpp | 5 +++++ concurrent/VersionControl.h | 5 +++++ concurrent/run_cc.cpp | 4 ++++ 4 files changed, 21 insertions(+) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index 1c269cd..1cab34e 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -1,3 +1,10 @@ +/* + * Concurrent Adaptive Radix Tree (ART) implementation with concurrency control. + * This implementation extends the base ART to support concurrent insertions + * and queries using version control mechanisms. The insert function is refactored + * according to the implementation in "The ART of Practical Synchronization" paper. + */ + #pragma once #include "../ART.h" diff --git a/concurrent/VersionControl.cpp b/concurrent/VersionControl.cpp index 0006b21..20486ac 100644 --- a/concurrent/VersionControl.cpp +++ b/concurrent/VersionControl.cpp @@ -1,3 +1,8 @@ +/* + * Implementation file for concurrency version control functions. Taken directly from + * "The ART of Practical Synchronization" paper. + */ + #include "VersionControl.h" namespace ART { diff --git a/concurrent/VersionControl.h b/concurrent/VersionControl.h index e9b6155..e9c6da8 100644 --- a/concurrent/VersionControl.h +++ b/concurrent/VersionControl.h @@ -1,3 +1,8 @@ +/* + * Header file for concurrency version control functions. Taken directly from + * "The ART of Practical Synchronization" paper. + */ + #pragma once #include "../ArtNode.h" diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 3399f50..a92bdd8 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -1,3 +1,7 @@ +/* + * Concurrent ART benchmark with concurrency control (CC) insertions and queries. + */ + #include #include #include From 16366edf9ddedd2977b48f73375b00b262f7b5bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Can=20G=C3=B6kmen?= <47925886+cangokmen@users.noreply.github.com> Date: Tue, 11 Nov 2025 22:17:31 -0500 Subject: [PATCH 11/14] Update concurrent/run_cc.cpp, remove commented out line Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- concurrent/run_cc.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index a92bdd8..5548566 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -177,7 +177,6 @@ class Workload { auto op_start = chrono::high_resolution_clock::now(); tree.insertCC(key, keys[i]); - // cout << i << endl; auto op_stop = chrono::high_resolution_clock::now(); auto duration = chrono::duration_cast(op_stop - op_start); From b0e1f0df176a9958df0a16cf29b82bce2b062225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Can=20G=C3=B6kmen?= <47925886+cangokmen@users.noreply.github.com> Date: Tue, 11 Nov 2025 22:17:57 -0500 Subject: [PATCH 12/14] Update concurrent/run_cc.cpp, remove commented out line #2 Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- concurrent/run_cc.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index 5548566..06f1d62 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -236,7 +236,6 @@ class Workload { auto concurrent_wall_time = chrono::duration_cast(concurrent_stop - concurrent_start); cout << "Insert has ended" << endl; - // tree.printTree(); // Aggregate worker stats for (const auto& stats : worker_stats) { From 4a11045ea763854e1688935bb942ed7cdbc2bca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Can=20G=C3=B6kmen?= <47925886+cangokmen@users.noreply.github.com> Date: Tue, 11 Nov 2025 22:19:12 -0500 Subject: [PATCH 13/14] Removed unused variable Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- concurrent/ConcurrentART.h | 1 - 1 file changed, 1 deletion(-) diff --git a/concurrent/ConcurrentART.h b/concurrent/ConcurrentART.h index 1cab34e..6b9fcc3 100644 --- a/concurrent/ConcurrentART.h +++ b/concurrent/ConcurrentART.h @@ -39,7 +39,6 @@ class ConcurrentART : public ART { private: - bool concurrent_mode; void insert(ConcurrentART* tree, ArtNode* node, ArtNode** nodeRef, uint8_t key[], int depth, uintptr_t value, int maxKeyLength, ArtNode* parent, int parentVersion) { // Insert the leaf value into the tree From 1c85687b04ef43d8268765f9ebcf6be93c2082f4 Mon Sep 17 00:00:00 2001 From: Can Gokmen Date: Tue, 11 Nov 2025 22:25:33 -0500 Subject: [PATCH 14/14] Removed unused input variable --- concurrent/run_cc.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/concurrent/run_cc.cpp b/concurrent/run_cc.cpp index a92bdd8..3dadd4e 100644 --- a/concurrent/run_cc.cpp +++ b/concurrent/run_cc.cpp @@ -391,8 +391,6 @@ class Workload { int main(int argc, char** argv) { Config config; - string input_f; - // Parse arguments for (int i = 1; i < argc;) { if (string(argv[i]) == "-v") { @@ -403,7 +401,6 @@ int main(int argc, char** argv) { i += 2; } else if (string(argv[i]) == "-f") { config.input_file = argv[i + 1]; - input_f = argv[i + 1]; i += 2; } else if (string(argv[i]) == "-it") { config.num_threads = atoi(argv[i + 1]); @@ -417,7 +414,7 @@ int main(int argc, char** argv) { } // Load data - auto keys = read_bin(input_f.c_str()); + auto keys = read_bin(config.input_file.c_str()); if (config.verbose) { cout << "Loaded " << keys.size() << " keys" << endl;