Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions script/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
WRITE_FACTOR = 0.4
READ_FACTOR = 0.2
MIXED_RATIO = 0.5
SELECTIVITY_FACTOR = ["0.01", "0.1", "0.25"]
RANGE_QUERY_FACTOR = 0.5
PRELOAD = False


Expand All @@ -27,6 +29,8 @@ def main(args):
"file_type": "binary",
"seed": 0,
"use_preload": PRELOAD,
"range_query_factor": RANGE_QUERY_FACTOR,
"selectivity": SELECTIVITY_FACTOR,
}

exp_pairs = ((file, index) for file in files for index in INDEXES)
Expand Down
15 changes: 12 additions & 3 deletions script/infra/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ def __init__(self, db_path: str) -> None:
preload_creation_time INT,
write_time INT,
mixed_time INT,
read_time INT
read_time INT,
range_read_time_short INT,
range_read_time_mid INT,
range_read_time_long INT,
);
"""
)
Expand Down Expand Up @@ -56,8 +59,11 @@ def log_row(
preload_creation_time,
write_time,
mixed_time,
read_time
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
read_time,
range_read_time_short,
range_read_time_mid,
range_read_time_long
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
(
datetime.now(timezone.utc),
Expand All @@ -75,6 +81,9 @@ def log_row(
stats.write_time,
stats.mixed_time,
stats.read_time,
stats.range_read_time_short,
stats.range_read_time_mid,
stats.range_read_time_long,
),
)
self.db_con.commit()
Expand Down
33 changes: 32 additions & 1 deletion script/infra/pybliss.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from typing import List
import re
import os
import logging
Expand All @@ -14,6 +15,8 @@ class BlissArgs:
write_factor: float
read_factor: float
mixed_ratio: float
range_query_factor: float
selectivity: List[str]
seed: int = 0
file_type: str = "binary"
use_preload: bool = False
Expand All @@ -26,6 +29,9 @@ class BlissStats:
write_time: int
read_time: int
mixed_time: int
range_read_time_short: int = 0
range_read_time_mid: int = 0
range_read_time_long: int = 0


class PyBliss:
Expand All @@ -51,10 +57,13 @@ def __init__(
self.preload_creation_time_regex = re.compile(
r"\[[0-9 :.-]+\] \[info\] Preload Creation Time \(ns\): (\d+)"
)
self.range_read_time_regex = re.compile(
r"\[[0-9 :.-]+\] \[info\] Range Query Times \(ns\) for selectivity \[([\d\., ]+)\]: ([\d\., ]+)"
)

def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats:
if self.smoke_test:
return BlissStats(*(random.randint(0, 2 << 16) for _ in range(5)))
return BlissStats(*(random.randint(0, 2 << 16) for _ in range(8)))

cmd = [
self.bliss_execute_path,
Expand All @@ -67,6 +76,8 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats:
f"--seed {args.seed}",
f"--file_type {'binary' if args.file_type else 'txt'}",
"--use_preload" if args.use_preload else "",
f"--selectivity " + ",".join(args.selectivity),
f"--range_query_factor {args.range_query_factor}",
]
process = subprocess.Popen(
" ".join(cmd),
Expand All @@ -93,6 +104,23 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats:
read_time = self.read_time_regex.search(proc_results)
read_time = int(read_time.group(1)) if read_time else 0

# Extract range query times from the comma-separated list
range_times_match = self.range_read_time_regex.search(proc_results)
range_read_time_short = 0
range_read_time_mid = 0
range_read_time_long = 0

if range_times_match:
range_times_str = range_times_match.group(2)
range_times = [int(x.strip()) for x in range_times_str.split(',')]

if len(range_times) >= 1:
range_read_time_short = range_times[0]
if len(range_times) >= 2:
range_read_time_mid = range_times[1]
if len(range_times) >= 3:
range_read_time_long = range_times[2]

os.makedirs("./run_logs", exist_ok=True)
_, file_name = os.path.split(args.data_file)
file_name, _ = os.path.splitext(file_name + f"_{args.index_type}")
Expand All @@ -105,4 +133,7 @@ def run_single_bliss_bench(self, args: BlissArgs) -> BlissStats:
write_time=write_time,
read_time=read_time,
mixed_time=mixed_time,
range_read_time_short=range_read_time_short,
range_read_time_mid=range_read_time_mid,
range_read_time_long=range_read_time_long,
)
2 changes: 1 addition & 1 deletion src/bliss/bench_alex.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ template <typename KEY_TYPE, typename VALUE_TYPE>
class BlissAlexIndex : public BlissIndex<KEY_TYPE, VALUE_TYPE> {
public:
alex::Alex<KEY_TYPE, VALUE_TYPE> _index;
BlissAlexIndex() : _index(){};
BlissAlexIndex() : _index() {};

void bulkload(
std::vector<std::pair<KEY_TYPE, VALUE_TYPE>> values) override {
Expand Down
20 changes: 19 additions & 1 deletion src/bliss/util/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <cxxopts.hpp>
#include <iostream>
#include <string>
#include <vector>

#include "bliss/util/config.h"

Expand Down Expand Up @@ -36,9 +37,24 @@ BlissConfig parse_args(int argc, char *argv[]) {
"file_type", "Input file type [binary | txt]",
cxxopts::value<std::string>()->default_value("txt"))(
"use_preload", "Use index defined preload",
cxxopts::value<bool>()->default_value("false"));
cxxopts::value<bool>()->default_value("false"))(
"range_query_perc", "Range query factor",
cxxopts::value<double>()->default_value("0.0"))(
"selectivity", "Selectivity factor(s) (comma-separated, percentage of domain)",
cxxopts::value<std::string>()->default_value("0.01"));

auto result = options.parse(argc, argv);

// Parse selectivity factors
std::vector<double> selectivities;
std::string selectivity_str = result["selectivity"].as<std::string>();
size_t start = 0, end = 0;
while ((end = selectivity_str.find(',', start)) != std::string::npos) {
selectivities.push_back(std::stod(selectivity_str.substr(start, end - start)));
start = end + 1;
}
selectivities.push_back(std::stod(selectivity_str.substr(start)));

config = {
.data_file = result["data_file"].as<std::string>(),
.preload_factor = result["preload_factor"].as<double>(),
Expand All @@ -51,6 +67,8 @@ BlissConfig parse_args(int argc, char *argv[]) {
.index = result["index"].as<std::string>(),
.file_type = result["file_type"].as<std::string>(),
.use_preload = result["use_preload"].as<bool>(),
.range_query_perc = result["range_query_perc"].as<double>(),
.selectivity_factor = selectivities,
};
} catch (const std::exception &e) {
std::cerr << "Error: " << e.what() << std::endl;
Expand Down
11 changes: 11 additions & 0 deletions src/bliss/util/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <spdlog/common.h>

#include <string>
#include <vector>

namespace bliss {
namespace utils {
Expand All @@ -19,6 +20,8 @@ struct BlissConfig {
std::string index;
std::string file_type;
bool use_preload;
double range_query_perc = 0.0;
std::vector<double> selectivity_factor = {0.01};
};

void display_config(BlissConfig config) {
Expand All @@ -30,6 +33,14 @@ void display_config(BlissConfig config) {
spdlog::trace("Verbosity {}", config.verbosity);
spdlog::trace("Index: {}", config.index);
spdlog::trace("File type: {}", config.file_type);
spdlog::trace("Use Preload: {}", config.use_preload);
spdlog::trace("Range Query Factor: {}", config.range_query_perc);
std::string selectivities;
for (size_t i = 0; i < config.selectivity_factor.size(); ++i) {
selectivities += std::to_string(config.selectivity_factor[i]);
if (i != config.selectivity_factor.size() - 1) selectivities += ", ";
}
spdlog::trace("Selectivity Factor(s): {}", selectivities);
}
} // namespace config
} // namespace utils
Expand Down
21 changes: 21 additions & 0 deletions src/bliss/util/execute.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <iostream>
#include <random>
#include <vector>
#include <algorithm>

#include "bliss/bliss_index.h"

Expand Down Expand Up @@ -43,6 +44,26 @@ void execute_non_empty_reads(bliss::BlissIndex<key_type, value_type> &tree,
}
}

// Issues `num_queries` range lookups against `tree`.
//
// Each lookup starts at a key drawn uniformly at random from `data` and ends
// at that key plus a fixed width. The width is `selectivity` multiplied by
// the spread (largest key minus smallest key) of `data`, truncated to
// key_type.
//
// Parameters:
//   tree        - index that receives the tree.get(start_key, end_key) calls
//   data        - keys used to pick start keys and to size the range width
//   num_queries - number of range lookups to issue
//   selectivity - fraction of the key spread covered by each lookup
//   seed        - seed for the generator that picks the start keys
//
// Does nothing (beyond the trace log) when `data` is empty.
void execute_range_queries(bliss::BlissIndex<key_type, value_type> &tree,
                           const std::vector<key_type> &data, int num_queries,
                           double selectivity = 0.01, int seed = 0) {
  spdlog::trace("Executing Range Queries");

  // With no keys there is nothing to sample: `data.size() - 1` would wrap
  // around and dereferencing the min/max iterators would be undefined.
  if (data.empty()) {
    return;
  }

  std::mt19937 gen(seed);
  std::uniform_int_distribution<size_t> key_dist(0, data.size() - 1);

  // One pass over the keys yields both extremes.
  const auto extremes = std::minmax_element(data.begin(), data.end());
  key_type selected_data_range = *extremes.second - *extremes.first;
  key_type avg_range_size =
      static_cast<key_type>(selected_data_range * selectivity);

  for (int i = 0; i < num_queries; ++i) {
    size_t start_idx = key_dist(gen);
    key_type start_key = data.at(start_idx);
    key_type end_key = start_key + avg_range_size;

    tree.get(start_key, end_key);
  }
}

} // namespace executor
} // namespace utils
} // namespace bliss
Expand Down
40 changes: 40 additions & 0 deletions src/bliss_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ void workload_executor(bliss::BlissIndex<key_type, value_type> &tree,
size_t num_writes = std::round(config.write_factor * data.size());
size_t num_mixed = num_inserts - (num_preload + num_writes);
size_t num_reads = std::round(config.read_factor * data.size());
size_t num_ranges = std::round(config.range_query_perc * data.size());

// Timing for preloading index
spdlog::debug("Preloading {} items", num_preload);
Expand Down Expand Up @@ -143,8 +144,47 @@ void workload_executor(bliss::BlissIndex<key_type, value_type> &tree,
executor::execute_non_empty_reads(tree, data, num_reads, seed);
});
spdlog::info("Read Time (ns): {}", read_time);

// Timing for range queries with configured amount
if (num_ranges > 0) {
spdlog::debug("Executing {} range queries", num_ranges);
std::vector<unsigned long long> range_times;
std::string selectivity_values;

// Process all selectivity factors first
for (const auto& selectivity : config.selectivity_factor) {
auto range_time = 0ULL;
try {
range_time = time_function([&]() {
executor::execute_range_queries(tree, data, num_ranges, selectivity);
});
} catch (const std::exception& e) {
if (std::string(e.what()) == "Not implemented") {
range_time = 0;
} else {
throw;
}
}

range_times.push_back(range_time);
if (!selectivity_values.empty()) {
selectivity_values += ", ";
}
selectivity_values += std::to_string(selectivity);
}

std::string time_values;
for (size_t i = 0; i < range_times.size(); ++i) {
if (i > 0) time_values += ", ";
time_values += std::to_string(range_times[i]);
}

spdlog::info("Range Query Times (ns) for selectivity [{}]: {}",
selectivity_values, time_values);
}
}


int main(int argc, char *argv[]) {
auto config = args::parse_args(argc, argv);
switch (config.verbosity) {
Expand Down
21 changes: 21 additions & 0 deletions tests/test_alex/alex_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
#include "bliss_index_tests.h"

class AlexTest : public BlissIndexTest {};

// Loads the generated keys into an ALEX-backed index and issues one range
// get(). The test passes when the call returns normally, or when it throws a
// std::runtime_error whose message is "Not implemented".
TEST_F(AlexTest, TestAlex_RangeQuery) {
  index.reset(new bliss::BlissAlexIndex<key_type, key_type>());

  std::vector<key_type> keys;
  GenerateData(keys, num_keys);

  auto first = keys.begin();
  auto last = keys.end();
  executor::execute_inserts(*index, first, last);

  // The range begins at the first generated key and extends by a quarter of
  // the difference between the last and first generated keys.
  key_type span = keys.back() - keys.front();
  key_type lower = keys.front();
  key_type upper = lower + span / 4;

  try {
    index->get(lower, upper);
    SUCCEED();
  } catch (const std::runtime_error& err) {
    EXPECT_STREQ("Not implemented", err.what());
  }
}

TEST_F(AlexTest, TestAlex_Sorted) {
index.reset(new bliss::BlissAlexIndex<key_type, key_type>());
std::vector<key_type> data;
Expand Down
20 changes: 20 additions & 0 deletions tests/test_art/art_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,26 @@ TEST_F(ArtTest, TestArt_Sanity) {
EXPECT_TRUE(index->get(key));
}

// Loads the generated keys into an ART-backed index and issues one range
// get(). The test passes when the call returns normally, or when it throws a
// std::runtime_error whose message is "Not implemented".
TEST_F(ArtTest, TestArt_RangeQuery) {
  index.reset(new bliss::BlissARTIndex<key_type, key_type>());

  std::vector<key_type> keys;
  GenerateData(keys, num_keys);

  auto first = keys.begin();
  auto last = keys.end();
  executor::execute_inserts(*index, first, last);

  // The range begins at the first generated key and extends by a quarter of
  // the difference between the last and first generated keys.
  key_type span = keys.back() - keys.front();
  key_type lower = keys.front();
  key_type upper = lower + span / 4;

  try {
    index->get(lower, upper);
    SUCCEED();
  } catch (const std::runtime_error& err) {
    EXPECT_STREQ("Not implemented", err.what());
  }
}

TEST_F(ArtTest, TestArt_Sorted) {
index.reset(new bliss::BlissARTIndex<key_type, key_type>());
std::vector<key_type> data;
Expand Down
20 changes: 20 additions & 0 deletions tests/test_btree/btree_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,26 @@ TEST_F(BTreeTest, TestBtree_Sanity) {
EXPECT_TRUE(index->get(key));
}

// Loads the generated keys into a B-tree-backed index and issues one range
// get(). The test passes when the call returns normally, or when it throws a
// std::runtime_error whose message is "Not implemented".
TEST_F(BTreeTest, TestBtree_RangeQuery) {
  index.reset(new bliss::BlissBTreeIndex<key_type, key_type>());

  std::vector<key_type> keys;
  GenerateData(keys, num_keys);

  auto first = keys.begin();
  auto last = keys.end();
  executor::execute_inserts(*index, first, last);

  // The range begins at the first generated key and extends by a quarter of
  // the difference between the last and first generated keys.
  key_type span = keys.back() - keys.front();
  key_type lower = keys.front();
  key_type upper = lower + span / 4;

  try {
    index->get(lower, upper);
    SUCCEED();
  } catch (const std::runtime_error& err) {
    EXPECT_STREQ("Not implemented", err.what());
  }
}

TEST_F(BTreeTest, TestBtree_Sorted) {
index.reset(new bliss::BlissBTreeIndex<key_type, key_type>());
std::vector<key_type> data;
Expand Down
Loading