From 3fdd0834d599529f5c764679d2102cbda478a566 Mon Sep 17 00:00:00 2001
From: Ray Andrew <rs@rs.ht>
Date: Mon, 6 Apr 2026 00:31:20 -0500
Subject: [PATCH] feat(rocksdb): migrate SQLite indexing to RocksDB

Replace SQLite-backed indexing and provenance storage with RocksDB-backed stores.

  Key changes:
  - add RocksDB async/database/db-manager/filesystem/key-codec layers
  - migrate index and provenance databases from SQLite to RocksDB
  - update index builder, trace reader, reorganize, view, stats, and comparator
    paths for RocksDB
  - harden transaction atomicity and rollback behavior with TransactionScope
  - add iterator status checking for prefix scans
  - harden gzip/tar indexer cache state and metadata handling
  - capture executor context in RocksDB awaitables
  - clean up failed RocksDB open paths and manager lifecycle behavior
  - vendor CPM 0.42.1 and update CI/build integration
  - refresh docs, Python bindings, and C++/Python test coverage for the new backend

  Validation:
  - full test suite passed
  - Ubuntu 22.04 Docker run passed
  - focused RocksDB/indexer regression tests passed
---
 .github/workflows/ci.yml                      |   44 +-
 .github/workflows/format-check.yaml           |   10 +-
 .github/workflows/python-publish.yaml         |   14 +-
 .readthedocs.yaml                             |    8 -
 CMakeLists.txt                                |   12 +
 Makefile                                      |   11 +-
 cmake/modules/CPM.cmake                       |   26 +-
 cmake/modules/Dependencies.cmake              |  510 +++--
 cmake/modules/InstallHelpers.cmake            |   36 -
 cmake/modules/LibraryHelpers.cmake            |    4 +
 cmake/vendor/CPM_0.42.1.cmake                 | 1363 ++++++++++++++
 docs/Makefile                                 |    2 +
 docs/scripts/generate_api_index.py            |  167 +-
 docs/source/_static/custom.css                |   19 +
 docs/source/api/indexer.rst                   |    5 +-
 docs/source/api/trace_reader.rst              |    5 +-
 docs/source/conf.py                           |  837 ++++++++-
 docs/source/cpp_api/coro.rst                  |   24 +-
 docs/source/cpp_api/index.rst                 |    9 +-
 docs/source/cpp_api/pipeline/executors.rst    |    7 +-
 docs/source/cpp_api/rocksdb.rst               |   35 +
 docs/source/cpp_api/sqlite.rst                |  337 ----
 docs/source/installation.rst                  |    5 +-
 docs/source/quickstart.rst                    |    2 +-
 docs/source/utilities/indexer.rst             |    7 +-
 .../dftracer/utils/core/common/constants.h    |    4 +-
 .../dftracer/utils/core/common/scoped_fd.h    |   44 +
 include/dftracer/utils/core/env.h             |   35 +
 include/dftracer/utils/core/io/io_backend.h   |    8 +
 .../dftracer/utils/core/pipeline/executor.h   |   12 +-
 .../utils/core/pipeline/pipeline_config.h     |    8 +-
 include/dftracer/utils/core/rocksdb/async.h   |  130 ++
 .../dftracer/utils/core/rocksdb/database.h    |   82 +
 .../dftracer/utils/core/rocksdb/db_manager.h  |   40 +
 .../dftracer/utils/core/rocksdb/filesystem.h  |   19 +
 .../dftracer/utils/core/rocksdb/key_codec.h   |   39 +
 include/dftracer/utils/core/runtime.h         |    4 +
 include/dftracer/utils/core/sqlite/async.h    |  112 --
 include/dftracer/utils/core/sqlite/database.h |   36 -
 include/dftracer/utils/core/sqlite/error.h    |   31 -
 .../dftracer/utils/core/sqlite/statement.h    |   65 -
 include/dftracer/utils/core/sqlite/vfs.h      |   44 -
 include/dftracer/utils/server/trace_index.h   |    6 +-
 .../aggregators/chunk_aggregator_utility.h    |    6 +-
 .../composites/dft/chunk_extractor_utility.h  |    2 +-
 .../dft/comparator/comparison_config.h        |    2 +-
 .../composites/dft/indexing/bloom_filter.h    |    2 +-
 .../dft/indexing/bloom_filter_cache.h         |   18 +-
 .../dft/indexing/chunk_dimension_stats.h      |    2 +-
 .../dft/indexing/chunk_indexer_utility.h      |    6 +-
 .../dft/indexing/chunk_pruner_utility.h       |    2 +-
 .../dft/indexing/chunk_statistics.h           |    3 +-
 .../composites/dft/internal/chunk_spec.h      |    2 +-
 .../utilities/composites/dft/internal/utils.h |   21 +-
 .../dft/metadata_collector_utility.h          |   15 +-
 .../dft/reorganize/provenance_tracker.h       |   10 +-
 .../dft/reorganize/reorganization_planner.h   |    2 +-
 .../statistics/chunk_detail_scanner_utility.h |    2 +-
 .../composites/dft/statistics/statistics.h    |    3 +-
 .../statistics_aggregator_utility.h           |    2 +-
 .../dft/statistics/trace_statistics.h         |    2 +-
 .../dft/views/view_builder_utility.h          |    6 +-
 .../dft/views/view_reader_utility.h           |    4 +-
 .../composites/file_merger_utility.h          |    5 +-
 .../composites/indexed_file_reader_utility.h  |   43 +-
 .../composites/line_batch_processor_utility.h |   10 +-
 .../utils/utilities/composites/types.h        |   13 +-
 .../utilities/fileio/lines/line_bytes_range.h |    2 +-
 .../utils/utilities/fileio/lines/line_types.h |   13 +-
 .../async_plain_file_bytes_generator.h        |   14 +-
 .../sources/async_plain_file_line_generator.h |    9 +-
 .../async_streaming_gz_line_generator.h       |    9 +-
 .../fileio/lines/streaming_line_reader.h      |   47 +-
 .../utils/utilities/fileio/types/chunk_spec.h |    8 +-
 .../utilities/indexer/index_builder_utility.h |    2 +-
 .../utils/utilities/indexer/index_database.h  |   71 +-
 .../utilities/indexer/internal/indexer.h      |    4 +-
 .../indexer/internal/indexer_factory.h        |    6 +-
 .../utilities/indexer/internal/scan_prefix.h  |   38 +
 .../utilities/indexer/provenance_database.h   |   44 +-
 .../utils/utilities/reader/internal/reader.h  |    5 +-
 .../reader/internal/reader_factory.h          |    2 +-
 .../utils/utilities/reader/trace_reader.h     |    6 +-
 python/dftracer/utils/dftracer_utils_ext.pyi  |   35 +-
 setup.py                                      |    1 -
 src/CMakeLists.txt                            |   80 +-
 .../utils/binaries/dftracer_aggregator.cpp    |   22 +-
 .../utils/binaries/dftracer_comparator.cpp    |   19 +-
 .../utils/binaries/dftracer_event_count.cpp   |    9 +-
 .../binaries/dftracer_gen_fake_trace.cpp      |    6 +-
 .../utils/binaries/dftracer_index.cpp         |    7 +-
 src/dftracer/utils/binaries/dftracer_info.cpp |   23 +-
 .../utils/binaries/dftracer_organize.cpp      |   14 +-
 .../utils/binaries/dftracer_reader.cpp        |   55 +-
 .../utils/binaries/dftracer_reconstruct.cpp   |    6 +-
 .../utils/binaries/dftracer_server.cpp        |    4 +-
 .../utils/binaries/dftracer_split.cpp         |   22 +-
 .../utils/binaries/dftracer_stats.cpp         |   41 +-
 src/dftracer/utils/binaries/dftracer_tar.cpp  |   10 +-
 src/dftracer/utils/binaries/dftracer_view.cpp |   29 +-
 src/dftracer/utils/core/env.cpp               |   57 +
 .../core/io/epoll_thread_pool_backend.cpp     |   25 +-
 .../utils/core/io/epoll_thread_pool_backend.h |    3 +
 .../utils/core/io/io_backend_sync.cpp         |    4 +-
 .../utils/core/io/io_uring_backend.cpp        |   66 +-
 src/dftracer/utils/core/io/io_uring_backend.h |    7 +
 .../core/io/kqueue_thread_pool_backend.cpp    |   25 +-
 .../core/io/kqueue_thread_pool_backend.h      |    3 +
 .../utils/core/io/thread_pool_backend.cpp     |   26 +-
 .../utils/core/io/thread_pool_backend.h       |    5 +
 src/dftracer/utils/core/pipeline/executor.cpp |   23 +-
 src/dftracer/utils/core/pipeline/pipeline.cpp |    2 +-
 src/dftracer/utils/core/rocksdb/async.cpp     |   32 +
 src/dftracer/utils/core/rocksdb/database.cpp  |  275 +++
 .../utils/core/rocksdb/db_manager.cpp         |  143 ++
 .../utils/core/rocksdb/filesystem.cpp         |  849 +++++++++
 src/dftracer/utils/core/rocksdb/key_codec.cpp |   88 +
 src/dftracer/utils/core/runtime.cpp           |    4 +
 src/dftracer/utils/core/sqlite/async.cpp      |   32 -
 src/dftracer/utils/core/sqlite/database.cpp   |   85 -
 src/dftracer/utils/core/sqlite/error.cpp      |   25 -
 src/dftracer/utils/core/sqlite/statement.cpp  |  175 --
 src/dftracer/utils/core/sqlite/vfs.cpp        |  620 -------
 src/dftracer/utils/python/indexer.cpp         |   95 +-
 src/dftracer/utils/python/indexer.h           |    2 +-
 src/dftracer/utils/python/trace_reader.cpp    |   31 +-
 .../utils/python/trace_reader_iterator.cpp    |   24 +-
 .../utils/python/trace_reader_iterator.h      |    3 +
 .../utils/python/utilities/aggregator.cpp     |    4 +-
 .../utils/python/utilities/comparator.cpp     |   20 +-
 .../python/utilities/metadata_collector.cpp   |   11 +-
 .../utilities/reconstruction_planner.cpp      |    4 +-
 .../utilities/reorganization_planner.cpp      |    6 +-
 .../utilities/statistics_aggregator.cpp       |    6 +-
 .../python/utilities/statistics_query.cpp     |    6 +-
 src/dftracer/utils/server/trace_api.cpp       |   11 +-
 src/dftracer/utils/server/trace_index.cpp     |   63 +-
 src/dftracer/utils/server/viz_api.cpp         |   16 +-
 .../call_tree/call_tree_internal.cpp          |   11 +-
 .../utilities/call_tree/call_tree_mpi.cpp     |   17 +-
 .../dft/aggregators/aggregator_utility.cpp    |   13 +-
 .../aggregators/chunk_aggregator_utility.cpp  |    4 +-
 .../dft/aggregators/chunk_mapper_utility.cpp  |    4 +-
 .../dft/chunk_extractor_utility.cpp           |    6 +-
 .../dft/chunk_manifest_mapper_utility.cpp     |    2 +-
 .../dft/event_collector_utility.cpp           |    4 +-
 .../dft/indexing/chunk_indexer_utility.cpp    |    2 +-
 .../dft/indexing/chunk_pruner_utility.cpp     |   30 +-
 .../dft/indexing/chunk_statistics.cpp         |   12 +-
 .../queries/delete_chunk_bloom_filters.cpp    |   29 -
 .../queries/delete_chunk_dimension_stats.cpp  |   23 -
 .../queries/delete_chunk_statistics.cpp       |   23 -
 .../indexing/queries/delete_event_ranges.cpp  |   24 -
 .../queries/delete_file_bloom_filter.cpp      |   29 -
 .../queries/delete_hash_resolutions.cpp       |   23 -
 .../queries/delete_metadata_lines.cpp         |   24 -
 .../queries/insert_chunk_bloom_filter.cpp     |   75 -
 .../queries/insert_chunk_dimension_stats.cpp  |   84 -
 .../queries/insert_chunk_statistics.cpp       |   86 -
 .../indexing/queries/insert_event_range.cpp   |   65 -
 .../queries/insert_file_bloom_filter.cpp      |   42 -
 .../queries/insert_hash_resolution.cpp        |   58 -
 .../queries/insert_index_dimension.cpp        |   29 -
 .../queries/insert_metadata_lines.cpp         |   47 -
 .../indexing/queries/insert_provenance.cpp    |   95 -
 .../dft/indexing/queries/manifest_queries.h   |  103 +-
 .../composites/dft/indexing/queries/queries.h |  130 --
 .../queries/query_chunk_bloom_filters.cpp     |   45 -
 .../query_chunk_bloom_filters_batch.cpp       |   58 -
 .../queries/query_chunk_dimension_stats.cpp   |  119 --
 .../queries/query_chunk_statistics.cpp        |  142 --
 .../indexing/queries/query_event_ranges.cpp   |   72 -
 .../queries/query_file_bloom_filter.cpp       |   40 -
 .../query_file_bloom_filters_batch.cpp        |   55 -
 .../queries/query_hash_by_resolved.cpp        |   32 -
 .../queries/query_index_dimensions.cpp        |   41 -
 .../indexing/queries/query_metadata_lines.cpp |   66 -
 .../dft/indexing/queries/query_provenance.cpp |  117 --
 .../queries/query_resolved_by_hash.cpp        |   32 -
 .../indexing/queries/query_time_bounds.cpp    |   36 -
 .../composites/dft/internal/utils.cpp         |   20 +-
 .../dft/metadata_collector_utility.cpp        |   36 +-
 .../dft/reorganize/event_router.cpp           |   12 +-
 .../dft/reorganize/provenance_tracker.cpp     |   70 +-
 .../dft/reorganize/reconstruction_planner.cpp |   13 +-
 .../dft/reorganize/reorganization_planner.cpp |  126 +-
 .../chunk_detail_scanner_utility.cpp          |    2 +-
 .../statistics_aggregator_utility.cpp         |   27 +-
 .../dft/statistics/trace_statistics.cpp       |    2 +-
 .../dft/views/view_builder_utility.cpp        |   20 +-
 .../dft/views/view_reader_utility.cpp         |    6 +-
 .../composites/file_merger_utility.cpp        |    8 +-
 .../indexer/index_builder_utility.cpp         |   69 +-
 .../utilities/indexer/index_database.cpp      | 1645 +++++++++++++----
 .../indexer/internal/checkpoint_size.h        |    2 +-
 .../indexer/internal/gzip/gzip_indexer.cpp    |  434 +++--
 .../indexer/internal/gzip/gzip_indexer.h      |   14 +-
 .../gzip/queries/delete_file_record.cpp       |   37 -
 .../gzip/queries/insert_checkpoint_record.cpp |   36 -
 .../queries/insert_file_metadata_record.cpp   |   33 -
 .../gzip/queries/insert_file_record.cpp       |   35 -
 .../indexer/internal/gzip/queries/queries.h   |   66 -
 .../gzip/queries/query_checkpoint.cpp         |   76 -
 .../gzip/queries/query_checkpoint_size.cpp    |   19 -
 .../gzip/queries/query_checkpoints.cpp        |  105 --
 .../internal/gzip/queries/query_file_id.cpp   |   19 -
 .../internal/gzip/queries/query_max_bytes.cpp |   37 -
 .../internal/gzip/queries/query_num_lines.cpp |   21 -
 .../gzip/queries/query_schema_validity.cpp    |   19 -
 .../gzip/queries/query_stored_file_info.cpp   |   29 -
 .../utilities/indexer/internal/helpers.cpp    |   21 +-
 .../utilities/indexer/internal/helpers.h      |    3 +-
 .../utilities/indexer/internal/indexer_c.cpp  |    6 +-
 .../indexer/internal/indexer_factory.cpp      |   20 +-
 .../indexer/internal/sqlite/database.h        |   13 -
 .../indexer/internal/sqlite/statement.h       |   13 -
 .../insert_archive_metadata_record.cpp        |   32 -
 .../tar/queries/insert_archive_record.cpp     |   35 -
 .../tar/queries/insert_file_record.cpp        |   35 -
 .../queries/insert_tar_checkpoint_record.cpp  |   37 -
 .../tar/queries/insert_tar_file_record.cpp    |   29 -
 .../indexer/internal/tar/queries/queries.h    |  108 --
 .../internal/tar/queries/query_archive_id.cpp |   28 -
 .../internal/tar/queries/query_metadata.cpp   |  143 --
 .../tar/queries/query_tar_checkpoints.cpp     |  165 --
 .../internal/tar/queries/query_tar_files.cpp  |  119 --
 .../indexer/internal/tar/tar_indexer.cpp      |  675 ++++---
 .../indexer/internal/tar/tar_indexer.h        |   26 +-
 .../indexer/internal/transaction_scope.h      |   39 +
 .../utilities/indexer/provenance_database.cpp |  496 +++--
 .../indexer/visitors/bloom_visitor.cpp        |   34 +-
 .../indexer/visitors/manifest_visitor.cpp     |   32 +-
 .../utilities/reader/internal/gzip_reader.cpp |   16 +-
 .../utilities/reader/internal/gzip_reader.h   |    6 +-
 .../utilities/reader/internal/reader_c.cpp    |   10 +-
 .../reader/internal/reader_factory.cpp        |    6 +-
 .../utilities/reader/internal/tar_reader.cpp  |   36 +-
 .../utilities/reader/internal/tar_reader.h    |    6 +-
 .../utils/utilities/reader/trace_reader.cpp   |   20 +-
 .../utils/utilities/replay/replay.cpp         |    4 +-
 tests/CMakeLists.txt                          |    3 +
 tests/binaries/test_dftracer_index.cpp        |   44 +-
 tests/binaries/test_dftracer_info.cpp         |   10 +
 tests/binaries/test_dftracer_organize.cpp     |   26 +-
 tests/binaries/test_dftracer_server.cpp       |   35 +-
 tests/binaries/test_dftracer_tar.cpp          |   26 +-
 tests/python/common.py                        |   36 +-
 tests/python/test_dask.py                     |   89 +-
 tests/python/test_indexer.py                  |  139 +-
 tests/python/test_reorganization_planner.py   |   16 +-
 tests/python/test_statistics_aggregator.py    |   28 +-
 tests/python/test_statistics_query.py         |   32 +-
 tests/python/test_trace_reader.py             |   22 +-
 tests/python/test_trace_reader_arrow.py       |   52 +-
 tests/reader/test_basic_factory.cpp           |   12 +-
 tests/reader/test_reader.c                    |    4 +-
 tests/reader/test_reader.cpp                  |    4 +-
 tests/reader/test_reader_formats.cpp          |    3 +-
 tests/reader/test_reader_stream.cpp           |    2 +-
 .../reader/test_reader_tar_comprehensive.cpp  |    8 +-
 tests/testing_utilities.cpp                   |   11 +-
 tests/testing_utilities.h                     |    2 +-
 tests/utilities/CMakeLists.txt                |    2 +
 .../dft/indexing/test_bloom_query.cpp         |  101 +-
 .../dft/indexing/test_chunk_indexer.cpp       |    6 +-
 .../dft/indexing/test_chunk_pruner.cpp        |   94 +-
 .../indexing/test_manifest_index_builder.cpp  |    8 +-
 .../dft/indexing/test_manifest_indexer.cpp    |    6 +-
 .../dft/indexing/test_manifest_queries.cpp    |   66 +-
 .../test_reconstruct_integration.cpp          |  206 ++-
 .../test_reconstruction_planner.cpp           |   50 +-
 .../test_reorganization_planner.cpp           |   26 +-
 .../test_reorganize_integration.cpp           |   22 +-
 .../statistics/test_statistics_aggregator.cpp |   53 +-
 .../dft/statistics/test_statistics_query.cpp  |    2 +-
 .../dft/statistics/test_trace_statistics.cpp  |    4 +-
 .../composites/dft/test_index_builder.cpp     |   10 +-
 .../dft/test_metadata_collector.cpp           |   19 +-
 .../dft/views/test_view_builder.cpp           |   88 +-
 .../composites/dft/views/test_view_reader.cpp |   11 +-
 .../composites/test_indexed_file_reader.cpp   |   91 +-
 .../composites/test_line_batch_processor.cpp  |   12 +-
 .../lines/test_streaming_line_reader.cpp      |   94 +-
 .../utilities/indexer/test_index_builder.cpp  |   22 +-
 .../utilities/indexer/test_index_database.cpp |  648 +------
 .../indexer/test_provenance_database.cpp      |  244 +--
 .../indexer/test_rocksdb_storage.cpp          |  224 +++
 tests/utilities/indexer/test_scan_prefix.cpp  |  123 ++
 tests/utilities/reader/test_trace_reader.cpp  |   34 +-
 289 files changed, 9809 insertions(+), 8201 deletions(-)
 create mode 100644 cmake/vendor/CPM_0.42.1.cmake
 create mode 100644 docs/source/_static/custom.css
 create mode 100644 docs/source/cpp_api/rocksdb.rst
 delete mode 100644 docs/source/cpp_api/sqlite.rst
 create mode 100644 include/dftracer/utils/core/common/scoped_fd.h
 create mode 100644 include/dftracer/utils/core/env.h
 create mode 100644 include/dftracer/utils/core/rocksdb/async.h
 create mode 100644 include/dftracer/utils/core/rocksdb/database.h
 create mode 100644 include/dftracer/utils/core/rocksdb/db_manager.h
 create mode 100644 include/dftracer/utils/core/rocksdb/filesystem.h
 create mode 100644 include/dftracer/utils/core/rocksdb/key_codec.h
 delete mode 100644 include/dftracer/utils/core/sqlite/async.h
 delete mode 100644 include/dftracer/utils/core/sqlite/database.h
 delete mode 100644 include/dftracer/utils/core/sqlite/error.h
 delete mode 100644 include/dftracer/utils/core/sqlite/statement.h
 delete mode 100644 include/dftracer/utils/core/sqlite/vfs.h
 create mode 100644 include/dftracer/utils/utilities/indexer/internal/scan_prefix.h
 create mode 100644 src/dftracer/utils/core/env.cpp
 create mode 100644 src/dftracer/utils/core/rocksdb/async.cpp
 create mode 100644 src/dftracer/utils/core/rocksdb/database.cpp
 create mode 100644 src/dftracer/utils/core/rocksdb/db_manager.cpp
 create mode 100644 src/dftracer/utils/core/rocksdb/filesystem.cpp
 create mode 100644 src/dftracer/utils/core/rocksdb/key_codec.cpp
 delete mode 100644 src/dftracer/utils/core/sqlite/async.cpp
 delete mode 100644 src/dftracer/utils/core/sqlite/database.cpp
 delete mode 100644 src/dftracer/utils/core/sqlite/error.cpp
 delete mode 100644 src/dftracer/utils/core/sqlite/statement.cpp
 delete mode 100644 src/dftracer/utils/core/sqlite/vfs.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_bloom_filters.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_dimension_stats.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_statistics.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_event_ranges.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_file_bloom_filter.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_hash_resolutions.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_metadata_lines.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_bloom_filter.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_dimension_stats.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_statistics.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_event_range.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_file_bloom_filter.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_hash_resolution.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_index_dimension.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_metadata_lines.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_provenance.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters_batch.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_dimension_stats.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_statistics.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_event_ranges.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filter.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filters_batch.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_hash_by_resolved.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_index_dimensions.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_metadata_lines.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_provenance.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_resolved_by_hash.cpp
 delete mode 100644 src/dftracer/utils/utilities/composites/dft/indexing/queries/query_time_bounds.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/delete_file_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_checkpoint_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_metadata_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint_size.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoints.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_file_id.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_max_bytes.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_num_lines.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_schema_validity.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_stored_file_info.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/sqlite/database.h
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/sqlite/statement.h
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_metadata_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_file_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_checkpoint_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_file_record.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/queries.h
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/query_archive_id.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/query_metadata.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_checkpoints.cpp
 delete mode 100644 src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_files.cpp
 create mode 100644 src/dftracer/utils/utilities/indexer/internal/transaction_scope.h
 create mode 100644 tests/utilities/indexer/test_rocksdb_storage.cpp
 create mode 100644 tests/utilities/indexer/test_scan_prefix.cpp

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 41c000e6..4ef4b04e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,8 +13,8 @@ jobs:
     outputs:
       code: ${{ steps.filter.outputs.code }}
     steps:
-    - uses: actions/checkout@v4
-    - uses: dorny/paths-filter@v3
+    - uses: actions/checkout@v6
+    - uses: dorny/paths-filter@v3.0.2
       id: filter
       with:
         filters: |
@@ -44,24 +44,33 @@ jobs:
         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
     
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
     
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v6.1.0
       with:
         python-version: ${{ matrix.python-version }}
+
+    - name: Cache ccache
+      uses: actions/cache@v5
+      with:
+        path: ~/.ccache
+        key: ccache-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('CMakeLists.txt', 'pyproject.toml', '.github/workflows/ci.yml') }}
+        restore-keys: |
+          ccache-${{ runner.os }}-${{ matrix.python-version }}-
+          ccache-${{ runner.os }}-
     
     - name: Install dependencies (Ubuntu)
       if: runner.os == 'Linux'
       run: |
         sudo apt-get update
-        sudo apt-get install -y build-essential cmake lcov zlib1g-dev libsqlite3-dev pkg-config ninja-build
+        sudo apt-get install -y build-essential cmake ccache lcov zlib1g-dev libsqlite3-dev pkg-config ninja-build
     
     - name: Install dependencies (macOS)
       if: runner.os == 'macOS'
       run: |
         brew update
-        for f in cmake lcov zlib sqlite pkg-config ninja; do
+        for f in cmake ccache lcov zlib sqlite pkg-config ninja; do
           if brew list --versions "$f" >/dev/null; then
             echo "$f already installed"
           else
@@ -80,24 +89,17 @@ jobs:
         make test
     
     - name: Run Python tests (with venv)
+      if: "!((matrix.os == 'ubuntu-22.04' || matrix.os == 'macos-latest') && matrix.python-version == '3.12')"
       run: |
-        make test-py
-    
-    - name: Run Python tests (without venv)
-      run: |
-        pip install --upgrade pip setuptools wheel
-        pip install -e ".[dev]"
-        pytest tests/python -v
-
-    - name: Type check (ty)
-      if: matrix.python-version == '3.12' && runner.os == 'Linux'
-      run: |
-        pip install ty
-        ty check --python "$(which python)" python/
+        if [ "${{ runner.os }}" = "Linux" ] && [ "${{ matrix.python-version }}" = "3.12" ]; then
+          make test-py RUN_TY=1
+        else
+          make test-py
+        fi
 
     - name: Upload coverage reports to Coveralls
       if: (matrix.os == 'ubuntu-22.04' || matrix.os == 'macos-latest') && matrix.python-version == '3.12'
-      uses: coverallsapp/github-action@v2
+      uses: coverallsapp/github-action@v2.3.6
       continue-on-error: true
       with:
         file: coverage/coverage_filtered.info
@@ -113,7 +115,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Coveralls finished
-      uses: coverallsapp/github-action@v2
+      uses: coverallsapp/github-action@v2.3.6
       continue-on-error: true
       with:
         parallel-finished: true
diff --git a/.github/workflows/format-check.yaml b/.github/workflows/format-check.yaml
index 5e2c4a64..1e93e8d8 100644
--- a/.github/workflows/format-check.yaml
+++ b/.github/workflows/format-check.yaml
@@ -13,8 +13,8 @@ jobs:
       cpp: ${{ steps.filter.outputs.cpp }}
       python: ${{ steps.filter.outputs.python }}
     steps:
-    - uses: actions/checkout@v4
-    - uses: dorny/paths-filter@v3
+    - uses: actions/checkout@v6
+    - uses: dorny/paths-filter@v3.0.2
       id: filter
       with:
         filters: |
@@ -35,7 +35,7 @@ jobs:
     runs-on: ubuntu-24.04
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
 
     - name: Install clang-format
       run: |
@@ -54,10 +54,10 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
 
     - name: Install uv
-      uses: astral-sh/setup-uv@v4
+      uses: astral-sh/setup-uv@v8.0.0
 
     - name: Ruff check
       run: uvx ruff check python/ tests/python/
diff --git a/.github/workflows/python-publish.yaml b/.github/workflows/python-publish.yaml
index 93bcd90b..59e2388e 100644
--- a/.github/workflows/python-publish.yaml
+++ b/.github/workflows/python-publish.yaml
@@ -24,15 +24,15 @@ jobs:
         os: [ubuntu-22.04, macos-14]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v6
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.2.1
+        uses: pypa/cibuildwheel@v3.3.0
         env:
           CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* cp313-* cp314-*
           CIBW_SKIP: "*-win32 *-manylinux_i686 *-musllinux_* *-manylinux_aarch64 *-manylinux_ppc64le *-manylinux_s390x"
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v6.0.0
         with:
           name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
           path: ./wheelhouse/*.whl
@@ -41,12 +41,12 @@ jobs:
     name: Build source distribution
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6.1.0
         with:
           python-version: '3.12'
 
@@ -58,7 +58,7 @@ jobs:
       - name: Build sdist
         run: python -m build --sdist
 
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@v6.0.0
         with:
           name: cibw-sdist
           path: dist/*.tar.gz
@@ -70,7 +70,7 @@ jobs:
     if: github.event_name == 'release' && github.event.action == 'published'
 
     steps:
-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v8
         with:
           pattern: cibw-*
           path: dist
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index ac5df96f..d1b42af6 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -10,14 +10,8 @@ build:
     python: "3.11"
   apt_packages:
     - doxygen
-    - cmake
-    - build-essential
-    - zlib1g-dev
-    - libsqlite3-dev
-    - pkg-config
   jobs:
     pre_build:
-      # Run Doxygen to generate C++ API documentation
       - cd docs && doxygen Doxyfile
 
 # Build documentation in the "docs/" directory with Sphinx
@@ -35,5 +29,3 @@ formats:
 python:
   install:
     - requirements: docs/requirements.txt
-    - method: pip
-      path: .
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8fb7e3d..238c0f41 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,18 @@ project(
   VERSION ${DFTRACER_UTILS_VERSION}
   LANGUAGES C CXX)
 
+find_program(CCACHE_EXECUTABLE ccache)
+if(CCACHE_EXECUTABLE)
+  foreach(lang C CXX ASM)
+    if(NOT CMAKE_${lang}_COMPILER_LAUNCHER)
+      set(CMAKE_${lang}_COMPILER_LAUNCHER
+          "${CCACHE_EXECUTABLE}"
+          CACHE STRING "Compiler launcher for ${lang}" FORCE)
+    endif()
+  endforeach()
+  message(STATUS "Using ccache: ${CCACHE_EXECUTABLE}")
+endif()
+
 set(DFTRACER_UTILS_PACKAGE ${PROJECT_NAME})
 set(DFTRACER_UTILS_PACKAGE_NAME ${PROJECT_NAME})
 set(DFTRACER_UTILS_PACKAGE_VERSION "${PROJECT_VERSION}")
diff --git a/Makefile b/Makefile
index 4e3504f5..e9c85b81 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,7 @@
 .PHONY: coverage coverage-clean coverage-view coverage-open test test-coverage test-py build clean format check-format cmake-format lint typecheck help
 
+RUN_TY ?= 0
+
 # Detect build system
 BUILD_GENERATOR := $(shell command -v ninja >/dev/null 2>&1 && echo "Ninja" || echo "Unix Makefiles")
 BUILD_TOOL := $(shell command -v ninja >/dev/null 2>&1 && echo "ninja" || echo "make")
@@ -65,8 +67,15 @@ test-py:
 	@rm -rf .venv_test_py
 	@python3 -m venv .venv_test_py
 	@.venv_test_py/bin/pip install --upgrade pip setuptools wheel
-	@.venv_test_py/bin/pip install -e .[dev]
+	@if [ "$(RUN_TY)" = "1" ]; then \
+		.venv_test_py/bin/pip install -e .[dev] ty; \
+	else \
+		.venv_test_py/bin/pip install -e .[dev]; \
+	fi
 	@.venv_test_py/bin/pytest tests/python -v
+	@if [ "$(RUN_TY)" = "1" ]; then \
+		.venv_test_py/bin/ty check --python "$$(pwd)/.venv_test_py/bin/python" python/; \
+	fi
 	@rm -rf .venv_test_py
 	@echo "Python tests completed successfully!"
 
diff --git a/cmake/modules/CPM.cmake b/cmake/modules/CPM.cmake
index e88873c8..97442324 100644
--- a/cmake/modules/CPM.cmake
+++ b/cmake/modules/CPM.cmake
@@ -2,9 +2,11 @@
 #
 # SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors
 
-set(CPM_DOWNLOAD_VERSION 0.42.0)
+set(CPM_DOWNLOAD_VERSION 0.42.1)
 set(CPM_HASH_SUM
-    "2020b4fc42dba44817983e06342e682ecfc3d2f484a581f11cc5731fbe4dce8a")
+    "f3a6dcc6a04ce9e7f51a127307fa4f699fb2bade357a8eb4c5b45df76e1dc6a5")
+set(CPM_VENDORED_LOCATION
+    "${CMAKE_CURRENT_LIST_DIR}/../vendor/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
 
 if(CPM_SOURCE_CACHE)
   set(CPM_DOWNLOAD_LOCATION
@@ -21,10 +23,34 @@ endif()
 # (~)
 get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
 
+# Prefer the copy of CPM vendored in this repository, so that configuring does
+# not need network access.
+if(EXISTS "${CPM_VENDORED_LOCATION}")
+  get_filename_component(CPM_DOWNLOAD_DIR "${CPM_DOWNLOAD_LOCATION}" DIRECTORY)
+  file(MAKE_DIRECTORY "${CPM_DOWNLOAD_DIR}")
+  # configure_file(COPYONLY) rewrites the destination only when the content
+  # differs and, unlike file(COPY_FILE), is available before CMake 3.21.
+  configure_file("${CPM_VENDORED_LOCATION}" "${CPM_DOWNLOAD_LOCATION}"
+                 COPYONLY)
+  include("${CPM_DOWNLOAD_LOCATION}")
+  return()
+endif()
+
 file(
   DOWNLOAD
   https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
   ${CPM_DOWNLOAD_LOCATION}
-  EXPECTED_HASH SHA256=${CPM_HASH_SUM})
+  EXPECTED_HASH SHA256=${CPM_HASH_SUM}
+  STATUS CPM_DOWNLOAD_STATUS)
+
+# STATUS holds a two-element list: a numeric code (0 on success) and a message.
+list(GET CPM_DOWNLOAD_STATUS 0 CPM_DOWNLOAD_STATUS_CODE)
+if(NOT CPM_DOWNLOAD_STATUS_CODE EQUAL 0)
+  list(GET CPM_DOWNLOAD_STATUS 1 CPM_DOWNLOAD_STATUS_MESSAGE)
+  message(
+    FATAL_ERROR
+      "Failed to download CPM.cmake v${CPM_DOWNLOAD_VERSION}: ${CPM_DOWNLOAD_STATUS_MESSAGE}. "
+      "Either restore network access or vendor the file at ${CPM_VENDORED_LOCATION}.")
+endif()
 
 include(${CPM_DOWNLOAD_LOCATION})
diff --git a/cmake/modules/Dependencies.cmake b/cmake/modules/Dependencies.cmake
index b99680eb..2c5ccbfc 100644
--- a/cmake/modules/Dependencies.cmake
+++ b/cmake/modules/Dependencies.cmake
@@ -430,216 +430,225 @@ function(need_yyjson)
   endif()
 endfunction()
 
-# ==============================================================================
-# Database Dependencies
-# ==============================================================================
-
-function(need_sqlite3)
-  find_package(SQLite3 3.35 QUIET)
-
-  if(SQLite3_FOUND)
-    message(STATUS "Found system SQLite3: ${SQLite3_LIBRARIES}")
+# Find an installed RocksDB; if none is found, build v10.10.1 through CPM.
+function(need_rocksdb)
+  find_package(RocksDB 10.10.1 QUIET CONFIG)
+  if(NOT RocksDB_FOUND)
+    find_package(rocksdb 10.10.1 QUIET CONFIG)
+  endif()
+  if(NOT RocksDB_FOUND AND rocksdb_FOUND)
+    set(RocksDB_FOUND TRUE)
+  endif()
+  if(NOT RocksDB_FOUND)
+    find_package(RocksDB 10.10.1 QUIET)
+  endif()
 
-    # Prefer the modern target name (SQLite3::SQLite3).
-    # Older CMake versions only provide SQLite::SQLite3 (now deprecated).
-    if(NOT TARGET SQLite3::SQLite3)
-      if(TARGET SQLite::SQLite3)
-        # Wrap the deprecated target
-        add_library(SQLite3::SQLite3 ALIAS SQLite::SQLite3)
-      else()
-        add_library(SQLite3::SQLite3 UNKNOWN IMPORTED)
+  if(RocksDB_FOUND)
+    message(STATUS "Found system RocksDB")
+
+    if(NOT TARGET RocksDB::rocksdb)
+      if(TARGET rocksdb)
+        add_library(RocksDB::rocksdb ALIAS rocksdb)
+      elseif(TARGET rocksdb-shared)
+        add_library(RocksDB::rocksdb ALIAS rocksdb-shared)
+      elseif(TARGET RocksDB::RocksDB)
+        add_library(RocksDB::rocksdb ALIAS RocksDB::RocksDB)
+      elseif(DEFINED RocksDB_LIBRARY AND DEFINED RocksDB_INCLUDE_DIR)
+        add_library(RocksDB::rocksdb UNKNOWN IMPORTED)
+        set_target_properties(
+          RocksDB::rocksdb
+          PROPERTIES IMPORTED_LOCATION "${RocksDB_LIBRARY}"
+                     INTERFACE_INCLUDE_DIRECTORIES "${RocksDB_INCLUDE_DIR}")
+      elseif(DEFINED ROCKSDB_LIBRARIES AND DEFINED ROCKSDB_INCLUDE_DIRS)
+        # No single library file is known here, so use an INTERFACE target.
+        add_library(RocksDB::rocksdb INTERFACE IMPORTED)
         set_target_properties(
-          SQLite3::SQLite3
-          PROPERTIES IMPORTED_LOCATION "${SQLite3_LIBRARIES}"
-                     INTERFACE_INCLUDE_DIRECTORIES "${SQLite3_INCLUDE_DIRS}")
+          RocksDB::rocksdb
+          PROPERTIES INTERFACE_LINK_LIBRARIES "${ROCKSDB_LIBRARIES}"
+                     INTERFACE_INCLUDE_DIRECTORIES "${ROCKSDB_INCLUDE_DIRS}")
       endif()
     endif()
 
-    # Set variables in parent scope so they persist outside the function
-    set(SQLite3_FOUND
-        ${SQLite3_FOUND}
-        PARENT_SCOPE)
-    set(SQLite3_LIBRARIES
-        ${SQLite3_LIBRARIES}
-        PARENT_SCOPE)
-    set(SQLite3_INCLUDE_DIRS
-        ${SQLite3_INCLUDE_DIRS}
+    if(NOT TARGET RocksDB::rocksdb)
+      message(
+        FATAL_ERROR
+          "need_rocksdb: RocksDB was found but no usable target could be created."
+      )
+    endif()
+
+    set(RocksDB_FOUND
+        ${RocksDB_FOUND}
         PARENT_SCOPE)
-    set(SQLite3_CPM
+    set(RocksDB_CPM
         FALSE
         PARENT_SCOPE)
   else()
-    # Build with CPM
-    if(NOT SQLite3_ADDED)
+    if(NOT rocksdb_ADDED)
       cpmaddpackage(
         NAME
-        SQLite3
-        URL
-        https://www.sqlite.org/2024/sqlite-amalgamation-3460100.zip
+        rocksdb
+        GITHUB_REPOSITORY
+        facebook/rocksdb
         VERSION
-        3.46.1
-        DOWNLOAD_ONLY
+        10.10.1
+        GIT_TAG
+        v10.10.1
+        OPTIONS
+        "ROCKSDB_BUILD_SHARED ${DFTRACER_UTILS_BUILD_SHARED}"
+        "WITH_TESTS OFF"
+        "WITH_TOOLS OFF"
+        "WITH_CORE_TOOLS OFF"
+        "WITH_BENCHMARK_TOOLS OFF"
+        "WITH_GFLAGS OFF"
+        "WITH_SNAPPY OFF"
+        "WITH_LZ4 ON"
+        "WITH_ZLIB ON"
+        "WITH_ZSTD OFF"
+        "WITH_BZ2 OFF"
+        "USE_RTTI ON"
+        "FAIL_ON_WARNINGS OFF"
+        FORCE
         YES)
     endif()
 
-    if(SQLite3_ADDED)
-      message(STATUS "Built SQLite3 with CPM")
-
-      set(SQLITE3_TARGETS)
-
-      # Create sqlite3 library from amalgamation
-      if(DFTRACER_UTILS_BUILD_SHARED)
-        add_library(sqlite3_shared SHARED ${SQLite3_SOURCE_DIR}/sqlite3.c)
-        target_include_directories(
-          sqlite3_shared
-          PUBLIC $<BUILD_INTERFACE:${SQLite3_SOURCE_DIR}>
-                 $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
-
-        # Enable common SQLite features
-        target_compile_definitions(
-          sqlite3_shared PUBLIC SQLITE_ENABLE_FTS5 SQLITE_ENABLE_JSON1
-                                SQLITE_ENABLE_RTREE SQLITE_THREADSAFE=1)
-
-        if(NOT WIN32)
-          target_link_libraries(sqlite3_shared PRIVATE pthread dl m)
-        endif()
-
-        set_target_properties(
-          sqlite3_shared
-          PROPERTIES OUTPUT_NAME sqlite3
-                     LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
-                     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
-        add_library(SQLite::SQLite3 ALIAS sqlite3_shared)
-        list(APPEND SQLITE3_TARGETS sqlite3_shared)
-        message(STATUS "Added SQLite3 shared library")
+    if(TARGET rocksdb AND NOT TARGET RocksDB::rocksdb_static)
+      add_library(RocksDB::rocksdb_static ALIAS rocksdb)
+    endif()
+    if(TARGET rocksdb-shared AND NOT TARGET RocksDB::rocksdb_shared)
+      add_library(RocksDB::rocksdb_shared ALIAS rocksdb-shared)
+    endif()
+    if(NOT TARGET RocksDB::rocksdb)
+      if(TARGET RocksDB::rocksdb_shared)
+        add_library(RocksDB::rocksdb ALIAS rocksdb-shared)
+      elseif(TARGET RocksDB::rocksdb_static)
+        add_library(RocksDB::rocksdb ALIAS rocksdb)
       endif()
+    endif()
 
-      if(DFTRACER_UTILS_BUILD_STATIC)
-        add_library(sqlite3_static STATIC ${SQLite3_SOURCE_DIR}/sqlite3.c)
-        target_include_directories(
-          sqlite3_static
-          PUBLIC $<BUILD_INTERFACE:${SQLite3_SOURCE_DIR}>
-                 $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
-
-        # Enable common SQLite features
-        target_compile_definitions(
-          sqlite3_static PUBLIC SQLITE_ENABLE_FTS5 SQLITE_ENABLE_JSON1
-                                SQLITE_ENABLE_RTREE SQLITE_THREADSAFE=1)
+    if(rocksdb_ADDED OR TARGET rocksdb OR TARGET rocksdb-shared)
+      message(STATUS "Built RocksDB with CPM")
 
-        if(NOT WIN32)
-          target_link_libraries(sqlite3_static PRIVATE pthread dl m)
-        endif()
+      set(ROCKSDB_LIBRARY_DIR "${CMAKE_BINARY_DIR}/lib")
 
+      if(TARGET rocksdb)
         set_target_properties(
-          sqlite3_static
-          PROPERTIES OUTPUT_NAME sqlite3
-                     LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
-                     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
-        add_library(SQLite::SQLite3_static ALIAS sqlite3_static)
-        list(APPEND SQLITE3_TARGETS sqlite3_static)
-        message(STATUS "Added SQLite3 static library")
-
-        # If only static is built, make it the default alias
-        if(NOT DFTRACER_UTILS_BUILD_SHARED)
-          add_library(SQLite::SQLite3 ALIAS sqlite3_static)
+          rocksdb
+          PROPERTIES POSITION_INDEPENDENT_CODE ON
+                     ARCHIVE_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}"
+                     LIBRARY_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}"
+                     RUNTIME_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}")
+        target_compile_definitions(rocksdb PUBLIC ROCKSDB_USE_RTTI)
+        if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
+          target_compile_options(rocksdb PRIVATE -frtti)
+          target_compile_options(rocksdb PUBLIC -Wno-conversion)
         endif()
+        install(
+          TARGETS rocksdb
+          EXPORT rocksdbTargets
+          ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
       endif()
-
-      # Make sqlite3 installable
-      if(SQLITE3_TARGETS)
+      if(TARGET rocksdb-shared)
+        set_target_properties(
+          rocksdb-shared
+          PROPERTIES POSITION_INDEPENDENT_CODE ON
+                     ARCHIVE_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}"
+                     LIBRARY_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}"
+                     RUNTIME_OUTPUT_DIRECTORY "${ROCKSDB_LIBRARY_DIR}")
+        target_compile_definitions(rocksdb-shared PUBLIC ROCKSDB_USE_RTTI)
+        if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
+          target_compile_options(rocksdb-shared PRIVATE -frtti)
+          target_compile_options(rocksdb-shared PUBLIC -Wno-conversion)
+        endif()
         install(
-          TARGETS ${SQLITE3_TARGETS}
-          EXPORT sqlite3Targets
+          TARGETS rocksdb-shared
+          EXPORT rocksdbTargets
           ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
           LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
           RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
       endif()
 
-      # Install sqlite3 header
-      install(FILES ${SQLite3_SOURCE_DIR}/sqlite3.h
-              DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+      list(APPEND DEPENDENCY_LIBRARY_DIRS "${ROCKSDB_LIBRARY_DIR}")
+      list(REMOVE_DUPLICATES DEPENDENCY_LIBRARY_DIRS)
+      set(DEPENDENCY_LIBRARY_DIRS
+          "${DEPENDENCY_LIBRARY_DIRS}"
+          PARENT_SCOPE)
 
-      # Install the export set
-      install(
-        EXPORT sqlite3Targets
-        FILE sqlite3Targets.cmake
-        NAMESPACE SQLite::
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/sqlite3)
+      list(APPEND CMAKE_BUILD_RPATH "${ROCKSDB_LIBRARY_DIR}")
+      list(REMOVE_DUPLICATES CMAKE_BUILD_RPATH)
+      set(CMAKE_BUILD_RPATH
+          "${CMAKE_BUILD_RPATH}"
+          PARENT_SCOPE)
 
-      set(SQLite3_CPM
+      list(APPEND CMAKE_INSTALL_RPATH "${ROCKSDB_LIBRARY_DIR}")
+      list(REMOVE_DUPLICATES CMAKE_INSTALL_RPATH)
+      set(CMAKE_INSTALL_RPATH
+          "${CMAKE_INSTALL_RPATH}"
+          PARENT_SCOPE)
+
+      set(RocksDB_FOUND
+          TRUE
+          PARENT_SCOPE)
+      set(RocksDB_CPM
           TRUE
           PARENT_SCOPE)
     endif()
   endif()
 endfunction()
 
-# Function to link SQLite3 to a target Parameters: TARGET_NAME - name of the
-# target to link SQLite3 to
-function(link_sqlite3 TARGET_NAME LIBRARY_TYPE)
-  # Validate parameters
+function(link_rocksdb TARGET_NAME LIBRARY_TYPE)
   if(NOT TARGET_NAME)
-    message(FATAL_ERROR "link_sqlite3: TARGET_NAME is required")
+    message(FATAL_ERROR "link_rocksdb: TARGET_NAME is required")
   endif()
 
-  if(NOT TARGET ${TARGET_NAME})
-    message(FATAL_ERROR "link_sqlite3: Target '${TARGET_NAME}' does not exist")
-  endif()
-
-  # Check if any SQLite3 variant is available
-  set(SQLITE3_AVAILABLE FALSE)
-
-  # Check for CPM-built SQLite3
-  if(TARGET sqlite3_shared OR TARGET sqlite3_static)
-    set(SQLITE3_AVAILABLE TRUE)
+  if(NOT LIBRARY_TYPE MATCHES "^(STATIC|SHARED)$")
+    message(
+      FATAL_ERROR "link_rocksdb: LIBRARY_TYPE must be either STATIC or SHARED")
   endif()
 
-  # Check for system SQLite3
-  if(TARGET SQLite::SQLite3)
-    set(SQLITE3_AVAILABLE TRUE)
+  if(NOT TARGET ${TARGET_NAME})
+    message(FATAL_ERROR "link_rocksdb: Target '${TARGET_NAME}' does not exist")
   endif()
 
-  if(NOT SQLITE3_AVAILABLE)
+  if(NOT TARGET RocksDB::rocksdb AND NOT TARGET RocksDB::rocksdb_static
+     AND NOT TARGET RocksDB::rocksdb_shared AND NOT TARGET rocksdb
+     AND NOT TARGET rocksdb-shared)
     message(
       FATAL_ERROR
-        "link_sqlite3: No SQLite3 found! Call need_sqlite3() first or ensure system SQLite3 is available."
+        "link_rocksdb: No RocksDB found! Call need_rocksdb() first or ensure system RocksDB is available."
     )
   endif()
 
-  # Link appropriate SQLite3 variant Use PUBLIC linkage since sqlite3.h is
-  # included in public headers
   if(LIBRARY_TYPE STREQUAL "STATIC")
-    # For static libraries, prefer static SQLite3
-    if(TARGET sqlite3_static)
-      target_link_libraries(${TARGET_NAME} PUBLIC sqlite3_static)
-      message(
-        STATUS "Linked ${TARGET_NAME} to CPM-built sqlite3_static")
-    elseif(TARGET sqlite3_shared)
-      target_link_libraries(${TARGET_NAME} PUBLIC sqlite3_shared)
-      message(
-        STATUS "Linked ${TARGET_NAME} to CPM-built sqlite3_shared")
-    elseif(TARGET SQLite3::SQLite3)
-      target_link_libraries(${TARGET_NAME} PUBLIC SQLite3::SQLite3)
-      message(STATUS "Linked ${TARGET_NAME} to SQLite3::SQLite3")
-    elseif(TARGET SQLite::SQLite3)
-      target_link_libraries(${TARGET_NAME} PUBLIC SQLite::SQLite3)
-      message(STATUS "Linked ${TARGET_NAME} to SQLite::SQLite3")
+    if(TARGET RocksDB::rocksdb_static)
+      target_link_libraries(${TARGET_NAME} PUBLIC RocksDB::rocksdb_static)
+      message(STATUS "Linked ${TARGET_NAME} to RocksDB::rocksdb_static")
+    elseif(TARGET rocksdb)
+      target_link_libraries(${TARGET_NAME} PUBLIC rocksdb)
+      message(STATUS "Linked ${TARGET_NAME} to rocksdb")
+    elseif(TARGET RocksDB::rocksdb)
+      target_link_libraries(${TARGET_NAME} PUBLIC RocksDB::rocksdb)
+      message(STATUS "Linked ${TARGET_NAME} to RocksDB::rocksdb")
+    else()
+      message(FATAL_ERROR "Static RocksDB requested for ${TARGET_NAME}, but no static RocksDB target is available")
     endif()
   else()
-    # For shared libraries, prefer shared SQLite3
-    if(TARGET sqlite3_shared)
-      target_link_libraries(${TARGET_NAME} PUBLIC sqlite3_shared)
-      message(
-        STATUS "Linked ${TARGET_NAME} to CPM-built sqlite3_shared")
-    elseif(TARGET sqlite3_static)
-      target_link_libraries(${TARGET_NAME} PUBLIC sqlite3_static)
-      message(
-        STATUS "Linked ${TARGET_NAME} to CPM-built sqlite3_static")
-    elseif(TARGET SQLite3::SQLite3)
-      target_link_libraries(${TARGET_NAME} PUBLIC SQLite3::SQLite3)
-      message(STATUS "Linked ${TARGET_NAME} to SQLite3::SQLite3")
-    elseif(TARGET SQLite::SQLite3)
-      target_link_libraries(${TARGET_NAME} PUBLIC SQLite::SQLite3)
-      message(STATUS "Linked ${TARGET_NAME} to SQLite::SQLite3")
+    if(TARGET RocksDB::rocksdb_shared)
+      target_link_libraries(${TARGET_NAME} PUBLIC RocksDB::rocksdb_shared)
+      message(STATUS "Linked ${TARGET_NAME} to RocksDB::rocksdb_shared")
+    elseif(TARGET rocksdb-shared)
+      target_link_libraries(${TARGET_NAME} PUBLIC rocksdb-shared)
+      message(STATUS "Linked ${TARGET_NAME} to rocksdb-shared")
+    elseif(TARGET RocksDB::rocksdb)
+      target_link_libraries(${TARGET_NAME} PUBLIC RocksDB::rocksdb)
+      message(STATUS "Linked ${TARGET_NAME} to RocksDB::rocksdb")
+    elseif(TARGET RocksDB::rocksdb_static)
+      target_link_libraries(${TARGET_NAME} PUBLIC RocksDB::rocksdb_static)
+      message(STATUS "Linked ${TARGET_NAME} to RocksDB::rocksdb_static")
+    elseif(TARGET rocksdb)
+      target_link_libraries(${TARGET_NAME} PUBLIC rocksdb)
+      message(STATUS "Linked ${TARGET_NAME} to rocksdb")
     endif()
   endif()
 endfunction()
@@ -647,6 +656,207 @@ endfunction()
 # ==============================================================================
 # Compression Dependencies
 # ==============================================================================
+# Find a system lz4 or build it from source via CPM.
+#
+# Defines the imported target lz4::lz4 (a CPM build also defines
+# lz4::lz4_static and/or lz4::lz4_shared) and sets lz4_FOUND, lz4_INCLUDE_DIRS,
+# lz4_LIBRARIES and lz4_CPM in the caller's scope; the first three are also
+# written to the cache.
+function(need_lz4)
+  # Drop stale cache entries so the find_* calls below search again. A library
+  # path inside the build tree was seeded by an earlier CPM build (see the end
+  # of this function); once that file exists it must not be mistaken for a
+  # system install, or a reconfigure would stop creating the lz4 build targets.
+  if(DEFINED CACHE{lz4_LIBRARIES})
+    string(FIND "${lz4_LIBRARIES}" "${CMAKE_BINARY_DIR}/" lz4_build_tree_pos)
+    if(lz4_build_tree_pos EQUAL 0 OR NOT EXISTS "${lz4_LIBRARIES}")
+      unset(lz4_LIBRARIES CACHE)
+      unset(lz4_INCLUDE_DIRS CACHE)
+    endif()
+  endif()
+  if(DEFINED CACHE{lz4_INCLUDE_DIRS} AND NOT EXISTS "${lz4_INCLUDE_DIRS}")
+    unset(lz4_INCLUDE_DIRS CACHE)
+  endif()
+
+  find_path(lz4_INCLUDE_DIRS NAMES lz4.h)
+  find_library(lz4_LIBRARIES NAMES lz4)
+
+  if(lz4_INCLUDE_DIRS AND lz4_LIBRARIES AND EXISTS "${lz4_LIBRARIES}")
+    message(STATUS "Found system lz4: ${lz4_LIBRARIES}")
+
+    if(NOT TARGET lz4::lz4)
+      add_library(lz4::lz4 UNKNOWN IMPORTED)
+      set_target_properties(
+        lz4::lz4
+        PROPERTIES IMPORTED_LOCATION "${lz4_LIBRARIES}"
+                   INTERFACE_INCLUDE_DIRECTORIES "${lz4_INCLUDE_DIRS}")
+    endif()
+
+    set(lz4_FOUND
+        TRUE
+        PARENT_SCOPE)
+    set(lz4_INCLUDE_DIRS
+        ${lz4_INCLUDE_DIRS}
+        PARENT_SCOPE)
+    set(lz4_LIBRARIES
+        ${lz4_LIBRARIES}
+        PARENT_SCOPE)
+    set(lz4_CPM
+        FALSE
+        PARENT_SCOPE)
+    # Mirror the results into the cache as well.
+    set(lz4_FOUND
+        TRUE
+        CACHE BOOL "lz4 availability" FORCE)
+    set(lz4_INCLUDE_DIRS
+        "${lz4_INCLUDE_DIRS}"
+        CACHE PATH "lz4 include directories" FORCE)
+    set(lz4_LIBRARIES
+        "${lz4_LIBRARIES}"
+        CACHE STRING "lz4 libraries" FORCE)
+  else()
+    if(NOT lz4_ADDED)
+      # Fetch the sources only; the library targets are defined by hand below.
+      cpmaddpackage(
+        NAME
+        lz4
+        GITHUB_REPOSITORY
+        lz4/lz4
+        VERSION
+        1.10.0
+        GIT_TAG
+        v1.10.0
+        DOWNLOAD_ONLY
+        YES)
+    endif()
+
+    if(lz4_ADDED)
+      message(STATUS "Built lz4 with CPM")
+
+      set(LZ4_TARGETS)
+      set(LZ4_SOURCES
+          ${lz4_SOURCE_DIR}/lib/lz4.c
+          ${lz4_SOURCE_DIR}/lib/lz4frame.c
+          ${lz4_SOURCE_DIR}/lib/lz4hc.c
+          ${lz4_SOURCE_DIR}/lib/xxhash.c)
+      set(LZ4_SHARED_OUTPUT
+          "${CMAKE_BINARY_DIR}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}lz4${CMAKE_SHARED_LIBRARY_SUFFIX}"
+      )
+      set(LZ4_STATIC_OUTPUT
+          "${CMAKE_BINARY_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}lz4${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      )
+      set(LZ4_PREFERRED_OUTPUT "${LZ4_STATIC_OUTPUT}")
+      if(DFTRACER_UTILS_BUILD_SHARED)
+        set(LZ4_PREFERRED_OUTPUT "${LZ4_SHARED_OUTPUT}")
+      endif()
+
+      if(DFTRACER_UTILS_BUILD_STATIC)
+        add_library(lz4_static STATIC ${LZ4_SOURCES})
+        target_include_directories(
+          lz4_static
+          PUBLIC $<BUILD_INTERFACE:${lz4_SOURCE_DIR}/lib>
+                 $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+        set_target_properties(
+          lz4_static
+          PROPERTIES OUTPUT_NAME lz4
+                     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
+                     LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+        list(APPEND LZ4_TARGETS lz4_static)
+      endif()
+
+      if(DFTRACER_UTILS_BUILD_SHARED)
+        add_library(lz4_shared SHARED ${LZ4_SOURCES})
+        target_include_directories(
+          lz4_shared
+          PUBLIC $<BUILD_INTERFACE:${lz4_SOURCE_DIR}/lib>
+                 $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+        set_target_properties(
+          lz4_shared
+          PROPERTIES OUTPUT_NAME lz4
+                     ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
+                     LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+        list(APPEND LZ4_TARGETS lz4_shared)
+      endif()
+
+      # Namespaced imported targets pointing at the expected output files;
+      # add_dependencies links each one to the real target producing the file.
+      if(TARGET lz4_static AND NOT TARGET lz4::lz4_static)
+        add_library(lz4::lz4_static UNKNOWN IMPORTED GLOBAL)
+        set_target_properties(
+          lz4::lz4_static
+          PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_OUTPUT}"
+                     INTERFACE_INCLUDE_DIRECTORIES "${lz4_SOURCE_DIR}/lib")
+        add_dependencies(lz4::lz4_static lz4_static)
+      endif()
+      if(TARGET lz4_shared AND NOT TARGET lz4::lz4_shared)
+        add_library(lz4::lz4_shared UNKNOWN IMPORTED GLOBAL)
+        set_target_properties(
+          lz4::lz4_shared
+          PROPERTIES IMPORTED_LOCATION "${LZ4_SHARED_OUTPUT}"
+                     INTERFACE_INCLUDE_DIRECTORIES "${lz4_SOURCE_DIR}/lib")
+        add_dependencies(lz4::lz4_shared lz4_shared)
+      endif()
+      if(NOT TARGET lz4::lz4)
+        add_library(lz4::lz4 UNKNOWN IMPORTED GLOBAL)
+        if(TARGET lz4::lz4_shared)
+          set_target_properties(
+            lz4::lz4
+            PROPERTIES IMPORTED_LOCATION "${LZ4_SHARED_OUTPUT}"
+                       INTERFACE_INCLUDE_DIRECTORIES "${lz4_SOURCE_DIR}/lib")
+          add_dependencies(lz4::lz4 lz4_shared)
+        elseif(TARGET lz4::lz4_static)
+          set_target_properties(
+            lz4::lz4
+            PROPERTIES IMPORTED_LOCATION "${LZ4_STATIC_OUTPUT}"
+                       INTERFACE_INCLUDE_DIRECTORIES "${lz4_SOURCE_DIR}/lib")
+          add_dependencies(lz4::lz4 lz4_static)
+        endif()
+      endif()
+
+      install(FILES ${lz4_SOURCE_DIR}/lib/lz4.h ${lz4_SOURCE_DIR}/lib/lz4frame.h
+                    ${lz4_SOURCE_DIR}/lib/lz4hc.h ${lz4_SOURCE_DIR}/lib/xxhash.h
+              DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+
+      if(LZ4_TARGETS)
+        install(
+          TARGETS ${LZ4_TARGETS}
+          EXPORT lz4Targets
+          ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+          RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+        install(
+          EXPORT lz4Targets
+          FILE lz4Targets.cmake
+          NAMESPACE lz4::
+          DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lz4)
+      endif()
+
+      set(lz4_FOUND
+          TRUE
+          PARENT_SCOPE)
+      set(lz4_INCLUDE_DIRS
+          ${lz4_SOURCE_DIR}/lib
+          PARENT_SCOPE)
+      set(lz4_LIBRARIES
+          ${LZ4_PREFERRED_OUTPUT}
+          PARENT_SCOPE)
+      set(lz4_CPM
+          TRUE
+          PARENT_SCOPE)
+
+      # Seed the variables RocksDB's bundled Findlz4.cmake checks.
+      set(lz4_FOUND
+          TRUE
+          CACHE BOOL "lz4 availability" FORCE)
+      set(lz4_INCLUDE_DIRS
+          "${lz4_SOURCE_DIR}/lib"
+          CACHE PATH "lz4 include directories" FORCE)
+      set(lz4_LIBRARIES
+          "${LZ4_PREFERRED_OUTPUT}"
+          CACHE STRING "lz4 libraries" FORCE)
+    endif()
+  endif()
+endfunction()
 
 function(need_zlib)
   find_package(ZLIB 1.2 QUIET)
diff --git a/cmake/modules/InstallHelpers.cmake b/cmake/modules/InstallHelpers.cmake
index 363b3ae1..4c776c93 100644
--- a/cmake/modules/InstallHelpers.cmake
+++ b/cmake/modules/InstallHelpers.cmake
@@ -196,42 +196,6 @@ else()
     endif()
 endif()
 
-# SQLITE3 dependency
-find_library(SQLITE3_LIBRARY_BUNDLED
-    NAMES sqlite3 libsqlite3
-    PATHS \${_IMPORT_PREFIX}/lib
-    NO_DEFAULT_PATH
-)
-
-if(SQLITE3_LIBRARY_BUNDLED)
-    # Found sqlite3 that was built with this package
-    find_path(SQLITE3_INCLUDE_DIR_BUNDLED
-        NAMES sqlite3.h
-        PATHS \${_IMPORT_PREFIX}/include
-        NO_DEFAULT_PATH
-    )
-
-    if(SQLITE3_INCLUDE_DIR_BUNDLED AND NOT TARGET SQLite::SQLite3)
-        add_library(SQLite::SQLite3 UNKNOWN IMPORTED)
-        set_target_properties(SQLite::SQLite3 PROPERTIES
-            IMPORTED_LOCATION \"\${SQLITE3_LIBRARY_BUNDLED}\"
-            INTERFACE_INCLUDE_DIRECTORIES \"\${SQLITE3_INCLUDE_DIR_BUNDLED}\"
-        )
-    endif()
-else()
-    # Fall back to system sqlite3 via pkg-config (require minimum version 3.35)
-    find_dependency(PkgConfig REQUIRED)
-    pkg_check_modules(SQLITE3 REQUIRED sqlite3>=3.35)
-
-    if(SQLITE3_FOUND AND NOT TARGET SQLite::SQLite3)
-        add_library(SQLite::SQLite3 UNKNOWN IMPORTED)
-        set_target_properties(SQLite::SQLite3 PROPERTIES
-            IMPORTED_LOCATION \"\${SQLITE3_LIBRARIES}\"
-            INTERFACE_INCLUDE_DIRECTORIES \"\${SQLITE3_INCLUDE_DIRS}\"
-        )
-    endif()
-endif()
-
 # YYJSON dependency
 find_library(YYJSON_LIBRARY_BUNDLED
     NAMES yyjson libyyjson
diff --git a/cmake/modules/LibraryHelpers.cmake b/cmake/modules/LibraryHelpers.cmake
index 43b08790..787b23a9 100644
--- a/cmake/modules/LibraryHelpers.cmake
+++ b/cmake/modules/LibraryHelpers.cmake
@@ -213,6 +213,10 @@ function(create_library)
       set_target_properties(${TARGET_NAME} PROPERTIES ${LIB_PROPERTIES})
     endif()
 
+    if(NOT ${TARGET_TYPE} STREQUAL "INTERFACE")
+      target_add_rpath(${TARGET_NAME})
+    endif()
+
     # Enable C++20 coroutines support
     target_enable_coroutine(${TARGET_NAME})
   endmacro()
diff --git a/cmake/vendor/CPM_0.42.1.cmake b/cmake/vendor/CPM_0.42.1.cmake
new file mode 100644
index 00000000..832977c7
--- /dev/null
+++ b/cmake/vendor/CPM_0.42.1.cmake
@@ -0,0 +1,1363 @@
+# CPM.cmake - CMake's missing package manager
+# ===========================================
+# See https://github.com/cpm-cmake/CPM.cmake for usage and update instructions.
+#
+# MIT License
+# -----------
+#[[
+  Copyright (c) 2019-2023 Lars Melchior and contributors
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in all
+  copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+]]
+
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+# Initialize the logging prefix used in CPM's messages, unless a value is already set
+if(NOT CPM_INDENT)
+  set(CPM_INDENT
+      "CPM:"
+      CACHE INTERNAL ""
+  )
+endif()
+
+if(NOT COMMAND cpm_message) # keep a cpm_message command that has already been defined
+  function(cpm_message) # default implementation: forward all arguments to message()
+    message(${ARGV})
+  endfunction()
+endif()
+
+if(DEFINED EXTRACTED_CPM_VERSION) # an externally supplied version replaces the hard-coded one
+  set(CURRENT_CPM_VERSION "${EXTRACTED_CPM_VERSION}${CPM_DEVELOPMENT}")
+else()
+  set(CURRENT_CPM_VERSION 0.42.1)
+endif()
+
+get_filename_component(CPM_CURRENT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" REALPATH)
+if(CPM_DIRECTORY) # CPM_DIRECTORY is cached further below once this file has been processed
+  if(NOT CPM_DIRECTORY STREQUAL CPM_CURRENT_DIRECTORY) # it points at a different copy of CPM
+    if(CPM_VERSION VERSION_LESS CURRENT_CPM_VERSION)
+      message(
+        AUTHOR_WARNING
+          "${CPM_INDENT} \
+A dependency is using a more recent CPM version (${CURRENT_CPM_VERSION}) than the current project (${CPM_VERSION}). \
+It is recommended to upgrade CPM to the most recent version. \
+See https://github.com/cpm-cmake/CPM.cmake for more information."
+      )
+    endif()
+    if(${CMAKE_VERSION} VERSION_LESS "3.17.0")
+      include(FetchContent)
+    endif()
+    return() # stop here: the rest of this copy is not processed
+  endif()
+
+  get_property(
+    CPM_INITIALIZED GLOBAL ""
+    PROPERTY CPM_INITIALIZED
+    SET
+  )
+  if(CPM_INITIALIZED) # global property already set: nothing left to do
+    return()
+  endif()
+endif()
+
+if(CURRENT_CPM_VERSION MATCHES "development-version")
+  message(
+    WARNING "${CPM_INDENT} Your project is using an unstable development version of CPM.cmake. \
+Please update to a recent release if possible. \
+See https://github.com/cpm-cmake/CPM.cmake for details."
+  )
+endif()
+
+set_property(GLOBAL PROPERTY CPM_INITIALIZED true)
+
+macro(cpm_set_policies) # macro: the set() calls below take effect in the caller's variable scope
+  # the policy allows us to change options without caching
+  cmake_policy(SET CMP0077 NEW)
+  set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+
+  # the policy allows us to change set(CACHE) without caching
+  if(POLICY CMP0126)
+    cmake_policy(SET CMP0126 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0126 NEW)
+  endif()
+
+  # The policy uses the download time for timestamp, instead of the timestamp in the archive. This
+  # allows for proper rebuilds when a project's URL changes
+  if(POLICY CMP0135)
+    cmake_policy(SET CMP0135 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0135 NEW)
+  endif()
+
+  # treat relative git repository paths as being relative to the parent project's remote
+  if(POLICY CMP0150)
+    cmake_policy(SET CMP0150 NEW)
+    set(CMAKE_POLICY_DEFAULT_CMP0150 NEW)
+  endif()
+endmacro()
+cpm_set_policies()
+
+option(CPM_USE_LOCAL_PACKAGES "Always try to use `find_package` to get dependencies"
+       $ENV{CPM_USE_LOCAL_PACKAGES}
+) # each option in this group takes its default from the environment variable of the same name
+option(CPM_LOCAL_PACKAGES_ONLY "Only use `find_package` to get dependencies"
+       $ENV{CPM_LOCAL_PACKAGES_ONLY}
+)
+option(CPM_DOWNLOAD_ALL "Always download dependencies from source" $ENV{CPM_DOWNLOAD_ALL})
+option(CPM_DONT_UPDATE_MODULE_PATH "Don't update the module path to allow using find_package"
+       $ENV{CPM_DONT_UPDATE_MODULE_PATH}
+)
+option(CPM_DONT_CREATE_PACKAGE_LOCK "Don't create a package lock file in the binary path"
+       $ENV{CPM_DONT_CREATE_PACKAGE_LOCK}
+)
+option(CPM_INCLUDE_ALL_IN_PACKAGE_LOCK
+       "Add all packages added through CPM.cmake to the package lock"
+       $ENV{CPM_INCLUDE_ALL_IN_PACKAGE_LOCK}
+)
+option(CPM_USE_NAMED_CACHE_DIRECTORIES
+       "Use additional directory of package name in cache on the most nested level."
+       $ENV{CPM_USE_NAMED_CACHE_DIRECTORIES}
+)
+
+set(CPM_VERSION
+    ${CURRENT_CPM_VERSION}
+    CACHE INTERNAL ""
+)
+set(CPM_DIRECTORY
+    ${CPM_CURRENT_DIRECTORY}
+    CACHE INTERNAL ""
+)
+set(CPM_FILE
+    ${CMAKE_CURRENT_LIST_FILE}
+    CACHE INTERNAL ""
+)
+set(CPM_PACKAGES
+    ""
+    CACHE INTERNAL ""
+)
+set(CPM_DRY_RUN
+    OFF
+    CACHE INTERNAL "Don't download or configure dependencies (for testing)"
+)
+
+if(DEFINED ENV{CPM_SOURCE_CACHE}) # the environment variable supplies the default cache location
+  set(CPM_SOURCE_CACHE_DEFAULT $ENV{CPM_SOURCE_CACHE})
+else()
+  set(CPM_SOURCE_CACHE_DEFAULT OFF)
+endif()
+
+set(CPM_SOURCE_CACHE
+    ${CPM_SOURCE_CACHE_DEFAULT}
+    CACHE PATH "Directory to download CPM dependencies"
+)
+
+if(NOT CPM_DONT_UPDATE_MODULE_PATH AND NOT DEFINED CMAKE_FIND_PACKAGE_REDIRECTS_DIR)
+  set(CPM_MODULE_PATH
+      "${CMAKE_BINARY_DIR}/CPM_modules"
+      CACHE INTERNAL ""
+  )
+  # start from an empty module directory (drops any previously generated modules)
+  file(REMOVE_RECURSE ${CPM_MODULE_PATH})
+  file(MAKE_DIRECTORY ${CPM_MODULE_PATH})
+  # locally added CPM modules should override global packages
+  set(CMAKE_MODULE_PATH "${CPM_MODULE_PATH};${CMAKE_MODULE_PATH}")
+endif()
+
+if(NOT CPM_DONT_CREATE_PACKAGE_LOCK) # file(WRITE) starts the lock file afresh with just the header
+  set(CPM_PACKAGE_LOCK_FILE
+      "${CMAKE_BINARY_DIR}/cpm-package-lock.cmake"
+      CACHE INTERNAL ""
+  )
+  file(WRITE ${CPM_PACKAGE_LOCK_FILE}
+       "# CPM Package Lock\n# This file should be committed to version control\n\n"
+  )
+endif()
+
+include(FetchContent)
+
+# Try to infer package name from git repository uri (path or url); the uri must end in `.git`
+function(cpm_package_name_from_git_uri URI RESULT)
+  if("${URI}" MATCHES "([^/:]+)/?\\.git/?$") # `\\.` matches a literal dot, not any character
+    set(${RESULT}
+        ${CMAKE_MATCH_1}
+        PARENT_SCOPE
+    )
+  else()
+    unset(${RESULT} PARENT_SCOPE) # no name could be inferred: RESULT is left undefined
+  endif()
+endfunction()
+
+# Find the shortest hash that can be used, e.g. if origin_hash is
+# cccb77ae9609d2768ed80dd42cec54f77b1f1455 the following files will be checked, until one is found
+# that is either empty (allowing us to assign origin_hash), or whose contents match ${origin_hash}
+#
+# * .../cccb.hash
+# * .../cccb77ae.hash
+# * .../cccb77ae9609.hash
+# * .../cccb77ae9609d276.hash
+# * etc
+#
+# We will be able to use a shorter path with very high probability, but in the (rare) event that the
+# first couple characters collide, we will check longer and longer substrings.
+function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
+  # for compatibility with caches populated by a previous version of CPM, check if a directory using
+  # the full hash already exists
+  if(EXISTS "${source_cache_dir}/${origin_hash}")
+    set(${short_hash_output_var}
+        "${origin_hash}"
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  foreach(len RANGE 4 40 4) # candidate prefix lengths 4, 8, ..., 40
+    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)
+    set(hash_lock ${source_cache_dir}/${short_hash}.lock)
+    set(hash_fp ${source_cache_dir}/${short_hash}.hash)
+    # Take a lock, so we don't have a race condition with another instance of cmake. We will release
+    # this lock when we can, however, if there is an error, we want to ensure it gets released on
+    # its own on exit from the function.
+    file(LOCK ${hash_lock} GUARD FUNCTION)
+
+    # Load the contents of .../${short_hash}.hash
+    file(TOUCH ${hash_fp}) # creates the file if it does not exist yet
+    file(READ ${hash_fp} hash_fp_contents)
+
+    if(hash_fp_contents STREQUAL "")
+      # Write the origin hash
+      file(WRITE ${hash_fp} ${origin_hash})
+      file(LOCK ${hash_lock} RELEASE)
+      break()
+    elseif(hash_fp_contents STREQUAL origin_hash)
+      file(LOCK ${hash_lock} RELEASE)
+      break()
+    else() # this prefix is recorded for a different hash: continue with the next longer prefix
+      file(LOCK ${hash_lock} RELEASE)
+    endif()
+  endforeach()
+  set(${short_hash_output_var}
+      "${short_hash}"
+      PARENT_SCOPE
+  )
+endfunction()
+
+# Try to infer package name and version from a url; outName/outVer are set or unset in the caller
+function(cpm_package_name_and_ver_from_url url outName outVer)
+  if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
+    # We matched an archive
+    set(filename "${CMAKE_MATCH_1}")
+
+    if(filename MATCHES "([a-zA-Z0-9_\\.-]+)[_-]v?(([0-9]+\\.)*[0-9]+[a-zA-Z0-9]*)")
+      # We matched <name>-<version> (ie foo-1.2.3)
+      set(${outName}
+          "${CMAKE_MATCH_1}"
+          PARENT_SCOPE
+      )
+      set(${outVer}
+          "${CMAKE_MATCH_2}"
+          PARENT_SCOPE
+      )
+    elseif(filename MATCHES "(([0-9]+\\.)+[0-9]+[a-zA-Z0-9]*)")
+      # We couldn't find a name, but we found a version
+      #
+      # In many cases (which we don't handle here) the url would look something like
+      # `irrelevant/ACTUAL_PACKAGE_NAME/irrelevant/1.2.3.zip`. In such a case we can't possibly
+      # distinguish the package name from the irrelevant bits. Moreover if we try to match the
+      # package name from the filename, we'd get bogus at best.
+      unset(${outName} PARENT_SCOPE)
+      set(${outVer}
+          "${CMAKE_MATCH_1}"
+          PARENT_SCOPE
+      )
+    else()
+      # Boldly assume that the file name is the package name.
+      #
+      # Yes, something like `irrelevant/ACTUAL_NAME/irrelevant/download.zip` will ruin our day, but
+      # such cases should be quite rare. No popular service does this... we think.
+      set(${outName}
+          "${filename}"
+          PARENT_SCOPE
+      )
+      unset(${outVer} PARENT_SCOPE)
+    endif()
+  else()
+    # No idea yet what to do with non-archives
+    unset(${outName} PARENT_SCOPE)
+    unset(${outVer} PARENT_SCOPE)
+  endif()
+endfunction()
+
+function(cpm_find_package NAME VERSION) # find_package() wrapper; sets CPM_PACKAGE_FOUND in caller
+  string(REPLACE " " ";" EXTRA_ARGS "${ARGN}") # space-separated extra arguments become a list
+  find_package(${NAME} ${VERSION} ${EXTRA_ARGS} QUIET)
+  if(${NAME}_FOUND) # read the NAME parameter rather than the caller's CPM_ARGS_NAME variable
+    if(DEFINED ${NAME}_VERSION)
+      set(VERSION ${${NAME}_VERSION})
+    endif()
+    cpm_message(STATUS "${CPM_INDENT} Using local package ${NAME}@${VERSION}")
+    CPMRegisterPackage(${NAME} "${VERSION}")
+    set(CPM_PACKAGE_FOUND
+        YES
+        PARENT_SCOPE
+    )
+  else()
+    set(CPM_PACKAGE_FOUND
+        NO
+        PARENT_SCOPE
+    )
+  endif()
+endfunction()
+
+# Create a custom FindXXX.cmake module for a CPM package. This prevents `find_package(NAME)` from
+# finding the system library
+function(cpm_create_module_file Name) # ARGN: text embedded in the generated Find module
+  if(NOT CPM_DONT_UPDATE_MODULE_PATH) # when the option is set, no file is written at all
+    if(DEFINED CMAKE_FIND_PACKAGE_REDIRECTS_DIR)
+      # Redirect find_package calls to the CPM package. This is what FetchContent does when you set
+      # OVERRIDE_FIND_PACKAGE. The CMAKE_FIND_PACKAGE_REDIRECTS_DIR works for find_package in CONFIG
+      # mode, unlike the Find${Name}.cmake fallback. CMAKE_FIND_PACKAGE_REDIRECTS_DIR is not defined
+      # in script mode, or in CMake < 3.24.
+      # https://cmake.org/cmake/help/latest/module/FetchContent.html#fetchcontent-find-package-integration-examples
+      string(TOLOWER ${Name} NameLower)
+      file(WRITE ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/${NameLower}-config.cmake
+           "include(\"\${CMAKE_CURRENT_LIST_DIR}/${NameLower}-extra.cmake\" OPTIONAL)\n"
+           "include(\"\${CMAKE_CURRENT_LIST_DIR}/${Name}Extra.cmake\" OPTIONAL)\n"
+      )
+      file(WRITE ${CMAKE_FIND_PACKAGE_REDIRECTS_DIR}/${NameLower}-config-version.cmake
+           "set(PACKAGE_VERSION_COMPATIBLE TRUE)\n" "set(PACKAGE_VERSION_EXACT TRUE)\n"
+      )
+    else() # fallback: generate Find${Name}.cmake inside CPM_MODULE_PATH
+      file(WRITE ${CPM_MODULE_PATH}/Find${Name}.cmake
+           "include(\"${CPM_FILE}\")\n${ARGN}\nset(${Name}_FOUND TRUE)"
+      )
+    endif()
+  endif()
+endfunction()
+
+# Find a package locally or fall back to CPMAddPackage (all arguments are forwarded unchanged)
+function(CPMFindPackage)
+  set(oneValueArgs NAME VERSION GIT_TAG FIND_PACKAGE_ARGUMENTS)
+
+  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "" ${ARGN})
+
+  if(NOT DEFINED CPM_ARGS_VERSION) # no explicit VERSION: try to derive one from GIT_TAG
+    if(DEFINED CPM_ARGS_GIT_TAG)
+      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
+    endif()
+  endif()
+
+  set(downloadPackage ${CPM_DOWNLOAD_ALL}) # global default; per-package overrides follow
+  if(DEFINED CPM_DOWNLOAD_${CPM_ARGS_NAME}) # a CMake variable takes precedence ...
+    set(downloadPackage ${CPM_DOWNLOAD_${CPM_ARGS_NAME}})
+  elseif(DEFINED ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}}) # ... over the environment variable
+    set(downloadPackage $ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}})
+  endif()
+  if(downloadPackage) # download requested: skip the find_package() attempt entirely
+    CPMAddPackage(${ARGN})
+    cpm_export_variables(${CPM_ARGS_NAME})
+    return()
+  endif()
+
+  cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
+
+  if(NOT CPM_PACKAGE_FOUND) # not found locally: add the package through CPMAddPackage instead
+    CPMAddPackage(${ARGN})
+    cpm_export_variables(${CPM_ARGS_NAME})
+  endif()
+
+endfunction()
+
+# Checks if a package has been added before; the answer is returned in CPM_PACKAGE_ALREADY_ADDED
+function(cpm_check_if_package_already_added CPM_ARGS_NAME CPM_ARGS_VERSION)
+  if("${CPM_ARGS_NAME}" IN_LIST CPM_PACKAGES)
+    CPMGetPackageVersion(${CPM_ARGS_NAME} CPM_PACKAGE_VERSION)
+    if("${CPM_PACKAGE_VERSION}" VERSION_LESS "${CPM_ARGS_VERSION}") # warning only, no replacement
+      message(
+        WARNING
+          "${CPM_INDENT} Requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION})."
+      )
+    endif()
+    cpm_get_fetch_properties(${CPM_ARGS_NAME})
+    set(${CPM_ARGS_NAME}_ADDED NO) # presumably consumed by cpm_export_variables below - confirm
+    set(CPM_PACKAGE_ALREADY_ADDED
+        YES
+        PARENT_SCOPE
+    )
+    cpm_export_variables(${CPM_ARGS_NAME})
+  else()
+    set(CPM_PACKAGE_ALREADY_ADDED
+        NO
+        PARENT_SCOPE
+    )
+  endif()
+endfunction()
+
+# Parse the argument of CPMAddPackage in case a single one was provided and convert it to a list of
+# arguments which can then be parsed idiomatically. For example gh:foo/bar@1.2.3 will be converted
+# to: GITHUB_REPOSITORY;foo/bar;VERSION;1.2.3
+function(cpm_parse_add_package_single_arg arg outArgs) # the list is returned through outArgs
+  # Look for a scheme
+  if("${arg}" MATCHES "^([a-zA-Z]+):(.+)$")
+    string(TOLOWER "${CMAKE_MATCH_1}" scheme)
+    set(uri "${CMAKE_MATCH_2}")
+
+    # Check for CPM-specific schemes
+    if(scheme STREQUAL "gh")
+      set(out "GITHUB_REPOSITORY;${uri}")
+      set(packageType "git")
+    elseif(scheme STREQUAL "gl")
+      set(out "GITLAB_REPOSITORY;${uri}")
+      set(packageType "git")
+    elseif(scheme STREQUAL "bb")
+      set(out "BITBUCKET_REPOSITORY;${uri}")
+      set(packageType "git")
+      # A CPM-specific scheme was not found. Looks like this is a generic URL so try to determine
+      # type
+    elseif(arg MATCHES "\\.git/?(@|#|$)") # `\\.` requires a literal dot before `git`
+      set(out "GIT_REPOSITORY;${arg}")
+      set(packageType "git")
+    else()
+      # Fall back to a URL
+      set(out "URL;${arg}")
+      set(packageType "archive")
+
+      # We could also check for SVN since FetchContent supports it, but SVN is so rare these days.
+      # We just won't bother with the additional complexity it will induce in this function. SVN is
+      # done by multi-arg
+    endif()
+  else()
+    if(arg MATCHES "\\.git/?(@|#|$)") # same literal-dot check for arguments without a scheme
+      set(out "GIT_REPOSITORY;${arg}")
+      set(packageType "git")
+    else()
+      # Give up
+      message(FATAL_ERROR "${CPM_INDENT} Can't determine package type of '${arg}'")
+    endif()
+  endif()
+
+  # For all packages we interpret @... as version. Only replace the last occurrence. Thus URIs
+  # containing '@' can be used
+  string(REGEX REPLACE "@([^@]+)$" ";VERSION;\\1" out "${out}")
+
+  # Parse the rest according to package type
+  if(packageType STREQUAL "git")
+    # For git repos we interpret #... as a tag or branch or commit hash
+    string(REGEX REPLACE "#([^#]+)$" ";GIT_TAG;\\1" out "${out}")
+  elseif(packageType STREQUAL "archive")
+    # For archives we interpret #... as a URL hash.
+    string(REGEX REPLACE "#([^#]+)$" ";URL_HASH;\\1" out "${out}")
+    # We don't try to parse the version if it's not provided explicitly. cpm_get_version_from_url
+    # should do this at a later point
+  else()
+    # We should never get here. This is an assertion and hitting it means there's a problem with the
+    # code above. A packageType was set, but not handled by this if-else.
+    message(FATAL_ERROR "${CPM_INDENT} Unsupported package type '${packageType}' of '${arg}'")
+  endif()
+
+  set(${outArgs}
+      ${out}
+      PARENT_SCOPE
+  )
+endfunction()
+
+# Check that the working directory for a git repo is clean; TRUE/FALSE is stored in `isClean`
+function(cpm_check_git_working_dir_is_clean repoPath gitTag isClean)
+
+  find_package(Git REQUIRED)
+
+  if(NOT GIT_EXECUTABLE)
+    # No git executable, assume directory is clean
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  # check for uncommitted changes
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} status --porcelain
+    RESULT_VARIABLE resultGitStatus
+    OUTPUT_VARIABLE repoStatus
+    OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
+    WORKING_DIRECTORY ${repoPath}
+  )
+  if(resultGitStatus)
+    # not supposed to happen, assume clean anyway
+    message(WARNING "${CPM_INDENT} Calling git status on folder ${repoPath} failed")
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  if(NOT "${repoStatus}" STREQUAL "") # any porcelain output means the working tree is modified
+    set(${isClean}
+        FALSE
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
+  # check for committed changes
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} diff -s --exit-code ${gitTag}
+    RESULT_VARIABLE resultGitDiff
+    OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_QUIET
+    WORKING_DIRECTORY ${repoPath}
+  )
+
+  if(resultGitDiff EQUAL 0) # by name: a multi-word error string must not be expanded into if()
+    set(${isClean}
+        TRUE
+        PARENT_SCOPE
+    )
+  else()
+    set(${isClean}
+        FALSE
+        PARENT_SCOPE
+    )
+  endif()
+
+endfunction()
+
+# Add PATCH_COMMAND to CPM_ARGS_UNPARSED_ARGUMENTS. This method consumes a list of files in ARGN
+# then generates a `PATCH_COMMAND` appropriate for `ExternalProject_Add()`. This command is appended
+# to the parent scope's `CPM_ARGS_UNPARSED_ARGUMENTS`.
+function(cpm_add_patches)
+  # Return if no patch files are supplied.
+  if(NOT ARGN)
+    return()
+  endif()
+
+  # Find the patch program.
+  find_program(PATCH_EXECUTABLE patch)
+  if(CMAKE_HOST_WIN32 AND NOT PATCH_EXECUTABLE)
+    # The Windows git executable is distributed with patch.exe. Find the path to the executable, if
+    # it exists, then search `../usr/bin` and `../../usr/bin` for patch.exe.
+    find_package(Git QUIET)
+    if(GIT_EXECUTABLE)
+      get_filename_component(extra_search_path ${GIT_EXECUTABLE} DIRECTORY)
+      get_filename_component(extra_search_path_1up ${extra_search_path} DIRECTORY)
+      get_filename_component(extra_search_path_2up ${extra_search_path_1up} DIRECTORY)
+      find_program(
+        PATCH_EXECUTABLE patch HINTS "${extra_search_path_1up}/usr/bin"
+                                     "${extra_search_path_2up}/usr/bin"
+      )
+    endif()
+  endif()
+  if(NOT PATCH_EXECUTABLE)
+    message(FATAL_ERROR "Couldn't find `patch` executable to use with PATCHES keyword.")
+  endif()
+
+  # Create a temporary copy of CPM_ARGS_UNPARSED_ARGUMENTS that the patch commands are appended to
+  set(temp_list ${CPM_ARGS_UNPARSED_ARGUMENTS})
+
+  # Ensure each file exists (or error out) and add it to the list.
+  set(first_item True)
+  foreach(PATCH_FILE ${ARGN})
+    # Make sure the patch file exists, if we can't find it, try again in the current directory.
+    if(NOT EXISTS "${PATCH_FILE}")
+      if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
+        message(FATAL_ERROR "Couldn't find patch file: '${PATCH_FILE}'")
+      endif()
+      set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}")
+    endif()
+
+    # Convert to absolute path for use with patch file command.
+    get_filename_component(PATCH_FILE "${PATCH_FILE}" ABSOLUTE)
+
+    # The first patch entry must be preceded by "PATCH_COMMAND" while the following items are
+    # preceded by "&&".
+    if(first_item)
+      set(first_item False)
+      list(APPEND temp_list "PATCH_COMMAND")
+    else()
+      list(APPEND temp_list "&&")
+    endif()
+    # Add the patch command to the list
+    list(APPEND temp_list "${PATCH_EXECUTABLE}" "-p1" "<" "${PATCH_FILE}")
+  endforeach()
+
+  # Move temp out into parent scope.
+  set(CPM_ARGS_UNPARSED_ARGUMENTS
+      ${temp_list}
+      PARENT_SCOPE
+  )
+
+endfunction()
+
+# Method to overwrite internal FetchContent properties, to allow using CPM.cmake to overload
+# FetchContent calls. As these are internal cmake properties, this method should be used carefully
+# and may need modification in future CMake versions. Source:
+# https://github.com/Kitware/CMake/blob/dc3d0b5a0a7d26d43d6cfeb511e224533b5d188f/Modules/FetchContent.cmake#L1152
+function(cpm_override_fetchcontent contentName)
+  cmake_parse_arguments(PARSE_ARGV 1 arg "" "SOURCE_DIR;BINARY_DIR" "") # skips contentName
+  if(NOT "${arg_UNPARSED_ARGUMENTS}" STREQUAL "") # only SOURCE_DIR and BINARY_DIR are accepted
+    message(FATAL_ERROR "${CPM_INDENT} Unsupported arguments: ${arg_UNPARSED_ARGUMENTS}")
+  endif()
+
+  string(TOLOWER ${contentName} contentNameLower) # property names use the lower-cased name
+  set(prefix "_FetchContent_${contentNameLower}")
+
+  set(propertyName "${prefix}_sourceDir")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} "${arg_SOURCE_DIR}")
+
+  set(propertyName "${prefix}_binaryDir")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} "${arg_BINARY_DIR}")
+
+  set(propertyName "${prefix}_populated")
+  define_property(
+    GLOBAL
+    PROPERTY ${propertyName}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${propertyName} TRUE) # record the content as already populated
+endfunction()
+
+# Download and add a package from source
+function(CPMAddPackage)
+  cpm_set_policies()
+
+  set(oneValueArgs
+      NAME
+      FORCE
+      VERSION
+      GIT_TAG
+      DOWNLOAD_ONLY
+      GITHUB_REPOSITORY
+      GITLAB_REPOSITORY
+      BITBUCKET_REPOSITORY
+      GIT_REPOSITORY
+      SOURCE_DIR
+      FIND_PACKAGE_ARGUMENTS
+      NO_CACHE
+      SYSTEM
+      GIT_SHALLOW
+      EXCLUDE_FROM_ALL
+      SOURCE_SUBDIR
+      CUSTOM_CACHE_KEY
+  )
+
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES)
+
+  list(LENGTH ARGN argnLength)
+
+  # Parse single shorthand argument
+  if(argnLength EQUAL 1)
+    cpm_parse_add_package_single_arg("${ARGN}" ARGN)
+
+    # The shorthand syntax implies EXCLUDE_FROM_ALL and SYSTEM
+    set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;")
+
+    # Parse URI shorthand argument
+  elseif(argnLength GREATER 1 AND "${ARGV0}" STREQUAL "URI")
+    list(REMOVE_AT ARGN 0 1) # remove "URI gh:<...>@version#tag"
+    cpm_parse_add_package_single_arg("${ARGV1}" ARGV0)
+
+    set(ARGN "${ARGV0};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;${ARGN}")
+  endif()
+
+  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")
+
+  # Set default values for arguments
+  if(NOT DEFINED CPM_ARGS_VERSION)
+    if(DEFINED CPM_ARGS_GIT_TAG)
+      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
+    endif()
+  endif()
+
+  if(CPM_ARGS_DOWNLOAD_ONLY)
+    set(DOWNLOAD_ONLY ${CPM_ARGS_DOWNLOAD_ONLY})
+  else()
+    set(DOWNLOAD_ONLY NO)
+  endif()
+
+  if(DEFINED CPM_ARGS_GITHUB_REPOSITORY)
+    set(CPM_ARGS_GIT_REPOSITORY "https://github.com/${CPM_ARGS_GITHUB_REPOSITORY}.git")
+  elseif(DEFINED CPM_ARGS_GITLAB_REPOSITORY)
+    set(CPM_ARGS_GIT_REPOSITORY "https://gitlab.com/${CPM_ARGS_GITLAB_REPOSITORY}.git")
+  elseif(DEFINED CPM_ARGS_BITBUCKET_REPOSITORY)
+    set(CPM_ARGS_GIT_REPOSITORY "https://bitbucket.org/${CPM_ARGS_BITBUCKET_REPOSITORY}.git")
+  endif()
+
+  if(DEFINED CPM_ARGS_GIT_REPOSITORY)
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_REPOSITORY ${CPM_ARGS_GIT_REPOSITORY})
+    if(NOT DEFINED CPM_ARGS_GIT_TAG)
+      set(CPM_ARGS_GIT_TAG v${CPM_ARGS_VERSION})
+    endif()
+
+    # If a name wasn't provided, try to infer it from the git repo
+    if(NOT DEFINED CPM_ARGS_NAME)
+      cpm_package_name_from_git_uri(${CPM_ARGS_GIT_REPOSITORY} CPM_ARGS_NAME)
+    endif()
+  endif()
+
+  set(CPM_SKIP_FETCH FALSE)
+
+  if(DEFINED CPM_ARGS_GIT_TAG)
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_TAG ${CPM_ARGS_GIT_TAG})
+    # If GIT_SHALLOW is explicitly specified, honor the value.
+    if(DEFINED CPM_ARGS_GIT_SHALLOW)
+      list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW ${CPM_ARGS_GIT_SHALLOW})
+    endif()
+  endif()
+
+  if(DEFINED CPM_ARGS_URL)
+    # If a name or version aren't provided, try to infer them from the URL
+    list(GET CPM_ARGS_URL 0 firstUrl)
+    cpm_package_name_and_ver_from_url(${firstUrl} nameFromUrl verFromUrl)
+    # If we fail to obtain name and version from the first URL, we could try other URLs if any.
+    # However multiple URLs are expected to be quite rare, so for now we won't bother.
+
+    # If the caller provided their own name and version, they trump the inferred ones.
+    if(NOT DEFINED CPM_ARGS_NAME)
+      set(CPM_ARGS_NAME ${nameFromUrl})
+    endif()
+    if(NOT DEFINED CPM_ARGS_VERSION)
+      set(CPM_ARGS_VERSION ${verFromUrl})
+    endif()
+
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS URL "${CPM_ARGS_URL}")
+  endif()
+
+  # Check for required arguments
+
+  if(NOT DEFINED CPM_ARGS_NAME)
+    message(
+      FATAL_ERROR
+        "${CPM_INDENT} 'NAME' was not provided and couldn't be automatically inferred for package added with arguments: '${ARGN}'"
+    )
+  endif()
+
+  # Check if package has been added before
+  cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}")
+  if(CPM_PACKAGE_ALREADY_ADDED)
+    cpm_export_variables(${CPM_ARGS_NAME})
+    return()
+  endif()
+
+  # Check for manual overrides
+  if(NOT CPM_ARGS_FORCE AND NOT "${CPM_${CPM_ARGS_NAME}_SOURCE}" STREQUAL "")
+    set(PACKAGE_SOURCE ${CPM_${CPM_ARGS_NAME}_SOURCE})
+    set(CPM_${CPM_ARGS_NAME}_SOURCE "")
+    CPMAddPackage(
+      NAME "${CPM_ARGS_NAME}"
+      SOURCE_DIR "${PACKAGE_SOURCE}"
+      EXCLUDE_FROM_ALL "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+      SYSTEM "${CPM_ARGS_SYSTEM}"
+      PATCHES "${CPM_ARGS_PATCHES}"
+      OPTIONS "${CPM_ARGS_OPTIONS}"
+      SOURCE_SUBDIR "${CPM_ARGS_SOURCE_SUBDIR}"
+      DOWNLOAD_ONLY "${DOWNLOAD_ONLY}"
+      FORCE True
+    )
+    cpm_export_variables(${CPM_ARGS_NAME})
+    return()
+  endif()
+
+  # Check for available declaration
+  if(NOT CPM_ARGS_FORCE AND NOT "${CPM_DECLARATION_${CPM_ARGS_NAME}}" STREQUAL "")
+    set(declaration ${CPM_DECLARATION_${CPM_ARGS_NAME}})
+    set(CPM_DECLARATION_${CPM_ARGS_NAME} "")
+    CPMAddPackage(${declaration})
+    cpm_export_variables(${CPM_ARGS_NAME})
+    # checking again to ensure version and option compatibility
+    cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}")
+    return()
+  endif()
+
+  if(NOT CPM_ARGS_FORCE)
+    if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY)
+      cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})
+
+      if(CPM_PACKAGE_FOUND)
+        cpm_export_variables(${CPM_ARGS_NAME})
+        return()
+      endif()
+
+      if(CPM_LOCAL_PACKAGES_ONLY)
+        message(
+          SEND_ERROR
+            "${CPM_INDENT} ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})"
+        )
+      endif()
+    endif()
+  endif()
+
+  CPMRegisterPackage("${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}")
+
+  if(DEFINED CPM_ARGS_GIT_TAG)
+    set(PACKAGE_INFO "${CPM_ARGS_GIT_TAG}")
+  elseif(DEFINED CPM_ARGS_SOURCE_DIR)
+    set(PACKAGE_INFO "${CPM_ARGS_SOURCE_DIR}")
+  else()
+    set(PACKAGE_INFO "${CPM_ARGS_VERSION}")
+  endif()
+
+  if(DEFINED FETCHCONTENT_BASE_DIR)
+    # respect user's FETCHCONTENT_BASE_DIR if set
+    set(CPM_FETCHCONTENT_BASE_DIR ${FETCHCONTENT_BASE_DIR})
+  else()
+    set(CPM_FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/_deps)
+  endif()
+
+  cpm_add_patches(${CPM_ARGS_PATCHES})
+
+  if(DEFINED CPM_ARGS_DOWNLOAD_COMMAND)
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND})
+  elseif(DEFINED CPM_ARGS_SOURCE_DIR)
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${CPM_ARGS_SOURCE_DIR})
+    if(NOT IS_ABSOLUTE ${CPM_ARGS_SOURCE_DIR})
+      # Expand `CPM_ARGS_SOURCE_DIR` relative path. This is important because EXISTS doesn't work
+      # for relative paths.
+      get_filename_component(
+        source_directory ${CPM_ARGS_SOURCE_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}
+      )
+    else()
+      set(source_directory ${CPM_ARGS_SOURCE_DIR})
+    endif()
+    if(NOT EXISTS ${source_directory})
+      string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
+      # remove timestamps so CMake will re-download the dependency
+      file(REMOVE_RECURSE "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild")
+    endif()
+  elseif(CPM_SOURCE_CACHE AND NOT CPM_ARGS_NO_CACHE)
+    string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
+    set(origin_parameters ${CPM_ARGS_UNPARSED_ARGUMENTS})
+    list(SORT origin_parameters)
+    if(CPM_ARGS_CUSTOM_CACHE_KEY)
+      # Application set a custom unique directory name
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
+    elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
+      string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
+      cpm_get_shortest_hash(
+        "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
+        "${origin_hash}" # Input hash
+        origin_hash # Computed hash
+      )
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
+    else()
+      string(SHA1 origin_hash "${origin_parameters}")
+      cpm_get_shortest_hash(
+        "${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
+        "${origin_hash}" # Input hash
+        origin_hash # Computed hash
+      )
+      set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
+    endif()
+    # Expand `download_directory` relative path. This is important because EXISTS doesn't work for
+    # relative paths.
+    get_filename_component(download_directory ${download_directory} ABSOLUTE)
+    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})
+
+    if(CPM_SOURCE_CACHE)
+      file(LOCK ${download_directory}/../cmake.lock)
+    endif()
+
+    if(EXISTS ${download_directory})
+      if(CPM_SOURCE_CACHE)
+        file(LOCK ${download_directory}/../cmake.lock RELEASE)
+      endif()
+
+      cpm_store_fetch_properties(
+        ${CPM_ARGS_NAME} "${download_directory}"
+        "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
+      )
+      cpm_get_fetch_properties("${CPM_ARGS_NAME}")
+
+      if(DEFINED CPM_ARGS_GIT_TAG AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS))
+        # warn if cache has been changed since checkout
+        cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN)
+        if(NOT ${IS_CLEAN})
+          message(
+            WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty"
+          )
+        endif()
+      endif()
+
+      cpm_add_subdirectory(
+        "${CPM_ARGS_NAME}"
+        "${DOWNLOAD_ONLY}"
+        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        "${${CPM_ARGS_NAME}_BINARY_DIR}"
+        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+        "${CPM_ARGS_SYSTEM}"
+        "${CPM_ARGS_OPTIONS}"
+      )
+      set(PACKAGE_INFO "${PACKAGE_INFO} at ${download_directory}")
+
+      # As the source dir is already cached/populated, we override the call to FetchContent.
+      set(CPM_SKIP_FETCH TRUE)
+      cpm_override_fetchcontent(
+        "${lower_case_name}" SOURCE_DIR "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        BINARY_DIR "${${CPM_ARGS_NAME}_BINARY_DIR}"
+      )
+
+    else()
+      # Enable shallow clone when GIT_TAG is not a commit hash. Our guess may not be accurate, but
+      # it should guarantee that no commit hash gets mis-detected.
+      if(NOT DEFINED CPM_ARGS_GIT_SHALLOW)
+        cpm_is_git_tag_commit_hash("${CPM_ARGS_GIT_TAG}" IS_HASH)
+        if(NOT ${IS_HASH})
+          list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW TRUE)
+        endif()
+      endif()
+
+      # remove timestamps so CMake will re-download the dependency
+      file(REMOVE_RECURSE ${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild)
+      set(PACKAGE_INFO "${PACKAGE_INFO} to ${download_directory}")
+    endif()
+  endif()
+
+  if(NOT "${DOWNLOAD_ONLY}")
+    cpm_create_module_file(${CPM_ARGS_NAME} "CPMAddPackage(\"${ARGN}\")")
+  endif()
+
+  if(CPM_PACKAGE_LOCK_ENABLED)
+    if((CPM_ARGS_VERSION AND NOT CPM_ARGS_SOURCE_DIR) OR CPM_INCLUDE_ALL_IN_PACKAGE_LOCK)
+      cpm_add_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
+    elseif(CPM_ARGS_SOURCE_DIR)
+      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "local directory")
+    else()
+      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
+    endif()
+  endif()
+
+  cpm_message(
+    STATUS "${CPM_INDENT} Adding package ${CPM_ARGS_NAME}@${CPM_ARGS_VERSION} (${PACKAGE_INFO})"
+  )
+
+  if(NOT CPM_SKIP_FETCH)
+    # CMake 3.28 added EXCLUDE, SYSTEM (3.25), and SOURCE_SUBDIR (3.18) to FetchContent_Declare.
+    # Calling FetchContent_MakeAvailable will then internally forward these options to
+    # add_subdirectory. Up until these changes, we had to call FetchContent_Populate and
+    # add_subdirectory separately, which is no longer necessary and has been deprecated as of 3.30.
+    # A bug in CMake prevents us from using the non-deprecated functions until 3.30.3.
+    set(fetchContentDeclareExtraArgs "")
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.3")
+      if(${CPM_ARGS_EXCLUDE_FROM_ALL})
+        list(APPEND fetchContentDeclareExtraArgs EXCLUDE_FROM_ALL)
+      endif()
+      if(${CPM_ARGS_SYSTEM})
+        list(APPEND fetchContentDeclareExtraArgs SYSTEM)
+      endif()
+      if(DEFINED CPM_ARGS_SOURCE_SUBDIR)
+        list(APPEND fetchContentDeclareExtraArgs SOURCE_SUBDIR ${CPM_ARGS_SOURCE_SUBDIR})
+      endif()
+      # For CMake version <3.28 OPTIONS are parsed in cpm_add_subdirectory
+      if(CPM_ARGS_OPTIONS AND NOT DOWNLOAD_ONLY)
+        foreach(OPTION ${CPM_ARGS_OPTIONS})
+          cpm_parse_option("${OPTION}")
+          set(${OPTION_KEY} "${OPTION_VALUE}")
+        endforeach()
+      endif()
+    endif()
+    cpm_declare_fetch(
+      "${CPM_ARGS_NAME}" ${fetchContentDeclareExtraArgs} "${CPM_ARGS_UNPARSED_ARGUMENTS}"
+    )
+
+    cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS})
+    if(CPM_SOURCE_CACHE AND download_directory)
+      file(LOCK ${download_directory}/../cmake.lock RELEASE)
+    endif()
+    if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.30.3")
+      cpm_add_subdirectory(
+        "${CPM_ARGS_NAME}"
+        "${DOWNLOAD_ONLY}"
+        "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}"
+        "${${CPM_ARGS_NAME}_BINARY_DIR}"
+        "${CPM_ARGS_EXCLUDE_FROM_ALL}"
+        "${CPM_ARGS_SYSTEM}"
+        "${CPM_ARGS_OPTIONS}"
+      )
+    endif()
+    cpm_get_fetch_properties("${CPM_ARGS_NAME}")
+  endif()
+
+  set(${CPM_ARGS_NAME}_ADDED YES)
+  cpm_export_variables("${CPM_ARGS_NAME}")
+endfunction()
+
+# Adds a package previously registered with CPMDeclarePackage; reports SEND_ERROR if none exists
+macro(CPMGetPackage Name)
+  if(DEFINED "CPM_DECLARATION_${Name}")
+    CPMAddPackage(NAME ${Name})
+  else()
+    message(SEND_ERROR "${CPM_INDENT} Cannot retrieve package ${Name}: no declaration available")
+  endif()
+endmacro()
+
+# Exports ${name}_SOURCE_DIR/_BINARY_DIR/_ADDED and CPM_LAST_PACKAGE_NAME to the parent scope
+macro(cpm_export_variables name)
+  set(${name}_SOURCE_DIR
+      "${${name}_SOURCE_DIR}"
+      PARENT_SCOPE
+  )
+  set(${name}_BINARY_DIR
+      "${${name}_BINARY_DIR}"
+      PARENT_SCOPE
+  )
+  set(${name}_ADDED
+      "${${name}_ADDED}"
+      PARENT_SCOPE
+  )
+  set(CPM_LAST_PACKAGE_NAME
+      "${name}"
+      PARENT_SCOPE
+  )
+endmacro()
+
+# Declares a package so that any later CPMAddPackage call for that name uses these arguments
+# instead. An existing declaration is never overridden, so the first declaration wins.
+macro(CPMDeclarePackage Name)
+  if(NOT DEFINED "CPM_DECLARATION_${Name}")
+    set("CPM_DECLARATION_${Name}" "${ARGN}")
+  endif()
+endmacro()
+
+function(cpm_add_to_package_lock Name)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    cpm_prettify_package_arguments(PRETTY_ARGN false ${ARGN})
+    file(APPEND ${CPM_PACKAGE_LOCK_FILE} "# ${Name}\nCPMDeclarePackage(${Name}\n${PRETTY_ARGN})\n")
+  endif()
+endfunction()
+
+function(cpm_add_comment_to_package_lock Name)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    cpm_prettify_package_arguments(PRETTY_ARGN true ${ARGN})
+    file(APPEND ${CPM_PACKAGE_LOCK_FILE}
+         "# ${Name} (unversioned)\n# CPMDeclarePackage(${Name}\n${PRETTY_ARGN}#)\n"
+    )
+  endif()
+endfunction()
+
+# Includes the package lock `file` if it exists and adds the `cpm-update-package-lock` target,
+# which copies ${CPM_PACKAGE_LOCK_FILE} over it. No-op if CPM_DONT_CREATE_PACKAGE_LOCK is true.
+macro(CPMUsePackageLock file)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    get_filename_component(CPM_ABSOLUTE_PACKAGE_LOCK_PATH ${file} ABSOLUTE)
+    if(EXISTS ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
+      include(${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
+    endif()
+    if(NOT TARGET cpm-update-package-lock)
+      add_custom_target(
+        cpm-update-package-lock COMMAND ${CMAKE_COMMAND} -E copy ${CPM_PACKAGE_LOCK_FILE}
+                                        ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH}
+      )
+    endif()
+    set(CPM_PACKAGE_LOCK_ENABLED true)
+  endif()
+endmacro()
+
+# Adds PACKAGE to the cached CPM_PACKAGES list and caches VERSION as CPM_PACKAGE_<PACKAGE>_VERSION
+function(CPMRegisterPackage PACKAGE VERSION)
+  list(APPEND CPM_PACKAGES ${PACKAGE})
+  set(CPM_PACKAGES
+      ${CPM_PACKAGES}
+      CACHE INTERNAL ""
+  )
+  set("CPM_PACKAGE_${PACKAGE}_VERSION"
+      ${VERSION}
+      CACHE INTERNAL ""
+  )
+endfunction()
+
+# Retrieves the version recorded for PACKAGE.
+#
+# Copies CPM_PACKAGE_<PACKAGE>_VERSION (the cache entry written by CPMRegisterPackage) into the
+# caller's variable named by OUTPUT; the result is empty when no version has been recorded.
+function(CPMGetPackageVersion PACKAGE OUTPUT)
+  set(${OUTPUT} "${CPM_PACKAGE_${PACKAGE}_VERSION}" PARENT_SCOPE)
+endfunction()
+
+# Declares PACKAGE with FetchContent_Declare, forwarding every extra argument unchanged.
+# In dry-run mode (CPM_DRY_RUN) nothing is declared and only a status message is printed.
+function(cpm_declare_fetch PACKAGE)
+  if(${CPM_DRY_RUN})
+    cpm_message(STATUS "${CPM_INDENT} Package not declared (dry run)")
+  else()
+    FetchContent_Declare(${PACKAGE} ${ARGN})
+  endif()
+endfunction()
+
+# Exposes the stored locations of PACKAGE to the caller.
+#
+# Sets ${PACKAGE}_SOURCE_DIR and ${PACKAGE}_BINARY_DIR in the parent scope from the cache entries
+# CPM_PACKAGE_${PACKAGE}_SOURCE_DIR and CPM_PACKAGE_${PACKAGE}_BINARY_DIR, which are written by
+# cpm_store_fetch_properties. In dry-run mode (CPM_DRY_RUN) the caller's variables are left
+# untouched.
+function(cpm_get_fetch_properties PACKAGE)
+  if(${CPM_DRY_RUN})
+    return()
+  endif()
+
+  foreach(dir_kind SOURCE_DIR BINARY_DIR)
+    set(${PACKAGE}_${dir_kind} "${CPM_PACKAGE_${PACKAGE}_${dir_kind}}" PARENT_SCOPE)
+  endforeach()
+endfunction()
+
+# Records where PACKAGE has been placed.
+#
+# Stores source_dir and binary_dir in the internal cache entries CPM_PACKAGE_${PACKAGE}_SOURCE_DIR
+# and CPM_PACKAGE_${PACKAGE}_BINARY_DIR. Nothing is stored in dry-run mode (CPM_DRY_RUN).
+function(cpm_store_fetch_properties PACKAGE source_dir binary_dir)
+  if(${CPM_DRY_RUN})
+    return()
+  endif()
+
+  foreach(dir_kind SOURCE_DIR BINARY_DIR)
+    string(TOLOWER "${dir_kind}" dir_argument)
+    set(CPM_PACKAGE_${PACKAGE}_${dir_kind} "${${dir_argument}}" CACHE INTERNAL "")
+  endforeach()
+endfunction()
+
+# Runs add_subdirectory for PACKAGE unless DOWNLOAD_ONLY is set or SOURCE_DIR lacks CMakeLists.txt
+function(
+  cpm_add_subdirectory
+  PACKAGE
+  DOWNLOAD_ONLY
+  SOURCE_DIR
+  BINARY_DIR
+  EXCLUDE
+  SYSTEM
+  OPTIONS
+)
+
+  if(NOT DOWNLOAD_ONLY AND EXISTS ${SOURCE_DIR}/CMakeLists.txt)
+    set(addSubdirectoryExtraArgs "")
+    if(EXCLUDE)
+      list(APPEND addSubdirectoryExtraArgs EXCLUDE_FROM_ALL)
+    endif()
+    if("${SYSTEM}" AND "${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.25")
+      # https://cmake.org/cmake/help/latest/prop_dir/SYSTEM.html#prop_dir:SYSTEM
+      list(APPEND addSubdirectoryExtraArgs SYSTEM)
+    endif()
+    if(OPTIONS)
+      foreach(OPTION ${OPTIONS})
+        cpm_parse_option("${OPTION}")
+        set(${OPTION_KEY} "${OPTION_VALUE}")
+      endforeach()
+    endif()
+    set(CPM_OLD_INDENT "${CPM_INDENT}")
+    set(CPM_INDENT "${CPM_INDENT} ${PACKAGE}:")
+    add_subdirectory(${SOURCE_DIR} ${BINARY_DIR} ${addSubdirectoryExtraArgs})
+    set(CPM_INDENT "${CPM_OLD_INDENT}")
+  endif()
+endfunction()
+
+# Downloads a previously declared package via FetchContent.
+#
+# PACKAGE is the name given to cpm_declare_fetch. `populated` names a parent-scope variable that
+# is set to TRUE only when this call populated the package, and to FALSE otherwise. Any further
+# arguments are forwarded to FetchContent_Populate in the DOWNLOAD_ONLY case on CMake >= 3.30.3.
+#
+# The resulting directories are recorded with cpm_store_fetch_properties and exported to the
+# parent scope as `${PACKAGE}_SOURCE_DIR` and `${PACKAGE}_BINARY_DIR`. In dry-run mode
+# (CPM_DRY_RUN) nothing is fetched, recorded or exported.
+function(cpm_fetch_package PACKAGE DOWNLOAD_ONLY populated)
+  set(${populated} FALSE PARENT_SCOPE)
+  if(${CPM_DRY_RUN})
+    cpm_message(STATUS "${CPM_INDENT} Package ${PACKAGE} not fetched (dry run)")
+    return()
+  endif()
+
+  FetchContent_GetProperties(${PACKAGE})
+
+  string(TOLOWER "${PACKAGE}" lower_case_name)
+
+  # Only download when FetchContent does not already report the package as populated.
+  if(NOT ${lower_case_name}_POPULATED)
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.30.3")
+      if(DOWNLOAD_ONLY)
+        # MakeAvailable will call add_subdirectory internally which is not what we want when
+        # DOWNLOAD_ONLY is set. Populate will only download the dependency without adding it to the
+        # build
+        FetchContent_Populate(
+          ${PACKAGE}
+          SOURCE_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-src"
+          BINARY_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
+          SUBBUILD_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild"
+          ${ARGN}
+        )
+      else()
+        FetchContent_MakeAvailable(${PACKAGE})
+      endif()
+    else()
+      FetchContent_Populate(${PACKAGE})
+    endif()
+    set(${populated} TRUE PARENT_SCOPE)
+  endif()
+
+  # Key the stored properties on this function's own PACKAGE argument rather than on the caller's
+  # CPM_ARGS_NAME, and quote the directories so that an empty value still yields three arguments.
+  cpm_store_fetch_properties(
+    "${PACKAGE}" "${${lower_case_name}_SOURCE_DIR}" "${${lower_case_name}_BINARY_DIR}"
+  )
+
+  # Export the directories reported by FetchContent under the caller-facing package name.
+  set(${PACKAGE}_SOURCE_DIR ${${lower_case_name}_SOURCE_DIR} PARENT_SCOPE)
+  set(${PACKAGE}_BINARY_DIR ${${lower_case_name}_BINARY_DIR} PARENT_SCOPE)
+
+endfunction()
+
+# Splits a package option of the form "KEY VALUE" at its first space.
+#
+# Sets OPTION_KEY and OPTION_VALUE in the parent scope. Everything after the first space,
+# including any further spaces, becomes the value.
+function(cpm_parse_option OPTION)
+  string(FIND "${OPTION}" " " first_space)
+  if(first_space EQUAL -1)
+    # no value for key provided, assume user wants to set option to "ON"
+    set(key "${OPTION}")
+    set(value "ON")
+  else()
+    # The key is the text before the first space, the value is the text after it.
+    math(EXPR value_begin "${first_space}+1")
+    string(SUBSTRING "${OPTION}" 0 "${first_space}" key)
+    string(SUBSTRING "${OPTION}" "${value_begin}" -1 value)
+  endif()
+
+  # Hand both parts back to the caller.
+  set(OPTION_KEY "${key}" PARENT_SCOPE)
+  set(OPTION_VALUE "${value}" PARENT_SCOPE)
+endfunction()
+
+# Guesses the package version from a git tag and stores it in the parent-scope variable RESULT.
+#
+# A 40-character GIT_TAG is taken to be a full commit hash and yields 0. Otherwise the leading run
+# of digits and dots, after an optional "v", is used: "v1.2.3" yields "1.2.3". GIT_TAG is quoted
+# wherever it is expanded so that an empty tag does not change the number of arguments.
+function(cpm_get_version_from_git_tag GIT_TAG RESULT)
+  string(LENGTH "${GIT_TAG}" length)
+  if(length EQUAL 40)
+    # GIT_TAG is probably a git hash
+    set(${RESULT} 0 PARENT_SCOPE)
+  else()
+    string(REGEX MATCH "v?([0123456789.]*).*" _ "${GIT_TAG}")
+    # CMAKE_MATCH_1 is left unquoted on purpose: when the tag does not start with a version
+    # number the capture is empty and RESULT ends up unset in the parent scope, as before.
+    set(${RESULT} ${CMAKE_MATCH_1} PARENT_SCOPE)
+  endif()
+endfunction()
+
+# Guesses whether GIT_TAG is a commit hash rather than a tag or a branch name.
+#
+# Sets the parent-scope variable RESULT to 1 if GIT_TAG is 7 to 40 characters long and consists
+# only of hexadecimal digits, and to 0 otherwise. This is a heuristic: a tag or branch whose name
+# happens to look like that, e.g. "deadbeef", is reported as a hash.
+function(cpm_is_git_tag_commit_hash GIT_TAG RESULT)
+  string(LENGTH "${GIT_TAG}" length)
+  # full hash has 40 characters, and short hash has at least 7 characters.
+  if(length LESS 7 OR length GREATER 40)
+    set(is_hash 0)
+  elseif("${GIT_TAG}" MATCHES "^[a-fA-F0-9]+$")
+    # GIT_TAG is quoted on purpose: if() replaces an unquoted argument such as `release` with the
+    # value of a variable of that name when one is defined, so the regex would test the wrong text.
+    set(is_hash 1)
+  else()
+    set(is_hash 0)
+  endif()
+
+  set(${RESULT}
+      ${is_hash}
+      PARENT_SCOPE
+  )
+endfunction()
+
+# Formats the given package arguments as indented text for the package lock file and returns it
+# in OUT_VAR. With IS_IN_COMMENT true every emitted line starts with "#". A SOURCE_DIR value has
+# the current CMAKE_SOURCE_DIR prefix replaced by a literal "${CMAKE_SOURCE_DIR}" reference.
+# Argument names are quoted when compared so that a same-named variable in scope cannot shadow them.
+function(cpm_prettify_package_arguments OUT_VAR IS_IN_COMMENT)
+  set(oneValueArgs
+      NAME
+      FORCE
+      VERSION
+      GIT_TAG
+      DOWNLOAD_ONLY
+      GITHUB_REPOSITORY
+      GITLAB_REPOSITORY
+      BITBUCKET_REPOSITORY
+      GIT_REPOSITORY
+      SOURCE_DIR
+      FIND_PACKAGE_ARGUMENTS
+      NO_CACHE
+      SYSTEM
+      GIT_SHALLOW
+      EXCLUDE_FROM_ALL
+      SOURCE_SUBDIR
+  )
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
+  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  foreach(oneArgName ${oneValueArgs})
+    if(DEFINED CPM_ARGS_${oneArgName})
+      if(${IS_IN_COMMENT})
+        string(APPEND PRETTY_OUT_VAR "#")
+      endif()
+      if("${oneArgName}" STREQUAL "SOURCE_DIR")
+        string(REPLACE "${CMAKE_SOURCE_DIR}" "\${CMAKE_SOURCE_DIR}" CPM_ARGS_${oneArgName}
+                       "${CPM_ARGS_${oneArgName}}"
+        )
+      endif()
+      string(APPEND PRETTY_OUT_VAR "  ${oneArgName} ${CPM_ARGS_${oneArgName}}\n")
+    endif()
+  endforeach()
+  foreach(multiArgName ${multiValueArgs})
+    if(DEFINED CPM_ARGS_${multiArgName})
+      if(${IS_IN_COMMENT})
+        string(APPEND PRETTY_OUT_VAR "#")
+      endif()
+      string(APPEND PRETTY_OUT_VAR "  ${multiArgName}\n")
+      foreach(singleOption ${CPM_ARGS_${multiArgName}})
+        if(${IS_IN_COMMENT})
+          string(APPEND PRETTY_OUT_VAR "#")
+        endif()
+        string(APPEND PRETTY_OUT_VAR "    \"${singleOption}\"\n")
+      endforeach()
+    endif()
+  endforeach()
+
+  if(NOT "${CPM_ARGS_UNPARSED_ARGUMENTS}" STREQUAL "")
+    if(${IS_IN_COMMENT})
+      string(APPEND PRETTY_OUT_VAR "#")
+    endif()
+    string(APPEND PRETTY_OUT_VAR " ")
+    foreach(CPM_ARGS_UNPARSED_ARGUMENT ${CPM_ARGS_UNPARSED_ARGUMENTS})
+      string(APPEND PRETTY_OUT_VAR " ${CPM_ARGS_UNPARSED_ARGUMENT}")
+    endforeach()
+    string(APPEND PRETTY_OUT_VAR "\n")
+  endif()
+
+  set(${OUT_VAR} ${PRETTY_OUT_VAR} PARENT_SCOPE)
+endfunction()
diff --git a/docs/Makefile b/docs/Makefile
index 6f374b16..b2a6d356 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -12,6 +12,7 @@ DOXYGEN := $(shell command -v doxygen 2> /dev/null)
 PYTHON   ?= python3
 
 GENDIR   = $(SOURCEDIR)/_generated
+APIDIR   = $(SOURCEDIR)/cpp_api/api
 
 # Put it first so that "make" without argument is like "make help".
 help:
@@ -24,6 +25,7 @@ clean:
 	rm -rf $(BUILDDIR)
 	rm -rf doxygen
 	rm -rf $(GENDIR)
+	rm -rf $(APIDIR)
 
 # Generate Doxygen documentation (only if doxygen is installed)
 doxygen:
diff --git a/docs/scripts/generate_api_index.py b/docs/scripts/generate_api_index.py
index 61e07a30..08404bf0 100644
--- a/docs/scripts/generate_api_index.py
+++ b/docs/scripts/generate_api_index.py
@@ -22,13 +22,13 @@
 from __future__ import annotations
 
 import argparse
-import re
+import os
+import subprocess
 import xml.etree.ElementTree as ET
 from collections import defaultdict
 from dataclasses import dataclass, field
 from pathlib import Path
 
-
 # Root namespace prefix - everything under this is considered public API
 ROOT_NS = "dftracer::utils"
 
@@ -45,7 +45,7 @@
 GUIDE_PAGES: dict[str, str] = {
     "coro": "coro",
     "io": "io",
-    "sqlite": "sqlite",
+    "rocksdb": "rocksdb",
     "task_graph": "task_graph",
     "utilities.common.arrow": "arrow",
     "utilities.indexer": "indexer",
@@ -58,7 +58,7 @@
 TITLE_OVERRIDES: dict[str, str] = {
     "coro": "Coroutine Primitives",
     "io": "Async I/O",
-    "sqlite": "SQLite",
+    "rocksdb": "RocksDB",
     "task_graph": "Task Graph",
     "server": "HTTP Server",
     "call_tree": "Call Tree",
@@ -86,7 +86,6 @@
 }
 
 
-
 def is_inner_type(name: str, all_names: set[str]) -> bool:
     """Check if a name is an inner/nested type of another class.
 
@@ -108,6 +107,10 @@ class APIItem:
     file: str = ""
     brief: str = ""
     is_inner: bool = False
+    line: int | None = None
+    bodyfile: str = ""
+    bodystart: int | None = None
+    bodyend: int | None = None
 
 
 @dataclass
@@ -127,8 +130,7 @@ def parse_doxygen_xml(xml_dir: Path) -> list[APIItem]:
     index_path = xml_dir / "index.xml"
     if not index_path.exists():
         raise FileNotFoundError(
-            f"{index_path} not found. Run doxygen first:\n"
-            f"  cd docs && doxygen Doxyfile"
+            f"{index_path} not found. Run doxygen first:\n  cd docs && doxygen Doxyfile"
         )
 
     tree = ET.parse(index_path)
@@ -159,6 +161,10 @@ def parse_doxygen_xml(xml_dir: Path) -> list[APIItem]:
 
         file_path = ""
         brief = ""
+        line = None
+        bodyfile = ""
+        bodystart = None
+        bodyend = None
         detail_xml = xml_dir / f"{refid}.xml"
         if detail_xml.exists():
             try:
@@ -167,6 +173,21 @@ def parse_doxygen_xml(xml_dir: Path) -> list[APIItem]:
                 loc = droot.find(".//location")
                 if loc is not None:
                     file_path = loc.get("file", "")
+                    bodyfile = loc.get("bodyfile", "")
+                    line_attr = loc.get("line")
+                    bodystart_attr = loc.get("bodystart")
+                    bodyend_attr = loc.get("bodyend")
+                    line = int(line_attr) if line_attr and line_attr.isdigit() else None
+                    bodystart = (
+                        int(bodystart_attr)
+                        if bodystart_attr and bodystart_attr.isdigit()
+                        else None
+                    )
+                    bodyend = (
+                        int(bodyend_attr)
+                        if bodyend_attr and bodyend_attr.isdigit()
+                        else None
+                    )
                 bd = droot.find(".//briefdescription/para")
                 if bd is not None and bd.text:
                     brief = bd.text.strip()
@@ -186,6 +207,10 @@ def parse_doxygen_xml(xml_dir: Path) -> list[APIItem]:
                 refid=refid,
                 file=file_path,
                 brief=brief,
+                line=line,
+                bodyfile=bodyfile,
+                bodystart=bodystart,
+                bodyend=bodyend,
             )
         )
 
@@ -264,9 +289,7 @@ def discover_modules(items: list[APIItem]) -> list[Module]:
     # Build Module objects
     modules: list[Module] = []
     for ns_suffix in sorted(final.keys()):
-        full_ns = (
-            f"{ROOT_NS}::{ns_suffix.replace('.', '::')}" if ns_suffix else ROOT_NS
-        )
+        full_ns = f"{ROOT_NS}::{ns_suffix.replace('.', '::')}" if ns_suffix else ROOT_NS
         title = TITLE_OVERRIDES.get(ns_suffix, _auto_title(ns_suffix))
         filename = _ns_to_filename(ns_suffix)
         guide_page = GUIDE_PAGES.get(ns_suffix)
@@ -309,7 +332,101 @@ def _ns_to_filename(ns_suffix: str) -> str:
     return ns_suffix.replace(".", "/")
 
 
-def generate_module_rst(mod: Module) -> str:
+def detect_repo_url(repo_root: Path) -> str:
+    """Detect the GitHub repository URL for source links."""
+    repo = os.environ.get("READTHEDOCS_GIT_REPOSITORY")
+    if repo:
+        repo = repo.removesuffix(".git")
+        if repo.startswith("git@github.com:"):
+            return repo.replace("git@github.com:", "https://github.com/", 1)
+        if repo.startswith("https://github.com/"):
+            return repo
+        if repo.startswith("github.com/"):
+            return f"https://{repo}"
+
+    repo = os.environ.get("GITHUB_REPOSITORY")
+    if repo:
+        return f"https://github.com/{repo}"
+
+    try:
+        remote = (
+            subprocess.check_output(
+                ["git", "remote", "get-url", "origin"],
+                cwd=repo_root,
+                text=True,
+            )
+            .strip()
+            .removesuffix(".git")
+        )
+        if remote.startswith("git@github.com:"):
+            return remote.replace("git@github.com:", "https://github.com/", 1)
+        if remote.startswith("https://github.com/"):
+            return remote
+    except Exception:
+        pass
+
+    return "https://github.com/LLNL/dftracer-utils"
+
+
+def detect_source_ref(repo_root: Path) -> str:
+    """Detect the git ref used for source links."""
+    for env_name in ("READTHEDOCS_GIT_COMMIT_HASH", "GITHUB_SHA"):
+        value = os.environ.get(env_name)
+        if value:
+            return value
+
+    try:
+        return (
+            subprocess.check_output(
+                ["git", "rev-parse", "HEAD"],
+                cwd=repo_root,
+                text=True,
+            )
+            .strip()
+        )
+    except Exception:
+        return "develop"
+
+
+def resolve_repo_path(repo_root: Path, item: APIItem) -> str | None:
+    """Resolve a Doxygen location path to a repo-relative source file."""
+    candidates = []
+    if item.bodyfile:
+        candidates.append(item.bodyfile)
+    if item.file:
+        candidates.append(item.file)
+
+    for candidate in candidates:
+        rel = Path(candidate)
+        for base in (repo_root / "include", repo_root / "src"):
+            full = base / rel
+            if full.exists():
+                return full.relative_to(repo_root).as_posix()
+    return None
+
+
+def source_link(repo_root: Path, repo_url: str, source_ref: str, item: APIItem) -> str | None:
+    """Build a GitHub source link for an API item."""
+    rel = resolve_repo_path(repo_root, item)
+    if rel is None:
+        return None
+
+    start = item.bodystart or item.line
+    end = item.bodyend or start
+    url = f"{repo_url}/blob/{source_ref}/{rel}"
+    if start is not None:
+        url += f"#L{start}"
+        if end is not None and end != start:
+            url += f"-L{end}"
+    return url
+
+
+def generate_module_rst(
+    mod: Module,
+    repo_root: Path,
+    repo_url: str,
+    source_ref: str,
+) -> str:
     """Generate RST for a single module page."""
     mod.items.sort(key=lambda x: (x.is_inner, x.name))
 
@@ -320,15 +437,19 @@ def generate_module_rst(mod: Module) -> str:
     lines.append(f"Namespace: ``{mod.full_ns}``")
     lines.append("")
     if mod.guide_page:
-        lines.append(
-            f"For usage guide and examples, see :doc:`/cpp_api/{mod.guide_page}`."
-        )
+        lines.append(f"For usage guide and examples, see :doc:`/cpp_api/{mod.guide_page}`.")
         lines.append("")
 
     top_level = [i for i in mod.items if not i.is_inner]
 
     for item in top_level:
         directive = "doxygenclass" if item.kind == "class" else "doxygenstruct"
+        link = source_link(repo_root, repo_url, source_ref, item)
+        if link:
+            lines.append(f".. rst-class:: api-source-link")
+            lines.append("")
+            lines.append(f"   `source <{link}>`_")
+            lines.append("")
         lines.append(f".. {directive}:: {item.name}")
         lines.append("   :project: dftracer-utils")
         lines.append("   :members:")
@@ -360,7 +481,7 @@ def _build_toctree_hierarchy(
             # Register all ancestor directories
             segments = parent_dir.split("/")
             for i in range(len(segments)):
-                ancestor = "/".join(segments[: i])
+                ancestor = "/".join(segments[:i])
                 child = "/".join(segments[: i + 1])
                 dirs[ancestor].add(child)
 
@@ -427,13 +548,13 @@ def _generate_dir_index(
     # Subdirectories (link to their index)
     child_dirs = sorted(dirs.get(dir_path, set()))
     for child in child_dirs:
-        rel = child[len(dir_path):].lstrip("/") if dir_path else child
+        rel = child[len(dir_path) :].lstrip("/") if dir_path else child
         entries.append(f"{rel}/index")
 
     # Leaf modules in this directory
     leaves = sorted(dir_leaves.get(dir_path, []), key=lambda m: m.filename)
     for mod in leaves:
-        rel = mod.filename[len(dir_path):].lstrip("/") if dir_path else mod.filename
+        rel = mod.filename[len(dir_path) :].lstrip("/") if dir_path else mod.filename
         entries.append(rel)
 
     if entries:
@@ -481,6 +602,9 @@ def _generate_dir_index(
 
 def generate(xml_dir: Path, output_dir: Path) -> None:
     """Main generation entry point. Called by conf.py or CLI."""
+    repo_root = output_dir.parents[3]
+    repo_url = detect_repo_url(repo_root)
+    source_ref = detect_source_ref(repo_root)
     items = parse_doxygen_xml(xml_dir)
     print(f"  Found {len(items)} public API items")
 
@@ -488,12 +612,19 @@ def generate(xml_dir: Path, output_dir: Path) -> None:
 
     # Generate per-module pages
     output_dir.mkdir(parents=True, exist_ok=True)
+    expected_paths = {output_dir / f"{mod.filename}.rst" for mod in modules}
     for mod in modules:
-        rst = generate_module_rst(mod)
+        rst = generate_module_rst(mod, repo_root, repo_url, source_ref)
         out_path = output_dir / f"{mod.filename}.rst"
         out_path.parent.mkdir(parents=True, exist_ok=True)
         out_path.write_text(rst)
 
+    for stale in output_dir.rglob("*.rst"):
+        if stale.name == "index.rst":
+            continue
+        if stale not in expected_paths:
+            stale.unlink()
+
     # Generate index pages at each directory level
     generate_index_rst(modules, output_dir)
 
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
new file mode 100644
index 00000000..236a5b96
--- /dev/null
+++ b/docs/source/_static/custom.css
@@ -0,0 +1,19 @@
+.api-source-link {
+  float: right;
+  margin: 0 0 0.35rem 1rem;
+  font-size: 0.9rem;
+  line-height: 1.2;
+}
+
+.api-source-link a::before {
+  content: "[";
+}
+
+.api-source-link a::after {
+  content: "]";
+}
+
+.api-source-link + dl.cpp,
+.api-source-link + dl.c {
+  margin-top: 0;
+}
diff --git a/docs/source/api/indexer.rst b/docs/source/api/indexer.rst
index 0a83c7ba..6be94a3e 100644
--- a/docs/source/api/indexer.rst
+++ b/docs/source/api/indexer.rst
@@ -1,12 +1,13 @@
 Indexer Module
 ==============
 
-The indexer module provides functionality for indexing and searching gzip trace files.
+The indexer module provides functionality for indexing and searching gzip trace
+files using a root-local ``.dftindex`` store.
 
 Indexer Class
 -------------
 
-.. autoclass:: dftracer.utils.Indexer(gz_path: str, idx_path: str | None = None, checkpoint_size: int = 1048576, force_rebuild: bool = False, build_bloom: bool = False, build_manifest: bool = False, index_threshold: int = 8388608, runtime: Runtime | None = None)
+.. autoclass:: dftracer.utils.Indexer(gz_path: str, index_path: str | None = None, checkpoint_size: int = 1048576, force_rebuild: bool = False, build_bloom: bool = False, build_manifest: bool = False, index_threshold: int = 8388608, runtime: Runtime | None = None)
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/source/api/trace_reader.rst b/docs/source/api/trace_reader.rst
index 4c9ac7dd..ee9fc936 100644
--- a/docs/source/api/trace_reader.rst
+++ b/docs/source/api/trace_reader.rst
@@ -2,7 +2,8 @@ TraceReader Module
 ==================
 
 The ``TraceReader`` is the recommended way to read trace files. It auto-selects
-sequential or indexed reading based on whether an ``.idx`` sidecar exists.
+sequential or indexed reading based on whether a root-local ``.dftindex``
+RocksDB store exists.
 
 TraceReader Class
 -----------------
@@ -74,7 +75,7 @@ File Metadata
 -------------
 
 ``get_max_bytes()`` and ``get_num_lines()`` return file metadata without
-reading the full file (when an index exists):
+reading the full file (when a ``.dftindex`` RocksDB index store exists):
 
 .. code-block:: python
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index ff04fcc3..e46e120a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -4,13 +4,14 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 
 import os
+import inspect
+import importlib
 import subprocess
 import sys
+import types
 from pathlib import Path
-
-# Don't add project root to path - we want to use the installed package from site-packages
-# If we add the project root, Python will try to import from source which doesn't have the compiled .so
-# sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from types import TracebackType
+from typing import Iterator
 
 # Auto-generate Mermaid class diagrams from Doxygen XML before building
 _docs_dir = Path(__file__).parent.parent  # docs/
@@ -48,20 +49,833 @@
         check=False,
     )
 
-# Mock imports for packages that may not be available during doc build
+ON_READTHEDOCS = os.environ.get("READTHEDOCS", "").lower() == "true"
+PYTHON_SOURCE_DIR = _docs_dir.parent / "python"
 autodoc_mock_imports = []
 
-# Try to import the package
+
+def _install_rtd_extension_stub() -> None:
+    """Install a lightweight stub for the native extension on RTD."""
+
+    ext_name = "dftracer.utils.dftracer_utils_ext"
+    if ext_name in sys.modules:
+        return
+
+    ext = types.ModuleType(ext_name)
+    JSONPrimitive = str | int | float | bool | None
+
+    class _BaseNative:
+        """RTD stub for native extension classes."""
+        pass
+
+    class TaskHandle(_BaseNative):
+        """Handle to a submitted task.
+
+        Returned by asynchronous utility calls and by
+        :class:`dftracer.utils.Runtime`. The handle can be waited on,
+        queried for completion, or used to fetch the task result.
+        """
+
+        name = ""
+        task_id = 0
+
+        def get(self) -> object | None:
+            """Block until the task completes and return its result."""
+            return None
+
+        def wait(self) -> None:
+            """Block until the task completes."""
+            return None
+
+        def done(self) -> bool:
+            """Return ``True`` when the task has completed."""
+            return True
+
+    class Runtime(_BaseNative):
+        """Lightweight task runtime for native and Python work.
+
+        The runtime owns the executor threads used by coroutine-backed
+        readers, indexers, and utilities. The higher-level Python
+        wrapper in :mod:`dftracer.utils.runtime` builds on this native
+        object to support Python callables and richer task tracking.
+        """
+
+        threads = 0
+
+        def __init__(self, threads: int = 0) -> None:
+            """Create a runtime with an optional worker-thread count."""
+            super().__init__()  # _BaseNative defines no __init__; object.__init__ raises TypeError on extra args
+            self.threads = threads
+
+        def shutdown(self) -> None:
+            """Stop the runtime and release worker resources."""
+            return None
+
+        def wait_all(self) -> None:
+            """Block until all submitted native work completes."""
+            return None
+
+        def get_progress(self) -> dict[str, object]:
+            """Return runtime progress metadata."""
+            return {}
+
+        def is_responsive(self) -> bool:
+            """Return whether the watchdog still considers the runtime healthy."""
+            return True
+
+        def set_timeout(self, global_ms: int = 0) -> None:
+            """Set a global watchdog timeout in milliseconds."""
+            return None
+
+        def set_default_task_timeout(self, ms: int = 0) -> None:
+            """Set the default per-task timeout in milliseconds."""
+            return None
+
+        def __enter__(self) -> "Runtime":
+            """Enter the runtime context manager."""
+            return self
+
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc_val: BaseException | None,
+            exc_tb: TracebackType | None,
+        ) -> None:
+            """Exit the runtime context manager."""
+            return None
+
+    class IndexerCheckpoint(_BaseNative):
+        """Information about a single checkpoint in a ``.dftindex`` store.
+
+        Checkpoints map compressed and uncompressed offsets and carry
+        per-chunk metadata used for seeking and chunk-level pruning.
+        """
+
+        checkpoint_idx = 0
+        uc_offset = 0
+        uc_size = 0
+        c_offset = 0
+        c_size = 0
+        bits = 0
+        num_lines = 0
+
+    class Indexer(_BaseNative):
+        """Build and query a root-local ``.dftindex`` RocksDB store.
+
+        The indexer extracts checkpoints and optional bloom/manifest
+        data for a compressed DFTracer trace. Readers and higher-level
+        utilities use this store for chunk pruning and random access.
+        """
+
+        def __init__(
+            self,
+            gz_path: str,
+            index_path: str | None = None,
+            checkpoint_size: int = 1048576,
+            force_rebuild: bool = False,
+            build_bloom: bool = False,
+            build_manifest: bool = False,
+            index_threshold: int = 8388608,
+            runtime: Runtime | None = None,
+        ) -> None:
+            """Create an indexer for a compressed DFTracer trace."""
+            self.gz_path = gz_path
+            self.index_path = index_path or ""
+            self.checkpoint_size = checkpoint_size
+            self.has_bloom = build_bloom
+            self.has_manifest = build_manifest
+
+        def build(self) -> None:
+            """Build the index store for the configured trace file."""
+            return None
+
+        def need_rebuild(self) -> bool:
+            """Return whether the index is missing or stale."""
+            return False
+
+        def exists(self) -> bool:
+            """Return whether the index store already exists."""
+            return False
+
+        def get_max_bytes(self) -> int:
+            """Return the maximum decompressed byte position in the trace."""
+            return 0
+
+        def get_num_lines(self) -> int:
+            """Return the number of lines recorded in the index."""
+            return 0
+
+        def get_checkpoints(self) -> list["IndexerCheckpoint"]:
+            """Return all checkpoints stored for the trace."""
+            return []
+
+        def find_checkpoint(self, target_offset: int) -> "IndexerCheckpoint | None":
+            """Return the checkpoint closest to a decompressed offset."""
+            return None
+
+        def close(self) -> None:
+            """Release this Python wrapper's native indexer handle."""
+            return None
+
+        def __enter__(self) -> "Indexer":
+            """Enter the indexer context manager."""
+            return self
+
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc_val: BaseException | None,
+            exc_tb: TracebackType | None,
+        ) -> None:
+            """Exit the indexer context manager."""
+            return None
+
+    class JSON(_BaseNative):
+        """Lazy JSON wrapper backed by yyjson.
+
+        Nested objects are exposed as additional :class:`JSON` wrappers
+        so callers can inspect large trace records without eagerly
+        converting the entire payload to Python dictionaries.
+        """
+
+        def __init__(self, json_str: str) -> None:
+            """Create a lazy JSON wrapper from a JSON string."""
+            self._json_str = json_str
+
+        def get(
+            self,
+            key: str,
+            default: "JSON | JSONPrimitive" = None,
+        ) -> "JSON | JSONPrimitive":
+            """Look up a key and return ``default`` when it is absent."""
+            return default
+
+        def keys(self) -> list[str]:
+            """Return the keys in the current JSON object."""
+            return []
+
+        def values(self) -> list["JSON | JSONPrimitive"]:
+            """Return the values in the current JSON object."""
+            return []
+
+        def items(self) -> list[tuple[str, "JSON | JSONPrimitive"]]:
+            """Return key-value pairs in the current JSON object."""
+            return []
+
+        def unwrap(self) -> dict[str, object] | list[object] | JSONPrimitive:
+            """Convert the lazy wrapper into native Python data."""
+            return {}
+
+        def copy(self) -> "JSON":
+            """Return a shallow copy of the current lazy JSON wrapper."""
+            return self
+
+        def __contains__(self, key: str) -> bool:
+            """Return ``True`` when a key exists in the object."""
+            return False
+
+        def __getitem__(self, key: str) -> "JSON | JSONPrimitive":
+            """Return a field value or nested :class:`JSON` wrapper."""
+            raise KeyError(key)
+
+        def __len__(self) -> int:
+            """Return the number of items in the current JSON object."""
+            return 0
+
+        def __bool__(self) -> bool:
+            """Return whether the current JSON value is non-empty."""
+            return False
+
+        def __str__(self) -> str:
+            """Return a JSON-like string representation."""
+            return "{}"
+
+        def __repr__(self) -> str:
+            """Return a developer-facing representation."""
+            return "JSON('{}')"
+
+    class TraceReader(_BaseNative):
+        """Read DFTracer traces with optional index-assisted pruning.
+
+        ``TraceReader`` chooses between sequential and indexed access
+        based on the file format and the presence of a shared
+        root-local ``.dftindex`` store. It exposes line, raw-byte,
+        JSON, and Arrow-based views over the same trace data.
+        """
+
+        def __init__(
+            self,
+            file_path: str,
+            index_dir: str = "",
+            checkpoint_size: int = 33554432,
+            auto_build_index: bool = False,
+            index_threshold: int = 8388608,
+            runtime: Runtime | None = None,
+        ) -> None:
+            """Create a trace reader for plain or compressed DFTracer files."""
+            self.file_path = file_path
+            self.index_dir = index_dir
+            self.checkpoint_size = checkpoint_size
+            self.auto_build_index = auto_build_index
+            self.index_threshold = index_threshold
+
+        def read_lines(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> list[str]:
+            """Materialize decoded lines into a Python list.
+
+            Supports optional line/byte ranges and query-based filtering.
+            """
+            return []
+
+        def iter_lines(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> Iterator[str]:
+            """Stream decoded lines from the trace.
+
+            The returned iterator yields one UTF-8 decoded line at a time.
+            """
+            return iter(())
+
+        def iter_raw(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            line_aligned: bool = True,
+            multi_line: bool = True,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> Iterator[bytes]:
+            """Stream raw byte chunks from the trace.
+
+            Byte-range reads can be aligned to line boundaries and can
+            optionally return multi-line chunks.
+            """
+            return iter(())
+
+        def read_raw(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            line_aligned: bool = True,
+            multi_line: bool = True,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> list[bytes]:
+            """Materialize raw byte chunks into a Python list."""
+            return []
+
+        def iter_lines_json(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> Iterator["JSON"]:
+            """Stream lazy :class:`JSON` objects for trace events."""
+            return iter(())
+
+        def read_lines_json(
+            self,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> list["JSON"]:
+            """Materialize trace events as lazy :class:`JSON` objects."""
+            return []
+
+        def iter_arrow(
+            self,
+            batch_size: int = 10000,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> Iterator["ArrowBatch"]:
+            """Stream Arrow batches parsed from trace events."""
+            return iter(())
+
+        def read_arrow(
+            self,
+            batch_size: int = 10000,
+            start_line: int = 0,
+            end_line: int = 0,
+            start_byte: int = 0,
+            end_byte: int = 0,
+            buffer_size: int = 4194304,
+            query: str | None = None,
+        ) -> "ArrowTable | None":
+            """Materialize Arrow batches as a single table-like result."""
+            return None
+
+        def get_max_bytes(self) -> int:
+            """Return indexed decompressed size when available."""
+            return 0
+
+        def get_num_lines(self) -> int:
+            """Return indexed line count when available."""
+            return 0
+
+        def __enter__(self) -> "TraceReader":
+            """Enter the trace-reader context manager."""
+            return self
+
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc_val: BaseException | None,
+            exc_tb: TracebackType | None,
+        ) -> None:
+            """Exit the trace-reader context manager."""
+            return None
+
+    class AggregatorUtility(_BaseNative):
+        """Aggregate trace events into Arrow-ready time buckets."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create an aggregation utility bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            source_dir: str,
+            output_path: str = "",
+            time_interval_ms: float = 5000.0,
+            query: str = "",
+            index_dir: str = "",
+            force_rebuild: bool = False,
+            custom_metric_fields: list[str] | None = None,
+            compute_percentiles: bool = False,
+        ) -> "ArrowTable | None":
+            """Aggregate trace events into a materialized Arrow-style result."""
+            return None
+
+        def iter_arrow(
+            self,
+            source_dir: str,
+            output_path: str = "",
+            time_interval_ms: float = 5000.0,
+            query: str = "",
+            index_dir: str = "",
+            force_rebuild: bool = False,
+            custom_metric_fields: list[str] | None = None,
+            compute_percentiles: bool = False,
+        ) -> Iterator["ArrowBatch"]:
+            """Stream Arrow batches for aggregated trace metrics."""
+            return iter(())
+
+    class ComparatorUtility(_BaseNative):
+        """Compare baseline and variant traces."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a comparator utility bound to an optional runtime."""
+            self.runtime = runtime
+
+        def compare(
+            self,
+            baseline: str,
+            variant: str,
+            query: str = "",
+            time_interval_ms: float = 5000.0,
+            threshold: float = 0.0,
+            index_dir: str = "",
+            force_rebuild: bool = False,
+        ) -> "ArrowTable | None":
+            """Return comparison results as Arrow-compatible output."""
+            return None
+
+        def compare_json(
+            self,
+            baseline: str,
+            variant: str,
+            query: str = "",
+            time_interval_ms: float = 5000.0,
+            threshold: float = 0.0,
+            index_dir: str = "",
+            force_rebuild: bool = False,
+        ) -> str:
+            """Return comparison results as JSON."""
+            return "{}"
+
+        def compare_table(
+            self,
+            baseline: str,
+            variant: str,
+            query: str = "",
+            time_interval_ms: float = 5000.0,
+            threshold: float = 0.0,
+            index_dir: str = "",
+            force_rebuild: bool = False,
+        ) -> str:
+            """Return comparison results as a formatted text table."""
+            return ""
+
+    class StatisticsQueryUtility(_BaseNative):
+        """Query summary or top-N statistics from a trace."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a statistics-query utility bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            file_path: str,
+            query_type: str = "summary",
+            top_n: int = 10,
+            index_dir: str = "",
+            auto_build_index: bool = False,
+            index_threshold: int = 8388608,
+        ) -> dict[str, object]:
+            """Return scalar statistics derived from the trace."""
+            return {}
+
+    class StatisticsAggregatorUtility(_BaseNative):
+        """Aggregate core statistics from a trace into a Python dictionary."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a statistics-aggregator utility bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            file_path: str,
+            index_dir: str = "",
+            auto_build_index: bool = False,
+            index_threshold: int = 8388608,
+        ) -> dict[str, object]:
+            """Return aggregate trace statistics."""
+            return {}
+
+    class MetadataCollectorUtility(_BaseNative):
+        """Collect file metadata and index-aware trace metadata."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a metadata collector bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            file_path: str,
+            index_dir: str = "",
+            checkpoint_size: int = 33554432,
+            force_rebuild: bool = False,
+            index_threshold: int = 8388608,
+        ) -> dict[str, object]:
+            """Return metadata for a DFTracer trace file."""
+            return {}
+
+    class ReorganizationPlannerUtility(_BaseNative):
+        """Build a semantic reorganization plan for trace files."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a reorganization planner bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            source_files: list[str],
+            groups: list[dict[str, object]],
+            index_dir: str = "",
+            checkpoint_size: int = 33554432,
+            force_rebuild: bool = False,
+            index_threshold: int = 8388608,
+        ) -> dict[str, object]:
+            """Return a reorganization plan for the requested groups."""
+            return {}
+
+    class ReconstructionPlannerUtility(_BaseNative):
+        """Build a reconstruction plan from reorganized traces."""
+
+        def __init__(self, runtime: Runtime | None = None) -> None:
+            """Create a reconstruction planner bound to an optional runtime."""
+            self.runtime = runtime
+
+        def process(
+            self,
+            reorganized_files: list[str],
+            provenance_dir: str = "",
+        ) -> dict[str, object]:
+            """Return a reconstruction plan for reorganized trace files."""
+            return {}
+
+    ext.Indexer = Indexer
+    ext.IndexerCheckpoint = IndexerCheckpoint
+    ext.JSON = JSON
+    ext.Runtime = Runtime
+    ext.TaskHandle = TaskHandle
+    ext.TraceReader = TraceReader
+    ext.AggregatorUtility = AggregatorUtility
+    ext.ComparatorUtility = ComparatorUtility
+    ext.MetadataCollectorUtility = MetadataCollectorUtility
+    ext.ReconstructionPlannerUtility = ReconstructionPlannerUtility
+    ext.ReorganizationPlannerUtility = ReorganizationPlannerUtility
+    ext.StatisticsAggregatorUtility = StatisticsAggregatorUtility
+    ext.StatisticsQueryUtility = StatisticsQueryUtility
+
+    def get_default_runtime() -> Runtime:
+        """Return the process-wide default runtime."""
+        return Runtime()
+
+    def set_default_runtime(runtime: Runtime | None = None) -> None:
+        """Replace or clear the process-wide default runtime."""
+        return None
+
+    ext.get_default_runtime = get_default_runtime
+    ext.set_default_runtime = set_default_runtime
+    for name in [
+        "AggregatorUtility",
+        "ComparatorUtility",
+        "Indexer",
+        "IndexerCheckpoint",
+        "JSON",
+        "MetadataCollectorUtility",
+        "ReconstructionPlannerUtility",
+        "ReorganizationPlannerUtility",
+        "Runtime",
+        "StatisticsAggregatorUtility",
+        "StatisticsQueryUtility",
+        "TaskHandle",
+        "TraceReader",
+    ]:
+        getattr(ext, name).__module__ = ext_name
+    ext.__all__ = [
+        "AggregatorUtility",
+        "ComparatorUtility",
+        "Indexer",
+        "IndexerCheckpoint",
+        "JSON",
+        "MetadataCollectorUtility",
+        "ReconstructionPlannerUtility",
+        "ReorganizationPlannerUtility",
+        "Runtime",
+        "StatisticsAggregatorUtility",
+        "StatisticsQueryUtility",
+        "TaskHandle",
+        "TraceReader",
+        "get_default_runtime",
+        "set_default_runtime",
+    ]
+    sys.modules[ext_name] = ext
+
+
+def _repo_url() -> str:
+    """Return the GitHub repo URL for source links: RTD env, then GITHUB_REPOSITORY, then ``origin`` remote, then default."""
+    repo = os.environ.get("READTHEDOCS_GIT_REPOSITORY") or os.environ.get("READTHEDOCS_GIT_CLONE_URL")
+    if repo:
+        repo = repo.removesuffix(".git")
+        if repo.startswith("git@github.com:"):
+            repo = repo.replace("git@github.com:", "https://github.com/", 1)
+        if repo.startswith("https://github.com/"):  # plain `if` so the SSH form normalized above is returned too
+            return repo
+        if repo.startswith("github.com/"):
+            return f"https://{repo}"
+
+    repo = os.environ.get("GITHUB_REPOSITORY")
+    if repo:
+        return f"https://github.com/{repo}"
+
+    try:
+        remote = (
+            subprocess.check_output(
+                ["git", "remote", "get-url", "origin"],
+                cwd=_docs_dir.parent,
+                text=True,
+            )
+            .strip()
+            .removesuffix(".git")
+        )
+        if remote.startswith("git@github.com:"):
+            return remote.replace("git@github.com:", "https://github.com/", 1)
+        if remote.startswith("https://github.com/"):
+            return remote
+    except Exception:
+        pass
+
+    return "https://github.com/LLNL/dftracer-utils"
+
+
+def _source_ref() -> str:
+    """Return the git ref for source links: first non-empty commit env var, else ``git rev-parse HEAD``, else ``"develop"``."""
+    for env_name in ("READTHEDOCS_GIT_COMMIT_HASH", "GITHUB_SHA"):
+        value = os.environ.get(env_name)
+        if value:
+            return value
+    try:
+        return (
+            subprocess.check_output(
+                ["git", "rev-parse", "HEAD"],
+                cwd=_docs_dir.parent,
+                text=True,
+            )
+            .strip()
+        )
+    except Exception:  # git missing or not a checkout: fall back to a branch name
+        return "develop"
+
+REPO_URL = _repo_url()
+SOURCE_REF = _source_ref()
+
+
+def _pyi_target_for_extension(fullname: str) -> tuple[Path, list[str]] | None:
+    """Map an extension object's dotted name to ``(stub file path, name parts)``; unlisted names use ``dftracer_utils_ext.pyi``."""
+    top = fullname.split(".", 1)[0]
+    utility_map = {
+        "AggregatorUtility": "python/dftracer/utils/utilities/_aggregator.pyi",
+        "ComparatorUtility": "python/dftracer/utils/utilities/_comparator.pyi",
+        "MetadataCollectorUtility": (
+            "python/dftracer/utils/utilities/_metadata_collector.pyi"
+        ),
+        "StatisticsQueryUtility": (
+            "python/dftracer/utils/utilities/_statistics_query.pyi"
+        ),
+        "StatisticsAggregatorUtility": (
+            "python/dftracer/utils/utilities/_statistics_aggregator.pyi"
+        ),
+        "ReorganizationPlannerUtility": (
+            "python/dftracer/utils/utilities/_reorganization_planner.pyi"
+        ),
+        "ReconstructionPlannerUtility": (
+            "python/dftracer/utils/utilities/_reconstruction_planner.pyi"
+        ),
+    }
+    rel_path = utility_map.get(top, "python/dftracer/utils/dftracer_utils_ext.pyi")
+    return (_docs_dir.parent / rel_path, fullname.split("."))  # always a tuple; the annotated ``None`` is never produced here
+
+
+def _find_symbol_lines(path: Path, parts: list[str]) -> tuple[int, int] | None:
+    """Find source lines for a class/function/method in a Python source or stub file."""
+    import ast  # local import: ast is not among conf.py's visible module imports, and the NameError was swallowed below
+    try:
+        tree = ast.parse(path.read_text())
+    except Exception:
+        return None
+    node = tree
+    current_body = tree.body
+    for part in parts:
+        match = None
+        for child in current_body:
+            if isinstance(child, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
+                if child.name == part:
+                    match = child
+                    break
+        if match is None:
+            return None
+        node = match
+        current_body = getattr(match, "body", [])
+
+    start = getattr(node, "lineno", None)
+    end = getattr(node, "end_lineno", start)
+    if start is None:
+        return None
+    return (start, end or start)
+
+
+def _github_url(path: Path, lines: tuple[int, int] | None) -> str | None:
+    """Build a GitHub blob URL for ``path`` with an optional ``#Lstart-Lend`` anchor; ``None`` if it is not under the repo root."""
+    try:
+        rel = path.resolve().relative_to(_docs_dir.parent.resolve()).as_posix()
+    except Exception:  # relative_to raises when the file lives outside the checkout
+        return None
+    url = f"{REPO_URL}/blob/{SOURCE_REF}/{rel}"
+    if lines is not None:
+        start, end = lines
+        url += f"#L{start}"
+        if end != start:  # single-line symbols get just ``#Lstart``
+            url += f"-L{end}"
+    return url
+
+
+def linkcode_resolve(domain: str, info: dict[str, str]) -> str | None:
+    """Resolve a documented Python object to a GitHub source link, or ``None`` when it cannot be located."""
+    if domain != "py":
+        return None
+
+    module_name = info.get("module")
+    fullname = info.get("fullname")
+    if not module_name or not fullname:
+        return None
+
+    try:
+        module = importlib.import_module(module_name)
+    except Exception:
+        return None
+
+    obj = module
+    for part in fullname.split("."):
+        obj = getattr(obj, part, None)
+        if obj is None:
+            return None
+
+    obj_module = getattr(obj, "__module__", module_name)
+    if obj_module == "dftracer.utils.dftracer_utils_ext":  # native objects have no Python source: link to the stub
+        target = _pyi_target_for_extension(fullname)
+        if target is None:
+            return None
+        path, parts = target
+        lines = _find_symbol_lines(path, parts)
+        return _github_url(path, lines)
+
+    try:
+        source_file = Path(inspect.getsourcefile(obj) or inspect.getfile(obj))
+        source_lines, start = inspect.getsourcelines(obj)  # one read; avoids str.splitlines() over-splitting on form feeds
+        end = start + max(len(source_lines) - 1, 0)
+        return _github_url(source_file, (start, end))
+    except Exception:
+        return None
+
+
+if ON_READTHEDOCS:
+    sys.path.insert(0, str(PYTHON_SOURCE_DIR))
+    _install_rtd_extension_stub()
+    autodoc_mock_imports = [
+        "pyarrow",
+        "dask",
+        "dask.distributed",
+    ]
+
 try:
     import dftracer.utils
 
     print("✓ dftracer.utils package found and imported successfully.")
 except (ImportError, ModuleNotFoundError) as e:
-    print(f"Warning: dftracer.utils package not found: {e}")
-    print("API documentation will have limited information.")
-    print("To generate full API docs, install the package: pip install -e .")
-    # Don't mock - let it fail to show what's missing
-    # autodoc_mock_imports = ['dftracer', 'dftracer.utils']
+    if not ON_READTHEDOCS and PYTHON_SOURCE_DIR.exists():
+        print(f"Warning: installed dftracer.utils package not found: {e}")
+        print("Falling back to source package with RTD extension stubs.")
+        sys.path.insert(0, str(PYTHON_SOURCE_DIR))
+        _install_rtd_extension_stub()
+        autodoc_mock_imports = [
+            "pyarrow",
+            "dask",
+            "dask.distributed",
+        ]
+        import dftracer.utils
+    else:
+        print(f"Warning: dftracer.utils package not found: {e}")
+        print("API documentation will have limited information.")
+        print("To generate full API docs, install the package: pip install -e .")
 
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
@@ -160,6 +974,7 @@
 
 html_theme = "furo"
 html_static_path = ["_static"]
+html_css_files = ["custom.css"]
 
 # Search configuration
 html_search_language = "en"
diff --git a/docs/source/cpp_api/coro.rst b/docs/source/cpp_api/coro.rst
index 462f9a34..8156f59a 100644
--- a/docs/source/cpp_api/coro.rst
+++ b/docs/source/cpp_api/coro.rst
@@ -11,7 +11,28 @@ C++20 coroutine primitives for asynchronous task execution. All classes are in t
 
 For usage examples and task scheduling, see :doc:`/pipeline` and :doc:`pipeline/tasks`.
 
-.. mermaid:: ../_generated/coro.mmd
+.. mermaid::
+
+   graph TD
+       Coro["Coro\nfire-and-forget primitive"]
+       CoroTask["CoroTask&lt;T&gt;\nawaitable task result"]
+       SpawnFuture["SpawnFuture&lt;T&gt;\nresult of CoroScope::spawn()"]
+       JoinHandle["JoinHandle\nstructured join barrier"]
+       Channel["Channel&lt;T&gt;\nasync producer/consumer queue"]
+       Producer["ChannelProducer / ProducerGuard\nproducer lifetime management"]
+       Generator["Generator&lt;T&gt;\nsynchronous lazy sequence"]
+       AsyncGenerator["AsyncGenerator&lt;T&gt;\nasynchronous lazy sequence"]
+       Yield["yield() / maybe_yield()\ncooperative scheduling"]
+
+       CoroTask --> Coro
+       SpawnFuture --> CoroTask
+       JoinHandle --> Coro
+       Channel --> Producer
+       CoroTask --> Channel
+       Coro --> Yield
+       CoroTask --> Yield
+       AsyncGenerator --> CoroTask
+       Generator --> Coro
 
 Coro
 ----
@@ -457,4 +478,3 @@ Usage example:
     } else {
         process(result.result);
     }
-
diff --git a/docs/source/cpp_api/index.rst b/docs/source/cpp_api/index.rst
index 88f4190c..eeb614cb 100644
--- a/docs/source/cpp_api/index.rst
+++ b/docs/source/cpp_api/index.rst
@@ -12,6 +12,7 @@ This section contains the C++ API documentation for dftracer utilities.
     :caption: C++ Components:
 
     core_infrastructure
+    rocksdb
     reader
     indexer
     pipeline
@@ -20,7 +21,6 @@ This section contains the C++ API documentation for dftracer utilities.
     utilities
     arrow
     io
-    sqlite
     scheduler
     dft_aggregators
     dft_indexing
@@ -39,7 +39,6 @@ The dftracer utilities C++ library is organized into several namespaces:
 - ``dftracer::utils::task_graph`` - DAG-based task graph builder
 - ``dftracer::utils::utilities`` - Composable processing utilities
 - ``dftracer::utils::io`` - Async I/O backends (io_uring, kqueue, thread pool)
-- ``dftracer::utils::sqlite`` - Async SQLite database operations
 - ``dftracer::utils::utilities::composites::dft::aggregators`` - Event aggregation pipeline
 - ``dftracer::utils::utilities::composites::dft::indexing`` - Bloom filter indexing system
 - ``dftracer::utils::utilities::common::arrow`` - Arrow data interchange (RecordBatchBuilder, IpcWriter)
@@ -67,11 +66,6 @@ The dftracer utilities C++ library is organized into several namespaces:
            IoAwaitable["IoAwaitable"]
        end
 
-       subgraph SQLite["dftracer::utils::sqlite"]
-           SqliteDB["SqliteDatabase"]
-           SqliteAwait["SqliteAwaitable"]
-       end
-
        subgraph Utilities["dftracer::utils::utilities"]
            Reader["Reader"]
            Indexer["Indexer"]
@@ -100,7 +94,6 @@ The dftracer utilities C++ library is organized into several namespaces:
        Pipeline --> Executor
        Pipeline --> Scheduler
        Executor --> IoBackend
-       Executor --> SqliteDB
        Executor --> CoroTask
        Watchdog --> Executor
        TimerService --> Executor
diff --git a/docs/source/cpp_api/pipeline/executors.rst b/docs/source/cpp_api/pipeline/executors.rst
index a4d5144f..12f66dd0 100644
--- a/docs/source/cpp_api/pipeline/executors.rst
+++ b/docs/source/cpp_api/pipeline/executors.rst
@@ -101,7 +101,7 @@ timeout thresholds. All fields have sensible defaults.
        std::size_t io_pool_size = 4;
        io::IoBackendType io_backend_type = io::IoBackendType::AUTO;
        unsigned io_batch_threshold = 16;
-       std::size_t sqlite_pool_size = 2;
+       std::size_t db_pool_size = 2;
    };
 
 **Key fields:**
@@ -116,8 +116,7 @@ timeout thresholds. All fields have sensible defaults.
   (``AUTO``, ``IO_URING``, ``THREAD_POOL``).
 - ``io_batch_threshold`` -- Minimum number of I/O operations to batch before
   submitting to the backend.
-- ``sqlite_pool_size`` -- Number of threads in the dedicated SQLite connection
-  pool.
+- ``db_pool_size`` -- Number of threads in the dedicated database work pool.
 
 **Example -- high-throughput configuration:**
 
@@ -128,7 +127,7 @@ timeout thresholds. All fields have sensible defaults.
        .io_pool_size = 8,
        .io_backend_type = io::IoBackendType::IO_URING,
        .io_batch_threshold = 32,
-       .sqlite_pool_size = 4
+       .db_pool_size = 4
    };
 
 Progress Tracking
diff --git a/docs/source/cpp_api/rocksdb.rst b/docs/source/cpp_api/rocksdb.rst
new file mode 100644
index 00000000..76a6f77a
--- /dev/null
+++ b/docs/source/cpp_api/rocksdb.rst
@@ -0,0 +1,35 @@
+RocksDB
+=======
+
+The RocksDB layer provides the shared storage backend used by the
+root-local ``.dftindex`` and provenance stores introduced by the
+RocksDB migration.
+
+It includes:
+
+- database wrappers and lifecycle management
+- async awaitables for database work on executor-backed threads
+- key encoding helpers for typed prefix/range scans
+- manager utilities for sharing open database handles across readers,
+  indexers, and higher-level composites
+
+Architecture
+------------
+
+.. mermaid::
+
+   graph TD
+       Readers["TraceReader / utilities"] --> Manager["RocksDBManager"]
+       Indexers["Indexer / provenance writers"] --> Manager
+       Manager --> Database["RocksDatabase"]
+       Database --> CFs["Column families"]
+       Database --> Async["DbAwaitable / rocks::run"]
+       Database --> Codec["KeyCodec"]
+       CFs --> Store[".dftindex / provenance store"]
+       Async --> Runtime["Executor-backed threads"]
+       Codec --> Store
+
+See also:
+
+- :doc:`api/rocksdb`
+- :doc:`indexer`
diff --git a/docs/source/cpp_api/sqlite.rst b/docs/source/cpp_api/sqlite.rst
deleted file mode 100644
index 17597e87..00000000
--- a/docs/source/cpp_api/sqlite.rst
+++ /dev/null
@@ -1,337 +0,0 @@
-Async SQLite API
-================
-
-.. seealso::
-
-   For complete class and member documentation, see the
-   :doc:`API Reference <api/sqlite>`.
-
-
-Asynchronous SQLite database operations integrated with the dftracer executor and coroutine system. All classes and functions are in the ``dftracer::utils::sqlite`` namespace.
-
-Overview
---------
-
-The async SQLite module provides a thin coroutine-aware wrapper around SQLite3 that allows database operations to be performed asynchronously without blocking the executor. Operations are submitted to a dedicated SQLite thread pool and the coroutine is suspended until completion.
-
-Key Features:
-
-- **Async execution**: Database operations submit work to a thread pool and suspend via ``co_await``
-- **Sync fallback**: When no executor is active, operations run synchronously inline
-- **VFS integration**: Custom SQLite VFS implementation uses async I/O backend for file operations
-- **Minimal overhead**: Thin wrapper on top of SQLite3; no ORM abstractions
-- **Thread-safe**: All database access is serialized through the thread pool
-
-.. mermaid::
-
-   sequenceDiagram
-       participant Task as CoroTask
-       participant Await as SqliteAwaitable
-       participant Pool as SQLite ThreadPool
-       participant DB as sqlite3
-
-       Task->>Await: co_await sqlite::run(fn)
-       Await->>Pool: submit work
-       Note over Task: suspended
-       Pool->>DB: execute SQL
-       DB-->>Pool: result
-       Pool-->>Await: complete
-       Await-->>Task: resume with result
-
-Database Management
--------------------
-
-The ``SqliteDatabase`` class wraps a SQLite connection:
-
-Opening a Database
-~~~~~~~~~~~~~~~~~~~
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/database.h>
-
-   // Create and open an in-memory database
-   sqlite::SqliteDatabase db;
-   db.open(":memory:");
-
-   // Or open a file-backed database
-   sqlite::SqliteDatabase db("path/to/db.sqlite");
-
-   // Check if open
-   if (db.is_open()) {
-       // Database is ready
-   }
-
-   // Get the raw sqlite3* handle for advanced SQLite API
-   sqlite3 *raw_db = db.get();
-
-Custom VFS
-~~~~~~~~~~
-
-For databases that should use the async I/O backend:
-
-.. code-block:: cpp
-
-   // Register the dftracer async I/O VFS
-   // This is typically done once at application startup
-   sqlite::register_dftracer_sqlite_vfs(io_backend, executor);
-
-   // Then open with the custom VFS
-   sqlite::SqliteDatabase db;
-   db.open_with_vfs("trace.db", "dftracer");
-
-   // Later, unregister when shutting down
-   sqlite::unregister_dftracer_sqlite_vfs();
-
-VFS Implementation Details
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The ``DfTracerSqliteVfs`` is a custom SQLite Virtual File System that:
-
-- Replaces SQLite's default file I/O with async operations from the I/O backend
-- Handles WAL mode, synchronization, and shared memory regions
-- Integrates with the Executor to resume coroutines on completion
-
-Prepared Statements
--------------------
-
-The ``SqliteStmt`` class wraps a compiled SQL statement:
-
-Binding Parameters
-~~~~~~~~~~~~~~~~~~~
-
-SQLite uses placeholders (``?``, ``?1``, ``:name``) in SQL. Bind values before execution:
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/statement.h>
-
-   sqlite::SqliteStmt stmt(db, "INSERT INTO logs (id, message, level) VALUES (?, ?, ?)");
-
-   stmt.bind_int(1, 42);
-   stmt.bind_text(2, "Connection opened");
-   stmt.bind_int(3, INFO_LEVEL);
-
-   // Execute and handle result...
-
-Binding Functions
-~~~~~~~~~~~~~~~~~
-
-Statement Execution (Async)
----------------------------
-
-Use the ``SqliteAwaitable<T>`` template to execute arbitrary database operations asynchronously:
-
-The Generic ``run()`` Function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-For simple async database work that doesn't require a ``SqliteDatabase`` object:
-
-Example: Async Query
-^^^^^^^^^^^^^^^^^^^^
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/async.h>
-   #include <dftracer/utils/core/sqlite/database.h>
-   #include <dftracer/utils/core/sqlite/statement.h>
-   #include <dftracer/utils/core/coro/task.h>
-   #include <iostream>
-
-   using namespace dftracer::utils;
-
-   CoroTask<void> count_events(sqlite::SqliteDatabase& db) {
-       // Define work to run on the sqlite thread pool
-       auto result = co_await sqlite::run([&db]() {
-           sqlite::SqliteStmt stmt(db, "SELECT COUNT(*) FROM events");
-           // Use SQLite C API directly
-           sqlite3 *raw_db = db.get();
-           sqlite3_stmt *raw_stmt = stmt.get();
-
-           int count = 0;
-           if (sqlite3_step(raw_stmt) == SQLITE_ROW) {
-               count = sqlite3_column_int(raw_stmt, 0);
-           }
-           return count;
-       });
-
-       std::cout << "Total events: " << result << std::endl;
-   }
-
-Async Submission Helpers
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-Low-level helpers for integrating with the executor and thread pool:
-
-Error Handling
---------------
-
-The ``SqliteError`` exception class represents database errors:
-
-Error Types
-~~~~~~~~~~~
-
-Errors are categorized into types:
-
-- ``DATABASE_ERROR``: SQLite runtime error (e.g., constraint violation, locked database)
-- ``STATEMENT_ERROR``: Prepared statement compilation or execution error
-- ``OPEN_ERROR``: Database open failure
-- ``VFS_ERROR``: VFS registration or I/O error
-- ``UNKNOWN_ERROR``: Unexpected error condition
-
-Example: Error Handling
-^^^^^^^^^^^^^^^^^^^^^^^
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/error.h>
-   #include <dftracer/utils/core/sqlite/database.h>
-
-   try {
-       sqlite::SqliteDatabase db("data.db");
-       // ... database operations ...
-   } catch (const sqlite::SqliteError& e) {
-       if (e.type() == sqlite::SqliteError::OPEN_ERROR) {
-           std::cerr << "Cannot open database: " << e.what() << std::endl;
-       } else {
-           std::cerr << "Database error: " << e.what() << std::endl;
-       }
-   }
-
-Complete Async Example
-----------------------
-
-A complete example showing async database initialization, insertion, and querying:
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/async.h>
-   #include <dftracer/utils/core/sqlite/database.h>
-   #include <dftracer/utils/core/sqlite/statement.h>
-   #include <dftracer/utils/core/coro/task.h>
-   #include <dftracer/utils/core/pipeline/pipeline.h>
-   #include <dftracer/utils/core/pipeline/pipeline_config.h>
-   #include <iostream>
-
-   using namespace dftracer::utils;
-
-   // Initialize database schema async
-   CoroTask<void> init_db(sqlite::SqliteDatabase& db) {
-       co_await sqlite::run([&db]() {
-           sqlite3_exec(db.get(),
-               "CREATE TABLE IF NOT EXISTS logs ("
-               "  id INTEGER PRIMARY KEY,"
-               "  timestamp INTEGER,"
-               "  message TEXT"
-               ");",
-               nullptr, nullptr, nullptr);
-           return true;
-       });
-       std::cout << "Database initialized" << std::endl;
-   }
-
-   // Insert a log entry async
-   CoroTask<void> insert_log(sqlite::SqliteDatabase& db, int id,
-                             long ts, const std::string& msg) {
-       co_await sqlite::run([&db, id, ts, &msg]() {
-           sqlite::SqliteStmt stmt(db, "INSERT INTO logs VALUES (?, ?, ?)");
-           stmt.bind_int(1, id);
-           stmt.bind_int64(2, ts);
-           stmt.bind_text(3, msg);
-
-           sqlite3_step(stmt.get());
-           return true;
-       });
-       std::cout << "Inserted log entry " << id << std::endl;
-   }
-
-   // Query logs async
-   CoroTask<void> query_logs(sqlite::SqliteDatabase& db) {
-       auto count = co_await sqlite::run([&db]() {
-           sqlite::SqliteStmt stmt(db, "SELECT COUNT(*) FROM logs");
-           sqlite3_step(stmt.get());
-           return sqlite3_column_int(stmt.get(), 0);
-       });
-       std::cout << "Database has " << count << " log entries" << std::endl;
-   }
-
-   // Main coroutine
-   CoroTask<void> main_app() {
-       sqlite::SqliteDatabase db;
-       db.open(":memory:");
-
-       co_await init_db(db);
-       co_await insert_log(db, 1, 1000, "First event");
-       co_await insert_log(db, 2, 2000, "Second event");
-       co_await query_logs(db);
-   }
-
-   int main() {
-       auto config = PipelineConfig()
-           .with_name("SqliteExample")
-           .with_compute_threads(1);
-
-       auto task = make_task([](CoroScope& scope) -> CoroTask<void> {
-           co_await main_app();
-       }, "MainApp");
-
-       Pipeline pipeline(config);
-       pipeline.set_source({task});
-       pipeline.execute();
-       return 0;
-   }
-
-Raw SQLite API Access
----------------------
-
-For advanced use cases, you can access the underlying SQLite C API directly:
-
-.. code-block:: cpp
-
-   #include <sqlite3.h>
-   #include <dftracer/utils/core/sqlite/database.h>
-
-   sqlite::SqliteDatabase db("app.db");
-
-   // Get raw sqlite3* handle
-   sqlite3 *raw_db = db.get();
-
-   // Use any SQLite C API function
-   const char *sql = "SELECT * FROM users WHERE id = ?";
-   sqlite3_stmt *stmt = nullptr;
-   sqlite3_prepare_v2(raw_db, sql, -1, &stmt, nullptr);
-
-   // ... bind parameters and execute ...
-   sqlite3_finalize(stmt);
-
-Sync Operations Outside Executor
----------------------------------
-
-``SqliteDatabase`` can be used outside the coroutine executor for synchronous
-operations. When ``SqliteAwaitable`` detects no executor thread pool
-(``pool_ == nullptr``), it executes the operation inline in ``await_ready()``
-without suspending the coroutine.
-
-For fully synchronous usage (no executor at all), use ``SqliteDatabase``
-directly with the raw SQLite C API:
-
-.. code-block:: cpp
-
-   #include <dftracer/utils/core/sqlite/database.h>
-
-   sqlite::SqliteDatabase db("data.db");
-
-   // Use sqlite3 C API directly
-   sqlite3 *raw = db.get();
-
-   sqlite3_exec(raw, "CREATE TABLE IF NOT EXISTS kv (k TEXT, v TEXT)",
-                nullptr, nullptr, nullptr);
-
-   sqlite3_stmt *stmt = nullptr;
-   sqlite3_prepare_v2(raw, "INSERT INTO kv VALUES (?, ?)", -1, &stmt, nullptr);
-   sqlite3_bind_text(stmt, 1, "key", -1, SQLITE_STATIC);
-   sqlite3_bind_text(stmt, 2, "value", -1, SQLITE_STATIC);
-   sqlite3_step(stmt);
-   sqlite3_finalize(stmt);
-
-   db.close();
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 5d526975..d3637b6d 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -43,20 +43,19 @@ Before building dftracer utilities, ensure you have:
 - CMake 3.5 or higher
 - C++20 compatible compiler (GCC 11+, Clang 14+)
 - zlib development library
-- SQLite3 development library
 - pkg-config
 
 On Ubuntu/Debian:
 
 .. code-block:: bash
 
-   sudo apt-get install cmake build-essential zlib1g-dev libsqlite3-dev pkg-config
+   sudo apt-get install cmake build-essential zlib1g-dev pkg-config
 
 On macOS:
 
 .. code-block:: bash
 
-   brew install cmake zlib sqlite pkg-config
+   brew install cmake zlib pkg-config
 
 Building from Source
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index d51db6f9..11cbee48 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -246,7 +246,7 @@ Create and use indexes for faster access:
    from dftracer.utils import Indexer
 
    # Create an indexer
-   indexer = Indexer("trace.pfw.gz", "trace.pfw.gz.idx")
+   indexer = Indexer("trace.pfw.gz")
 
    # Build the index if needed
    if indexer.need_rebuild():
diff --git a/docs/source/utilities/indexer.rst b/docs/source/utilities/indexer.rst
index 8fd06a4b..6306ecd8 100644
--- a/docs/source/utilities/indexer.rst
+++ b/docs/source/utilities/indexer.rst
@@ -244,6 +244,11 @@ Python API
    with Indexer("trace.pfw.gz", build_bloom=True) as indexer:
        indexer.build()  # reuses checkpoints, adds bloom only
 
+   # close() only releases this wrapper; the shared .dftindex store remains available
+   indexer = Indexer("trace.pfw.gz")
+   indexer.build()
+   indexer.close()
+
    # With explicit Runtime for thread pool control
    from dftracer.utils import Runtime
 
@@ -270,7 +275,7 @@ Python API
    partial = reader.read_lines(start_line=100, end_line=200)
 
    # Properties
-   print(reader.has_index)    # True if .idx exists
+   print(reader.has_index)    # True if .dftindex exists
    print(reader.num_lines)    # precise line count
 
    # Context manager
diff --git a/include/dftracer/utils/core/common/constants.h b/include/dftracer/utils/core/common/constants.h
index a6a8b740..72e27dcf 100644
--- a/include/dftracer/utils/core/common/constants.h
+++ b/include/dftracer/utils/core/common/constants.h
@@ -22,7 +22,7 @@ static constexpr std::uint64_t DEFAULT_CHECKPOINT_SIZE =
 static constexpr std::size_t DEFAULT_INDEX_SIZE_THRESHOLD =
     1 * 1024 * 1024;   // 1MB
 extern const char* const& SQL_SCHEMA;
-inline const char* EXTENSION = ".idx";
+inline const char* EXTENSION = ".dftindex";
 }  // namespace indexer
 
 namespace reader {
@@ -45,7 +45,7 @@ static constexpr std::size_t FILE_IO_BUFFER_SIZE =
 #define DFTRACER_UTILS_DEFAULT_BUFFER_SIZE 65536
 #define DFTRACER_UTILS_SKIP_BUFFER_SIZE 131072
 #define DFTRACER_UTILS_FILE_IO_BUFFER_SIZE 262144
-#define DFTRACER_UTILS_INDEX_EXTENSION ".idx"
+#define DFTRACER_UTILS_INDEX_EXTENSION ".dftindex"
 
 extern const char *DFTRACER_UTILS_SQL_SCHEMA;
 
diff --git a/include/dftracer/utils/core/common/scoped_fd.h b/include/dftracer/utils/core/common/scoped_fd.h
new file mode 100644
index 00000000..49d3139c
--- /dev/null
+++ b/include/dftracer/utils/core/common/scoped_fd.h
@@ -0,0 +1,44 @@
+#ifndef DFTRACER_UTILS_CORE_COMMON_SCOPED_FD_H
+#define DFTRACER_UTILS_CORE_COMMON_SCOPED_FD_H
+
+#include <unistd.h>
+
+namespace dftracer::utils {
+
+/// Move-only owner of a file descriptor; the destructor closes it.
+struct ScopedFd {
+    int value = -1;
+    ScopedFd() = default;
+    explicit ScopedFd(int fd) : value(fd) {}
+    ~ScopedFd() { reset(); }
+
+    // Copies are deleted so two owners can never close the same descriptor.
+    ScopedFd(const ScopedFd&) = delete;
+    ScopedFd& operator=(const ScopedFd&) = delete;
+
+    ScopedFd(ScopedFd&& other) noexcept : value(other.value) {
+        other.value = -1;
+    }
+
+    // Closes whatever is currently held before adopting other's descriptor.
+    ScopedFd& operator=(ScopedFd&& other) noexcept {
+        if (this == &other) return *this;
+        reset();
+        value = other.value;
+        other.value = -1;
+        return *this;
+    }
+
+    // Closes the descriptor if value >= 0 and resets value to -1.
+    void reset() {
+        if (value < 0) return;
+        ::close(value);
+        value = -1;
+    }
+
+    int get() const { return value; }
+};
+
+}  // namespace dftracer::utils
+
+#endif
diff --git a/include/dftracer/utils/core/env.h b/include/dftracer/utils/core/env.h
new file mode 100644
index 00000000..4a8003f9
--- /dev/null
+++ b/include/dftracer/utils/core/env.h
@@ -0,0 +1,35 @@
+#ifndef DFTRACER_UTILS_CORE_ENV_H
+#define DFTRACER_UTILS_CORE_ENV_H
+
+#include <optional>
+#include <string_view>
+#include <type_traits>
+
+namespace dftracer::utils {
+
+class Env {
+   public:
+    template <typename T = std::string_view>
+    static std::optional<T> get(std::string_view name);
+    // Declared here; the definition is not part of this header.
+    static int rocksdb_max_open_files();
+};
+// Primary template: instantiating it for an unsupported T fails to compile.
+template <typename T>
+std::optional<T> Env::get(std::string_view name) {
+    static_assert(sizeof(T) == 0,
+                  "Env::get<T>() requires an explicit specialization");
+    (void)name;
+    return std::nullopt;
+}
+// Supported value types; only their declarations are visible in this header.
+template <>
+std::optional<std::string_view> Env::get<std::string_view>(
+    std::string_view name);
+// NOTE(review): presumably parses the variable as an integer -- confirm.
+template <>
+std::optional<int> Env::get<int>(std::string_view name);
+
+}  // namespace dftracer::utils
+
+#endif  // DFTRACER_UTILS_CORE_ENV_H
diff --git a/include/dftracer/utils/core/io/io_backend.h b/include/dftracer/utils/core/io/io_backend.h
index 63894f46..232f0dab 100644
--- a/include/dftracer/utils/core/io/io_backend.h
+++ b/include/dftracer/utils/core/io/io_backend.h
@@ -13,6 +13,8 @@
 
 namespace dftracer::utils::io {
 
+using IoCompletionFn = void (*)(void *context, ssize_t result) noexcept;
+
 /// Backend selection preference.
 enum class IoBackendType {
     AUTO,  // Runtime detection: io_uring > epoll/kqueue+threadpool > threadpool
@@ -46,6 +48,12 @@ class IoBackend {
     virtual IoAwaitable submit_pread(int fd, void *buf, std::size_t len,
                                      off_t offset) = 0;
 
+    /// Submit an async positional read with a completion callback.
+    /// The callback receives either a byte count or a negative errno.
+    virtual void submit_pread_callback(int fd, void *buf, std::size_t len,
+                                       off_t offset, IoCompletionFn completion,
+                                       void *context) = 0;
+
     /// Submit an async positional write. Only seekable fds.
     virtual IoAwaitable submit_pwrite(int fd, const void *buf, std::size_t len,
                                       off_t offset) = 0;
diff --git a/include/dftracer/utils/core/pipeline/executor.h b/include/dftracer/utils/core/pipeline/executor.h
index 334c3e6c..f2daf5e2 100644
--- a/include/dftracer/utils/core/pipeline/executor.h
+++ b/include/dftracer/utils/core/pipeline/executor.h
@@ -42,7 +42,7 @@ struct ExecutorConfig {
     std::size_t io_pool_size = 4;
     io::IoBackendType io_backend_type = io::IoBackendType::AUTO;
     unsigned io_batch_threshold = 16;
-    std::size_t sqlite_pool_size = 2;
+    std::size_t db_pool_size = 2;
 };
 
 /**
@@ -229,14 +229,14 @@ class Executor {
     // I/O backend (owned by executor, created by factory)
     std::unique_ptr<io::IoBackend> io_backend_;
 
-    // Dedicated thread pool for SQLite async operations
-    std::unique_ptr<io::IoThreadPool> sqlite_pool_;
+    // Dedicated thread pool for blocking DB operations.
+    std::unique_ptr<io::IoThreadPool> db_pool_;
 
     // Configuration (stored from ExecutorConfig)
     std::size_t io_pool_size_ = 4;
     io::IoBackendType io_backend_type_ = io::IoBackendType::AUTO;
     unsigned io_batch_threshold_ = 16;
-    std::size_t sqlite_pool_size_ = 2;
+    std::size_t db_pool_size_ = 2;
 
    public:
     /**
@@ -306,9 +306,9 @@ class Executor {
     const io::IoBackend& io_backend() const { return *io_backend_; }
 
     /**
-     * Get the dedicated SQLite thread pool (nullptr if not started).
+     * Get the dedicated DB thread pool (nullptr if not started).
      */
-    io::IoThreadPool* sqlite_pool() noexcept;
+    io::IoThreadPool* db_pool() noexcept;
 
     /**
      * Get the executor running on the current worker thread (nullptr
diff --git a/include/dftracer/utils/core/pipeline/pipeline_config.h b/include/dftracer/utils/core/pipeline/pipeline_config.h
index 1e02257e..5f727620 100644
--- a/include/dftracer/utils/core/pipeline/pipeline_config.h
+++ b/include/dftracer/utils/core/pipeline/pipeline_config.h
@@ -73,7 +73,7 @@ struct PipelineConfig {
     io::IoBackendType io_backend_type =
         io::IoBackendType::AUTO;       // Backend selection
     unsigned io_batch_threshold = 16;  // SQE batch threshold (0 = per-op)
-    std::size_t sqlite_pool_size = 2;  // SQLite async thread pool size
+    std::size_t db_pool_size = 2;      // Blocking DB async thread pool size
 
     /**
      * Set pipeline name
@@ -201,10 +201,10 @@ struct PipelineConfig {
     }
 
     /**
-     * Set SQLite async thread pool size (default 2)
+     * Set the size of the thread pool that runs blocking DB work (default 2)
      */
-    PipelineConfig& with_sqlite_pool_size(std::size_t size) {
-        sqlite_pool_size = size;
+    PipelineConfig& with_db_pool_size(std::size_t size) {
+        db_pool_size = size;
         return *this;
     }
 
diff --git a/include/dftracer/utils/core/rocksdb/async.h b/include/dftracer/utils/core/rocksdb/async.h
new file mode 100644
index 00000000..3ff71a07
--- /dev/null
+++ b/include/dftracer/utils/core/rocksdb/async.h
@@ -0,0 +1,130 @@
+#ifndef DFTRACER_UTILS_CORE_ROCKSDB_ASYNC_H
+#define DFTRACER_UTILS_CORE_ROCKSDB_ASYNC_H
+
+#include <coroutine>
+#include <exception>
+#include <functional>
+#include <optional>
+#include <utility>
+
+namespace dftracer::utils::io {
+class IoThreadPool;
+}  // namespace dftracer::utils::io
+
+namespace dftracer::utils::rocksdb {
+
+io::IoThreadPool* get_db_pool();
+void db_async_submit(io::IoThreadPool* pool, std::function<void()> fn);
+void db_async_resume_on(void* executor, std::coroutine_handle<> h);
+void* get_current_executor_opaque();
+
+/// Runs `fn` on the DB pool, then resumes the awaiting coroutine through
+/// db_async_resume_on(). With a null pool the call happens inline in
+/// await_ready(). Exceptions from `fn` are rethrown by await_resume().
+template <typename T>
+class [[nodiscard]] DbAwaitable {
+    io::IoThreadPool* pool_;
+    void* executor_;
+    std::function<T()> fn_;
+    std::optional<T> result_;
+    std::exception_ptr error_;
+    std::coroutine_handle<> handle_;
+
+    // Invokes the stored callable once, keeping its result or its exception.
+    void execute() noexcept {
+        try {
+            auto fn = std::move(fn_);
+            fn_ = {};  // moved-from state is unspecified; make it empty
+            result_.emplace(fn());
+        } catch (...) {
+            error_ = std::current_exception();
+        }
+    }
+
+   public:
+    DbAwaitable(io::IoThreadPool* pool, void* executor, std::function<T()> fn)
+        : pool_(pool), executor_(executor), fn_(std::move(fn)) {}
+
+    // With no pool the work runs inline here and the coroutine never suspends.
+    bool await_ready() noexcept {
+        if (pool_ != nullptr) return false;
+        execute();
+        return true;
+    }
+
+    // The submitted job dereferences this awaiter on the pool thread, so it
+    // must stay alive until the job has called db_async_resume_on().
+    void await_suspend(std::coroutine_handle<> h) {
+        handle_ = h;
+        db_async_submit(pool_, [self = this] {
+            self->execute();
+            db_async_resume_on(self->executor_, self->handle_);
+        });
+    }
+
+    T await_resume() {
+        if (error_ != nullptr) std::rethrow_exception(error_);
+        return std::move(*result_);
+    }
+};
+
+/// `void` specialization: runs `fn` on the DB pool (or inline when the pool
+/// is null) and resumes the awaiting coroutine via db_async_resume_on().
+/// There is no result; await_resume() only rethrows a captured exception.
+template <>
+class [[nodiscard]] DbAwaitable<void> {
+    io::IoThreadPool* pool_;
+    void* executor_;
+    std::function<void()> fn_;
+    std::exception_ptr error_;
+    std::coroutine_handle<> handle_;
+
+    // Invokes the stored callable once, recording any exception it throws.
+    void execute() noexcept {
+        try {
+            auto fn = std::move(fn_);
+            fn_ = {};  // moved-from state is unspecified; make it empty
+            fn();
+        } catch (...) {
+            error_ = std::current_exception();
+        }
+    }
+
+   public:
+    DbAwaitable(io::IoThreadPool* pool, void* executor,
+                std::function<void()> fn)
+        : pool_(pool), executor_(executor), fn_(std::move(fn)) {}
+
+    // With no pool the work runs inline here and the coroutine never suspends.
+    bool await_ready() noexcept {
+        if (pool_ != nullptr) return false;
+        execute();
+        return true;
+    }
+
+    // The submitted job dereferences this awaiter on the pool thread, so it
+    // must stay alive until the job has called db_async_resume_on().
+    void await_suspend(std::coroutine_handle<> h) {
+        handle_ = h;
+        db_async_submit(pool_, [self = this] {
+            self->execute();
+            db_async_resume_on(self->executor_, self->handle_);
+        });
+    }
+
+    void await_resume() {
+        if (error_ != nullptr) std::rethrow_exception(error_);
+    }
+};
+
+/// Bind `fn` to the current DB pool/executor; it runs only once co_awaited.
+template <typename F>
+[[nodiscard]] auto run(F&& fn) -> DbAwaitable<decltype(fn())> {
+    auto* pool = get_db_pool();
+    auto* executor = get_current_executor_opaque();
+    return DbAwaitable<decltype(fn())>(pool, executor, std::forward<F>(fn));
+}
+
+}  // namespace dftracer::utils::rocksdb
+
+#endif  // DFTRACER_UTILS_CORE_ROCKSDB_ASYNC_H
diff --git a/include/dftracer/utils/core/rocksdb/database.h b/include/dftracer/utils/core/rocksdb/database.h
new file mode 100644
index 00000000..e4d70216
--- /dev/null
+++ b/include/dftracer/utils/core/rocksdb/database.h
@@ -0,0 +1,82 @@
+#ifndef DFTRACER_UTILS_CORE_ROCKSDB_DATABASE_H
+#define DFTRACER_UTILS_CORE_ROCKSDB_DATABASE_H
+
+#include <rocksdb/db.h>
+#include <rocksdb/env.h>
+#include <rocksdb/file_system.h>
+#include <rocksdb/options.h>
+#include <rocksdb/write_batch.h>
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+namespace dftracer::utils::rocksdb {
+
+void mark_process_exiting_for_rocksdb();
+
+class RocksDatabase {
+   public:
+    using Batch = ::rocksdb::WriteBatch;
+    enum class OpenMode { ReadWrite, ReadOnly };
+
+    RocksDatabase();
+    explicit RocksDatabase(const std::string& db_path,
+                           OpenMode open_mode = OpenMode::ReadWrite);
+    ~RocksDatabase();
+    // Copying is disabled; the type is movable (noexcept moves below).
+    RocksDatabase(const RocksDatabase&) = delete;
+    RocksDatabase& operator=(const RocksDatabase&) = delete;
+
+    RocksDatabase(RocksDatabase&& other) noexcept;
+    RocksDatabase& operator=(RocksDatabase&& other) noexcept;
+    // NOTE(review): open() presumably returns false on failure -- confirm.
+    bool open(const std::string& db_path,
+              OpenMode open_mode = OpenMode::ReadWrite);
+    void close();
+    // Read-only accessors; get() exposes the raw ::rocksdb::DB pointer.
+    bool is_open() const noexcept;
+    bool is_read_only() const noexcept;
+    const std::string& path() const noexcept;
+    ::rocksdb::DB* get() const noexcept;
+    // Single-key operations; `column_family` defaults to "default".
+    ::rocksdb::Status put(std::string_view key, std::string_view value,
+                          std::string_view column_family = "default");
+    ::rocksdb::Status get(std::string_view key, std::string* value,
+                          std::string_view column_family = "default") const;
+    ::rocksdb::Status del(std::string_view key,
+                          std::string_view column_family = "default");
+    // Overloads taking a Batch plus an explicit column family name.
+    ::rocksdb::Status put(Batch& batch, std::string_view column_family,
+                          std::string_view key, std::string_view value);
+    ::rocksdb::Status del(Batch& batch, std::string_view column_family,
+                          std::string_view key);
+    // Batch lifecycle helpers; their semantics are defined out of line.
+    Batch begin_batch() const;
+    ::rocksdb::Status commit_batch(Batch& batch);
+    // The iterator is handed to the caller as a unique_ptr.
+    std::unique_ptr<::rocksdb::Iterator> new_iterator(
+        std::string_view column_family = "default") const;
+
+    static std::vector<std::string> default_column_families();
+    static ::rocksdb::Options default_options();
+    static ::rocksdb::ColumnFamilyOptions default_column_family_options();
+
+   private:
+    ::rocksdb::ColumnFamilyHandle* column_family_handle(
+        std::string_view column_family) const;
+
+    std::string db_path_;
+    OpenMode open_mode_ = OpenMode::ReadWrite;
+    std::shared_ptr<::rocksdb::FileSystem> file_system_;
+    std::unique_ptr<::rocksdb::Env> env_;
+    ::rocksdb::DB* db_ = nullptr;
+    std::unordered_map<std::string, ::rocksdb::ColumnFamilyHandle*>
+        column_families_;
+};
+
+}  // namespace dftracer::utils::rocksdb
+
+#endif  // DFTRACER_UTILS_CORE_ROCKSDB_DATABASE_H
diff --git a/include/dftracer/utils/core/rocksdb/db_manager.h b/include/dftracer/utils/core/rocksdb/db_manager.h
new file mode 100644
index 00000000..c45eec95
--- /dev/null
+++ b/include/dftracer/utils/core/rocksdb/db_manager.h
@@ -0,0 +1,40 @@
+#ifndef DFTRACER_UTILS_CORE_ROCKSDB_DB_MANAGER_H
+#define DFTRACER_UTILS_CORE_ROCKSDB_DB_MANAGER_H
+
+#include <dftracer/utils/core/rocksdb/database.h>
+
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace dftracer::utils::rocksdb {
+
+// Process-wide registry of open RocksDB instances keyed by their normalized
+// .dftindex root path. It tracks one live instance per path through weak
+// references, so wrappers (IndexDatabase, ProvenanceDatabase, Python bindings,
+// etc.) share a DB while any holder keeps it alive instead of reopening it.
+class RocksDBManager {
+   public:
+    static RocksDBManager& instance();
+    // Returns a shared handle; the manager itself stores only a weak_ptr.
+    std::shared_ptr<RocksDatabase> get_or_open(
+        const std::string& db_path,
+        RocksDatabase::OpenMode open_mode = RocksDatabase::OpenMode::ReadWrite);
+    void reset(const std::string& db_path);
+    void shutdown();
+
+   private:
+    RocksDBManager() = default;
+
+    std::mutex mutex_;
+    std::condition_variable cv_;
+    std::unordered_map<std::string, std::weak_ptr<RocksDatabase>> databases_;
+    std::unordered_set<std::string> opening_;
+};
+
+}  // namespace dftracer::utils::rocksdb
+
+#endif  // DFTRACER_UTILS_CORE_ROCKSDB_DB_MANAGER_H
diff --git a/include/dftracer/utils/core/rocksdb/filesystem.h b/include/dftracer/utils/core/rocksdb/filesystem.h
new file mode 100644
index 00000000..8d70429c
--- /dev/null
+++ b/include/dftracer/utils/core/rocksdb/filesystem.h
@@ -0,0 +1,19 @@
+#ifndef DFTRACER_UTILS_CORE_ROCKSDB_FILESYSTEM_H
+#define DFTRACER_UTILS_CORE_ROCKSDB_FILESYSTEM_H
+
+#include <memory>
+
+namespace rocksdb {
+class Env;
+class FileSystem;
+}  // namespace rocksdb
+
+namespace dftracer::utils::rocksdb {
+
+std::shared_ptr<::rocksdb::FileSystem> make_dftracer_file_system();
+std::unique_ptr<::rocksdb::Env> make_dftracer_env(
+    const std::shared_ptr<::rocksdb::FileSystem>& file_system);
+
+}  // namespace dftracer::utils::rocksdb
+
+#endif  // DFTRACER_UTILS_CORE_ROCKSDB_FILESYSTEM_H
diff --git a/include/dftracer/utils/core/rocksdb/key_codec.h b/include/dftracer/utils/core/rocksdb/key_codec.h
new file mode 100644
index 00000000..59223617
--- /dev/null
+++ b/include/dftracer/utils/core/rocksdb/key_codec.h
@@ -0,0 +1,39 @@
+#ifndef DFTRACER_UTILS_CORE_ROCKSDB_KEY_CODEC_H
+#define DFTRACER_UTILS_CORE_ROCKSDB_KEY_CODEC_H
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+namespace dftracer::utils::rocksdb {
+
+class KeyCodec {  // NOTE(review): "be" presumably = big-endian; confirm
+   public:
+    static std::string encode_be32(std::uint32_t value);
+    static std::string encode_be64(std::uint64_t value);
+
+    static std::uint32_t decode_be32(std::string_view bytes);
+    static std::uint64_t decode_be64(std::string_view bytes);
+
+    static void append_be32(std::string& out, std::uint32_t value);
+    static void append_be64(std::string& out, std::uint64_t value);
+};
+
+class KeyBuilder {  // append_* return KeyBuilder& (presumably *this) to chain
+   public:
+    KeyBuilder& append_tag(std::string_view tag);
+    KeyBuilder& append_separator();
+    KeyBuilder& append_string(std::string_view value);
+    KeyBuilder& append_be32(std::uint32_t value);
+    KeyBuilder& append_be64(std::uint64_t value);
+
+    std::string build() const;
+    void clear();
+
+   private:
+    std::string key_;
+};
+
+}  // namespace dftracer::utils::rocksdb
+
+#endif  // DFTRACER_UTILS_CORE_ROCKSDB_KEY_CODEC_H
diff --git a/include/dftracer/utils/core/runtime.h b/include/dftracer/utils/core/runtime.h
index 77d32f2b..0e617a93 100644
--- a/include/dftracer/utils/core/runtime.h
+++ b/include/dftracer/utils/core/runtime.h
@@ -91,9 +91,13 @@ TypedTaskHandle<T> Runtime::submit(coro::CoroTask<T> task, std::string name) {
            std::shared_ptr<std::atomic<TaskIndex>> task_id) -> coro::Coro {
         try {
             T val = co_await std::move(t);
+            t = coro::CoroTask<T>{std::coroutine_handle<
+                typename coro::CoroTask<T>::promise_type>{}};
             exec->mark_coro_completed(task_id->load(std::memory_order_acquire));
             tp->set_value(std::move(val));
         } catch (...) {
+            t = coro::CoroTask<T>{std::coroutine_handle<
+                typename coro::CoroTask<T>::promise_type>{}};
             exec->mark_coro_completed(task_id->load(std::memory_order_acquire));
             auto ex = std::current_exception();
             tp->set_exception(ex);
diff --git a/include/dftracer/utils/core/sqlite/async.h b/include/dftracer/utils/core/sqlite/async.h
deleted file mode 100644
index c907f619..00000000
--- a/include/dftracer/utils/core/sqlite/async.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef DFTRACER_UTILS_CORE_SQLITE_ASYNC_H
-#define DFTRACER_UTILS_CORE_SQLITE_ASYNC_H
-
-#include <coroutine>
-#include <functional>
-#include <utility>
-
-namespace dftracer::utils::io {
-class IoThreadPool;
-}  // namespace dftracer::utils::io
-
-namespace dftracer::utils::sqlite {
-
-// Returns the sqlite IoThreadPool from Executor::current(), or nullptr.
-// Defined in async.cpp to avoid Executor header dependency.
-io::IoThreadPool *get_sqlite_pool();
-
-// Non-template helper — submits work to the pool.
-// Defined in async.cpp where IoThreadPool is visible.
-void sqlite_async_submit(io::IoThreadPool *pool, std::function<void()> fn);
-
-// Resumes the coroutine on the given executor, or inline if null.
-// The executor pointer must be captured at await_suspend time (on an
-// executor worker thread), NOT retrieved via TLS at resume time
-// (which may be on a non-executor pool thread).
-// Defined in async.cpp to avoid Executor header dependency.
-void sqlite_async_resume_on(void *executor, std::coroutine_handle<> h);
-
-// Returns an opaque pointer to Executor::current() for capture.
-// Defined in async.cpp to avoid Executor header dependency.
-void *get_current_executor_opaque();
-
-template <typename T>
-class SqliteAwaitable {
-    io::IoThreadPool *pool_;
-    std::function<T()> fn_;
-    T result_{};
-    std::coroutine_handle<> handle_;
-
-   public:
-    SqliteAwaitable(io::IoThreadPool *pool, std::function<T()> fn)
-        : pool_(pool), fn_(std::move(fn)) {}
-
-    bool await_ready() noexcept {
-        if (pool_ == nullptr) {
-            result_ = fn_();
-            return true;
-        }
-        return false;
-    }
-
-    void await_suspend(std::coroutine_handle<> h) {
-        handle_ = h;
-        auto *self = this;
-        // Capture the executor while still on a worker thread.
-        // The lambda runs on the sqlite pool thread where TLS is unset.
-        void *exec = get_current_executor_opaque();
-        sqlite_async_submit(pool_, [self, exec] {
-            self->result_ = self->fn_();
-            sqlite_async_resume_on(exec, self->handle_);
-        });
-    }
-
-    T await_resume() { return std::move(result_); }
-};
-
-template <>
-class SqliteAwaitable<void> {
-    io::IoThreadPool *pool_;
-    std::function<void()> fn_;
-    std::coroutine_handle<> handle_;
-
-   public:
-    SqliteAwaitable(io::IoThreadPool *pool, std::function<void()> fn)
-        : pool_(pool), fn_(std::move(fn)) {}
-
-    bool await_ready() noexcept {
-        if (pool_ == nullptr) {
-            fn_();
-            return true;
-        }
-        return false;
-    }
-
-    void await_suspend(std::coroutine_handle<> h) {
-        handle_ = h;
-        auto *self = this;
-        // Capture the executor while still on a worker thread.
-        void *exec = get_current_executor_opaque();
-        sqlite_async_submit(pool_, [self, exec] {
-            self->fn_();
-            sqlite_async_resume_on(exec, self->handle_);
-        });
-    }
-
-    void await_resume() {}
-};
-
-// Free function — offload arbitrary work to the sqlite thread pool.
-// Use when you don't have a SqliteDatabase instance yet (e.g. the
-// lambda creates its own database internally).
-// Returns the pool-backed awaitable, or runs fn inline when no pool.
-template <typename F>
-auto run(F &&fn) -> SqliteAwaitable<decltype(fn())> {
-    using R = decltype(fn());
-    auto *pool = get_sqlite_pool();
-    return SqliteAwaitable<R>(pool, std::forward<F>(fn));
-}
-
-}  // namespace dftracer::utils::sqlite
-
-#endif  // DFTRACER_UTILS_CORE_SQLITE_ASYNC_H
diff --git a/include/dftracer/utils/core/sqlite/database.h b/include/dftracer/utils/core/sqlite/database.h
deleted file mode 100644
index 13baa34d..00000000
--- a/include/dftracer/utils/core/sqlite/database.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef DFTRACER_UTILS_CORE_SQLITE_DATABASE_H
-#define DFTRACER_UTILS_CORE_SQLITE_DATABASE_H
-
-#include <sqlite3.h>
-
-#include <string>
-
-namespace dftracer::utils::sqlite {
-
-class SqliteDatabase {
-   public:
-    SqliteDatabase();
-    explicit SqliteDatabase(const std::string &db_path);
-    ~SqliteDatabase();
-
-    SqliteDatabase(const SqliteDatabase &) = delete;
-    SqliteDatabase &operator=(const SqliteDatabase &) = delete;
-
-    SqliteDatabase(SqliteDatabase &&other) noexcept;
-    SqliteDatabase &operator=(SqliteDatabase &&other) noexcept;
-
-    bool open(const std::string &db_path);
-    void close();
-    bool open_with_vfs(const std::string &db_path, const char *vfs_name);
-
-    sqlite3 *get() const;
-    bool is_open() const;
-
-   private:
-    std::string db_path_;
-    sqlite3 *db_;
-};
-
-}  // namespace dftracer::utils::sqlite
-
-#endif  // DFTRACER_UTILS_CORE_SQLITE_DATABASE_H
diff --git a/include/dftracer/utils/core/sqlite/error.h b/include/dftracer/utils/core/sqlite/error.h
deleted file mode 100644
index 21707565..00000000
--- a/include/dftracer/utils/core/sqlite/error.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef DFTRACER_UTILS_CORE_SQLITE_ERROR_H
-#define DFTRACER_UTILS_CORE_SQLITE_ERROR_H
-
-#include <stdexcept>
-#include <string>
-
-namespace dftracer::utils::sqlite {
-
-class SqliteError : public std::runtime_error {
-   public:
-    enum Type {
-        DATABASE_ERROR,
-        STATEMENT_ERROR,
-        OPEN_ERROR,
-        VFS_ERROR,
-        UNKNOWN_ERROR
-    };
-
-    SqliteError(Type type, const std::string &message)
-        : std::runtime_error(format_message(type, message)), type_(type) {}
-
-    inline Type type() const { return type_; }
-
-   private:
-    Type type_;
-    static std::string format_message(Type type, const std::string &message);
-};
-
-}  // namespace dftracer::utils::sqlite
-
-#endif  // DFTRACER_UTILS_CORE_SQLITE_ERROR_H
diff --git a/include/dftracer/utils/core/sqlite/statement.h b/include/dftracer/utils/core/sqlite/statement.h
deleted file mode 100644
index c0a70d07..00000000
--- a/include/dftracer/utils/core/sqlite/statement.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef DFTRACER_UTILS_CORE_SQLITE_STATEMENT_H
-#define DFTRACER_UTILS_CORE_SQLITE_STATEMENT_H
-
-#include <sqlite3.h>
-
-#include <cstddef>
-#include <span>
-#include <string>
-#include <string_view>
-
-namespace dftracer::utils::sqlite {
-
-class SqliteDatabase;
-
-class SqliteStmt {
-   public:
-    SqliteStmt(const SqliteDatabase &db, const char *sql);
-    SqliteStmt(sqlite3 *db, const char *sql);
-    ~SqliteStmt();
-
-    SqliteStmt(const SqliteStmt &) = delete;
-    SqliteStmt &operator=(const SqliteStmt &) = delete;
-    SqliteStmt(SqliteStmt &&other) noexcept : stmt_(other.stmt_) {
-        other.stmt_ = nullptr;
-    }
-    SqliteStmt &operator=(SqliteStmt &&other) noexcept {
-        if (this != &other) {
-            if (stmt_) sqlite3_finalize(stmt_);
-            stmt_ = other.stmt_;
-            other.stmt_ = nullptr;
-        }
-        return *this;
-    }
-
-    operator sqlite3_stmt *();
-    sqlite3_stmt *get();
-
-    void reset();
-
-    void bind_int(int index, int value);
-    void bind_int64(int index, int64_t value);
-    void bind_double(int index, double value);
-    void bind_text(int index, const std::string &text);
-    void bind_text(int index, std::string_view text);
-    void bind_text(int index, const char *text, int length = -1,
-                   void (*destructor)(void *) = SQLITE_TRANSIENT);
-    void bind_blob(int index, const void *blob, int length);
-    void bind_blob(int index, std::span<const std::byte> data);
-    void bind_blob(int index, std::span<const unsigned char> data);
-    void bind_blob_static(int index, const void *blob, int length);
-    void bind_text_static(int index, std::string_view text);
-    void bind_null(int index);
-
-    void clear_bindings();
-    int bind_parameter_count();
-
-   private:
-    sqlite3_stmt *stmt_;
-
-    void validate_parameter_index(int index);
-};
-
-}  // namespace dftracer::utils::sqlite
-
-#endif  // DFTRACER_UTILS_CORE_SQLITE_STATEMENT_H
diff --git a/include/dftracer/utils/core/sqlite/vfs.h b/include/dftracer/utils/core/sqlite/vfs.h
deleted file mode 100644
index 39ca32b0..00000000
--- a/include/dftracer/utils/core/sqlite/vfs.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef DFTRACER_UTILS_CORE_SQLITE_VFS_H
-#define DFTRACER_UTILS_CORE_SQLITE_VFS_H
-
-#include <sqlite3.h>
-
-#include <cstdint>
-
-namespace dftracer::utils::io {
-class IoBackend;
-}  // namespace dftracer::utils::io
-
-namespace dftracer::utils {
-class Executor;
-}  // namespace dftracer::utils
-
-namespace dftracer::utils::sqlite {
-
-/// Maximum path length for VFS file paths.
-/// Matches mxPathname in the VFS registration.
-inline constexpr int VFS_MAX_PATHNAME = 512;
-
-struct DfTracerSqliteVfsAppData {
-    io::IoBackend* backend;
-    Executor* executor;
-};
-
-struct DfTracerSqliteVfsFile {
-    sqlite3_file base;  // Must be first, SQLite casts to this
-    io::IoBackend* backend;
-    Executor* executor;
-    int fd;
-    bool read_only;
-    char path[VFS_MAX_PATHNAME];
-    int shm_fd;
-    int n_shm_region;
-    void* shm_regions[32];
-};
-
-void register_dftracer_sqlite_vfs(io::IoBackend* backend, Executor* executor);
-void unregister_dftracer_sqlite_vfs();
-
-}  // namespace dftracer::utils::sqlite
-
-#endif  // DFTRACER_UTILS_CORE_SQLITE_VFS_H
diff --git a/include/dftracer/utils/server/trace_index.h b/include/dftracer/utils/server/trace_index.h
index 3afa0e3c..d131cd00 100644
--- a/include/dftracer/utils/server/trace_index.h
+++ b/include/dftracer/utils/server/trace_index.h
@@ -15,12 +15,12 @@
 namespace dftracer::utils::server {
 
 /// Scans a directory for trace files and caches paths to their
-/// sidecar index file (.idx). Used by API handlers to resolve file
+/// root-local `.dftindex` database. Used by API handlers to resolve file
 /// paths and check index availability.
 class TraceIndex {
    public:
     // Files below this compressed size are streamed directly without
-    // building a sidecar index file (.idx).  At 8 MB compressed
+    // building a `.dftindex` database. At 8 MB compressed
     // (~160 MB uncompressed with typical 20x JSON compression), a file
     // has only a handful of 32 MB checkpoints -- the indexing overhead
     // exceeds the benefit of bloom-filter skip.
@@ -29,7 +29,7 @@ class TraceIndex {
 
     struct FileInfo {
         std::string path;
-        std::string idx_path;
+        std::string index_path;
         bool has_bloom_data = false;
         bool has_checkpoint_index = false;
         bool is_small = false;
diff --git a/include/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.h b/include/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.h
index 9e8d551e..0691c45e 100644
--- a/include/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.h
@@ -29,7 +29,7 @@ using dftracer::utils::utilities::composites::dft::DFTracerEvent;
 
 struct ChunkAggregatorInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::size_t start_byte;
     std::size_t end_byte;
     std::size_t start_line;
@@ -46,8 +46,8 @@ struct ChunkAggregatorInput {
         return *this;
     }
 
-    ChunkAggregatorInput& with_idx_path(const std::string& path) {
-        idx_path = path;
+    ChunkAggregatorInput& with_index_path(const std::string& path) {
+        index_path = path;
         return *this;
     }
 
diff --git a/include/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.h b/include/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.h
index 8d7a79a3..6b0e3fe3 100644
--- a/include/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.h
@@ -66,7 +66,7 @@ struct ChunkExtractorUtilityInput {
         for (const auto& dft_spec : manifest.specs) {
             fileio::ChunkSpec io_spec;
             io_spec.file_path = dft_spec.file_path;
-            io_spec.idx_path = dft_spec.idx_path;
+            io_spec.index_path = dft_spec.index_path;
             io_spec.size_mb = dft_spec.size_mb;
             io_spec.start_byte = dft_spec.start_byte;
             io_spec.end_byte = dft_spec.end_byte;
diff --git a/include/dftracer/utils/utilities/composites/dft/comparator/comparison_config.h b/include/dftracer/utils/utilities/composites/dft/comparator/comparison_config.h
index 1129709d..87acb4fa 100644
--- a/include/dftracer/utils/utilities/composites/dft/comparator/comparison_config.h
+++ b/include/dftracer/utils/utilities/composites/dft/comparator/comparison_config.h
@@ -87,7 +87,7 @@ struct ComparisonConfig {
     std::size_t executor_threads = 0;
     /// Checkpoint size for index building (0 = default).
     std::size_t checkpoint_size = 0;
-    /// Directory for index sidecar files.
+    /// Directory for `.dftindex` stores.
     std::string index_dir;
     /// Force rebuild of existing indexes.
     bool force_rebuild = false;
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h b/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h
index f80d2862..f6a75b7c 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h
@@ -15,7 +15,7 @@ namespace dftracer::utils::utilities::composites::dft::indexing {
  *
  * Uses Kirsch-Mitzenmacher optimization: k hash functions derived from
  * 2 base hash values (std::hash with different seeds). Supports
- * serialization to/from BLOB for SQLite storage.
+ * serialization to/from binary blobs for RocksDB storage.
  *
  * Serialization format (self-describing):
  *   [4 bytes: num_hashes (uint32_t LE)]
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter_cache.h b/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter_cache.h
index 677dad34..e7c8f8cb 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter_cache.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/bloom_filter_cache.h
@@ -13,8 +13,8 @@
 namespace dftracer::utils::utilities::composites::dft::indexing {
 
 /// Thread-safe bounded cache for deserialized bloom filters.
-/// Keyed by (idx_path, dimension, checkpoint_idx) for chunk blooms,
-/// or (idx_path, dimension, UINT64_MAX) for file-level blooms.
+/// Keyed by (index_path, dimension, checkpoint_idx) for chunk blooms,
+/// or (index_path, dimension, UINT64_MAX) for file-level blooms.
 /// When the cache exceeds max_entries, it is cleared entirely.
 class BloomFilterCache {
    public:
@@ -25,23 +25,23 @@ class BloomFilterCache {
         : max_entries_(max_entries) {}
 
     /// Look up a cached bloom filter. Returns nullopt on miss.
-    std::optional<BloomFilter> get(const std::string& idx_path,
+    std::optional<BloomFilter> get(const std::string& index_path,
                                    const std::string& dimension,
                                    std::uint64_t checkpoint_idx) const {
         std::lock_guard<std::mutex> lock(mutex_);
-        auto it = cache_.find(make_key(idx_path, dimension, checkpoint_idx));
+        auto it = cache_.find(make_key(index_path, dimension, checkpoint_idx));
         if (it == cache_.end()) return std::nullopt;
         return it->second;
     }
 
     /// Insert a bloom filter into the cache. Evicts all entries if full.
-    void put(const std::string& idx_path, const std::string& dimension,
+    void put(const std::string& index_path, const std::string& dimension,
              std::uint64_t checkpoint_idx, const BloomFilter& bloom) {
         std::lock_guard<std::mutex> lock(mutex_);
         if (cache_.size() >= max_entries_) {
             cache_.clear();
         }
-        cache_.emplace(make_key(idx_path, dimension, checkpoint_idx), bloom);
+        cache_.emplace(make_key(index_path, dimension, checkpoint_idx), bloom);
     }
 
     std::size_t size() const {
@@ -50,12 +50,12 @@ class BloomFilterCache {
     }
 
    private:
-    static std::string make_key(const std::string& idx_path,
+    static std::string make_key(const std::string& index_path,
                                 const std::string& dimension,
                                 std::uint64_t checkpoint_idx) {
         std::string key;
-        key.reserve(idx_path.size() + dimension.size() + 24);
-        key += idx_path;
+        key.reserve(index_path.size() + dimension.size() + 24);
+        key += index_path;
         key += '\0';
         key += dimension;
         key += '\0';
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h
index 0bec43b7..7807ed77 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h
@@ -49,7 +49,7 @@ struct ChunkDimensionStats {
     decompress_value_counts(const std::uint8_t* data, std::size_t len);
 };
 
-/// Result type for querying chunk_dimension_stats from SQLite.
+/// Result type for querying chunk_dimension_stats from the shared index DB.
 struct ChunkDimensionStatsResult {
     std::uint64_t checkpoint_idx;
     std::string dimension;
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.h b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.h
index 645d66b0..a49ebe64 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.h
@@ -121,7 +121,7 @@ struct ChunkIndexState {
 
 struct ChunkIndexerInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::size_t checkpoint_size = 0;
     std::uint64_t checkpoint_idx = 0;
     std::size_t start_byte = 0;
@@ -137,8 +137,8 @@ struct ChunkIndexerInput {
         return *this;
     }
 
-    ChunkIndexerInput& with_idx_path(const std::string& path) {
-        idx_path = path;
+    ChunkIndexerInput& with_index_path(const std::string& path) {
+        index_path = path;
         return *this;
     }
 
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.h b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.h
index 9c645882..cb0f0378 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.h
@@ -16,7 +16,7 @@ using common::query::Query;
 
 /// Input for chunk pruning: index path, file path, query, optional cache.
 struct ChunkPrunerInput {
-    std::string idx_path;               ///< Path to .idx sidecar file.
+    std::string index_path;             ///< Path to the `.dftindex` store.
     std::string file_path;              ///< Path to trace file.
     Query query;                        ///< Query to evaluate for pruning.
     BloomFilterCache* cache = nullptr;  ///< Optional bloom filter cache.
diff --git a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h
index 3399f6da..37abe99c 100644
--- a/include/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h
+++ b/include/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h
@@ -18,7 +18,8 @@ namespace dftracer::utils::utilities::composites::dft::indexing {
  *
  * Tracks event counts by category/name/pid:tid, timestamp ranges,
  * and duration statistics using Welford's online algorithm for variance.
- * Map fields serialize to JSON TEXT for SQLite storage via yyjson.
+ * Map fields serialize to JSON text via yyjson for storage in the
+ * shared `.dftindex` database.
  */
 struct ChunkStatistics {
     std::uint64_t total_events = 0;
diff --git a/include/dftracer/utils/utilities/composites/dft/internal/chunk_spec.h b/include/dftracer/utils/utilities/composites/dft/internal/chunk_spec.h
index 3e28c469..653c115f 100644
--- a/include/dftracer/utils/utilities/composites/dft/internal/chunk_spec.h
+++ b/include/dftracer/utils/utilities/composites/dft/internal/chunk_spec.h
@@ -30,7 +30,7 @@ struct DFTracerChunkSpec : public fileio::ChunkSpec {
     static DFTracerChunkSpec from_chunk_spec(const fileio::ChunkSpec& spec) {
         DFTracerChunkSpec dft_spec;
         dft_spec.file_path = spec.file_path;
-        dft_spec.idx_path = spec.idx_path;
+        dft_spec.index_path = spec.index_path;
         dft_spec.size_mb = spec.size_mb;
         dft_spec.start_byte = spec.start_byte;
         dft_spec.end_byte = spec.end_byte;
diff --git a/include/dftracer/utils/utilities/composites/dft/internal/utils.h b/include/dftracer/utils/utilities/composites/dft/internal/utils.h
index 5639b0f6..d2c6db5d 100644
--- a/include/dftracer/utils/utilities/composites/dft/internal/utils.h
+++ b/include/dftracer/utils/utilities/composites/dft/internal/utils.h
@@ -11,18 +11,15 @@ namespace dftracer::utils::utilities::composites::dft::internal {
 bool is_data_transfer_op(std::string_view cat, std::string_view name);
 
 /**
- * @brief Determine the index file path for a given data file.
+ * @brief Determine the root-local RocksDB index path for a given data file.
  *
- * When a custom index directory is provided, the index is placed there
- * directly. Otherwise, a unique subdirectory under /tmp is created
- * using a hash of the data file's absolute path, preventing collisions
- * when multiple files share the same basename.
+ * When a custom index directory is provided, the index root is
+ * `<index_dir>/.dftindex`. Otherwise, the index root is placed alongside the
+ * data file as `<file_dir>/.dftindex`.
  *
  * @param file_path Path to the data file (e.g., "data/trace.pfw.gz")
- * @param index_dir Optional custom directory for the index file.
- *                  If empty, uses /tmp/dft_<hash>/.
- * @return Complete path to the index file
- *         (e.g., "/tmp/dft_a1b2c3d4/trace.pfw.gz.idx")
+ * @param index_dir Optional custom directory for the index root.
+ * @return Path to the owning `.dftindex` directory.
  */
 std::string determine_index_path(const std::string& file_path,
                                  const std::string& index_dir = "");
@@ -30,12 +27,12 @@ std::string determine_index_path(const std::string& file_path,
 /**
- * @brief Determine the provenance index file path for a given data file.
+ * @brief Determine the `.dftindex` root that stores provenance for a data file.
  *
- * Follows the same placement logic as determine_index_path but produces
- * a `.pidx` sidecar instead of `.idx`.
+ * Provenance now lives in the same root-local `.dftindex` database as
+ * the regular index data.
  *
  * @param data_path Path to the data file
  * @param index_dir Optional directory. If empty, places next to data file.
- * @return Complete path to the provenance index file
+ * @return Path to the owning `.dftindex` directory
  */
 std::string determine_provenance_index_path(const std::string& data_path,
                                             const std::string& index_dir = "");
diff --git a/include/dftracer/utils/utilities/composites/dft/metadata_collector_utility.h b/include/dftracer/utils/utilities/composites/dft/metadata_collector_utility.h
index 5cfc2cba..cb0d0b25 100644
--- a/include/dftracer/utils/utilities/composites/dft/metadata_collector_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/metadata_collector_utility.h
@@ -17,7 +17,7 @@ namespace dftracer::utils::utilities::composites::dft {
  */
 struct MetadataCollectorUtilityInput {
     std::string file_path;
-    std::string idx_path;  // Empty for plain files
+    std::string index_path;  // Empty for plain files, otherwise `.dftindex`.
     std::size_t checkpoint_size = dftracer::utils::utilities::indexer::
         internal::Indexer::DEFAULT_CHECKPOINT_SIZE;
     bool force_rebuild = false;
@@ -31,7 +31,7 @@ struct MetadataCollectorUtilityInput {
             Indexer::DEFAULT_CHECKPOINT_SIZE,
         bool force = false, bool hash = false)
         : file_path(std::move(fpath)),
-          idx_path(std::move(ipath)),
+          index_path(std::move(ipath)),
           checkpoint_size(ckpt),
           force_rebuild(force),
           compute_hash(hash) {}
@@ -43,7 +43,7 @@ struct MetadataCollectorUtilityInput {
     }
 
     MetadataCollectorUtilityInput& with_index(std::string idx) {
-        idx_path = std::move(idx);
+        index_path = std::move(idx);
         return *this;
     }
 
@@ -63,7 +63,7 @@ struct MetadataCollectorUtilityInput {
     }
 
     bool operator==(const MetadataCollectorUtilityInput& other) const {
-        return file_path == other.file_path && idx_path == other.idx_path &&
+        return file_path == other.file_path && index_path == other.index_path &&
                checkpoint_size == other.checkpoint_size &&
                force_rebuild == other.force_rebuild &&
                compute_hash == other.compute_hash;
@@ -75,7 +75,7 @@ struct MetadataCollectorUtilityInput {
  */
 struct MetadataCollectorUtilityOutput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;  // Root-local `.dftindex` path when available.
     double size_mb = 0;
     std::size_t start_line = 0;
     std::size_t end_line = 0;
@@ -98,7 +98,7 @@ struct MetadataCollectorUtilityOutput {
     MetadataCollectorUtilityOutput() = default;
 
     bool operator==(const MetadataCollectorUtilityOutput& other) const {
-        return file_path == other.file_path && idx_path == other.idx_path &&
+        return file_path == other.file_path && index_path == other.index_path &&
                size_mb == other.size_mb && start_line == other.start_line &&
                end_line == other.end_line &&
                valid_events == other.valid_events &&
@@ -119,7 +119,8 @@ struct MetadataCollectorUtilityOutput {
  * files.
  *
  * Supports both plain (.pfw) and compressed (.pfw.gz) files.
- * For compressed files, builds/uses an index for efficient access.
+ * For compressed files, builds/uses the root-local `.dftindex` store for
+ * efficient access.
  *
  * Tagged with Parallelizable - safe for parallel batch processing.
  */
diff --git a/include/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.h b/include/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.h
index c02bc3c7..d2bffcd3 100644
--- a/include/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.h
+++ b/include/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.h
@@ -1,6 +1,7 @@
 #ifndef DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_REORGANIZE_PROVENANCE_TRACKER_H
 #define DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_REORGANIZE_PROVENANCE_TRACKER_H
 
+#include <dftracer/utils/core/coro/task.h>
 #include <dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.h>
 #include <dftracer/utils/utilities/fileio/chunk_writer.h>
 
@@ -27,10 +28,11 @@ class ProvenanceTracker {
     void record(int source_file_idx, int checkpoint_idx, int output_chunk_idx,
                 int output_line_start, int output_line_end, int event_count);
 
-    void flush_to_db(const ExtractionPlan& plan, const std::string& group_name,
-                     const std::string& group_query,
-                     const std::vector<fileio::ChunkInfo>& chunks,
-                     const std::string& output_dir);
+    coro::CoroTask<void> flush_to_db(
+        const ExtractionPlan& plan, const std::string& group_name,
+        const std::string& group_query,
+        const std::vector<fileio::ChunkInfo>& chunks,
+        const std::string& output_dir);  // NOTE(review): refs must outlive task
 
     std::size_t record_count() const { return records_.size(); }
     const std::vector<ProvenanceRecord>& records() const { return records_; }
diff --git a/include/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.h b/include/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.h
index 65a701e1..92ea2f20 100644
--- a/include/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.h
+++ b/include/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.h
@@ -17,7 +17,7 @@ struct PredicateGroup {
 
 struct SourceFileInfo {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::size_t num_checkpoints = 0;
     std::uint64_t uncompressed_size = 0;
     std::uint64_t checkpoint_size = 0;
diff --git a/include/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.h b/include/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.h
index 33e38979..be4f18e5 100644
--- a/include/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.h
@@ -14,7 +14,7 @@ namespace dftracer::utils::utilities::composites::dft::statistics {
 
 struct ChunkDetailScanInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::size_t checkpoint_size = 0;
     std::size_t start_byte = 0;
     std::size_t end_byte = 0;
diff --git a/include/dftracer/utils/utilities/composites/dft/statistics/statistics.h b/include/dftracer/utils/utilities/composites/dft/statistics/statistics.h
index 27587e2a..16cbf10b 100644
--- a/include/dftracer/utils/utilities/composites/dft/statistics/statistics.h
+++ b/include/dftracer/utils/utilities/composites/dft/statistics/statistics.h
@@ -5,7 +5,8 @@
  * @file statistics.h
  * @brief Convenience header for all DFTracer statistics components.
  *
- * Provides zero-cost statistics aggregation from pre-indexed .idx databases:
+ * Provides zero-cost statistics aggregation from pre-indexed `.dftindex`
+ * databases:
  * trace statistics, aggregation, and querying.
  */
 
diff --git a/include/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h b/include/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h
index 2241bb72..7141ba23 100644
--- a/include/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h
@@ -11,7 +11,7 @@ namespace dftracer::utils::utilities::composites::dft::statistics {
 
 struct StatisticsAggregatorInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::string index_dir;
 };
 
diff --git a/include/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.h b/include/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.h
index 6323a805..a06e182e 100644
--- a/include/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.h
+++ b/include/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.h
@@ -12,7 +12,7 @@ using indexing::ChunkStatistics;
 
 struct TraceStatistics {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     ChunkStatistics merged;
     std::uint64_t num_chunks = 0;
     bool success = false;
diff --git a/include/dftracer/utils/utilities/composites/dft/views/view_builder_utility.h b/include/dftracer/utils/utilities/composites/dft/views/view_builder_utility.h
index fdb2ccbf..cd8974d9 100644
--- a/include/dftracer/utils/utilities/composites/dft/views/view_builder_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/views/view_builder_utility.h
@@ -18,7 +18,7 @@ namespace dftracer::utils::utilities::composites::dft::views {
 struct ViewBuilderInput {
     ViewDefinition view;
     std::string file_path;
-    std::string idx_path;  // index sidecar path
+    std::string index_path;  // `.dftindex` store path
     std::size_t uncompressed_size = 0;
     std::size_t num_checkpoints = 0;
     indexing::BloomFilterCache* bloom_cache = nullptr;
@@ -27,7 +27,7 @@ struct ViewBuilderInput {
     // Fluent builders
     ViewBuilderInput& with_view(const ViewDefinition& v);
     ViewBuilderInput& with_file_path(const std::string& path);
-    ViewBuilderInput& with_idx_path(const std::string& path);
+    ViewBuilderInput& with_index_path(const std::string& path);
     ViewBuilderInput& with_uncompressed_size(std::size_t s);
     ViewBuilderInput& with_num_checkpoints(std::size_t n);
     ViewBuilderInput& with_bloom_cache(indexing::BloomFilterCache* c);
@@ -57,4 +57,4 @@ class ViewBuilderUtility : public Utility<ViewBuilderInput, ViewBuilderOutput,
 
 }  // namespace dftracer::utils::utilities::composites::dft::views
 
-#endif  // DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_VIEWS_VIEW_BUILDER_UTILITY_H
\ No newline at end of file
+#endif  // DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_VIEWS_VIEW_BUILDER_UTILITY_H
diff --git a/include/dftracer/utils/utilities/composites/dft/views/view_reader_utility.h b/include/dftracer/utils/utilities/composites/dft/views/view_reader_utility.h
index 67dd50ca..7709f9e8 100644
--- a/include/dftracer/utils/utilities/composites/dft/views/view_reader_utility.h
+++ b/include/dftracer/utils/utilities/composites/dft/views/view_reader_utility.h
@@ -22,7 +22,7 @@ namespace dftracer::utils::utilities::composites::dft::views {
 
 struct ViewReaderInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     std::size_t checkpoint_size =
         utilities::indexer::internal::Indexer::DEFAULT_CHECKPOINT_SIZE;
     std::size_t start_byte = 0;
@@ -34,7 +34,7 @@ struct ViewReaderInput {
     std::optional<common::query::Query> query;
 
     ViewReaderInput& with_file_path(const std::string& path);
-    ViewReaderInput& with_idx_path(const std::string& path);
+    ViewReaderInput& with_index_path(const std::string& path);
     ViewReaderInput& with_checkpoint_size(std::size_t sz);
     ViewReaderInput& with_byte_range(std::size_t start, std::size_t end);
     ViewReaderInput& with_checkpoint_idx(std::uint64_t idx);
diff --git a/include/dftracer/utils/utilities/composites/file_merger_utility.h b/include/dftracer/utils/utilities/composites/file_merger_utility.h
index 1be3876b..c20d90b2 100644
--- a/include/dftracer/utils/utilities/composites/file_merger_utility.h
+++ b/include/dftracer/utils/utilities/composites/file_merger_utility.h
@@ -45,8 +45,9 @@ struct FileMergeValidatorUtilityInput {
         return input;
     }
 
-    FileMergeValidatorUtilityInput& with_index(const std::string& idx_path) {
-        index_path = idx_path;
+    FileMergeValidatorUtilityInput& with_index(
+        const std::string& index_path_value) {
+        index_path = index_path_value;
         return *this;
     }
 
diff --git a/include/dftracer/utils/utilities/composites/indexed_file_reader_utility.h b/include/dftracer/utils/utilities/composites/indexed_file_reader_utility.h
index 50b837bd..79c7a6cf 100644
--- a/include/dftracer/utils/utilities/composites/indexed_file_reader_utility.h
+++ b/include/dftracer/utils/utilities/composites/indexed_file_reader_utility.h
@@ -3,8 +3,11 @@
 
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/coro/task.h>
+#include <dftracer/utils/core/rocksdb/db_manager.h>
 #include <dftracer/utils/core/utilities/utilities.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/types.h>
+#include <dftracer/utils/utilities/indexer/internal/helpers.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 #include <dftracer/utils/utilities/reader/internal/reader.h>
 #include <dftracer/utils/utilities/reader/internal/reader_factory.h>
@@ -30,7 +33,7 @@ namespace dftracer::utils::utilities::composites {
  * @code
  * IndexedFileReader reader_workflow;
  * auto reader = reader_workflow.process(
- *     IndexedReadInput{"file.gz", "file.gz.idx", checkpoint_size, false}
+ *     IndexedReadInput{"file.gz", ".dftindex", checkpoint_size, false}
  * );
  * // Now use reader to read lines
  * @endcode
@@ -52,40 +55,57 @@ class IndexedFileReaderUtility
             throw std::runtime_error("File does not exist: " + input.file_path);
         }
 
+        // Resolve the index store location once and use the same path for
+        // the existence check, the indexer, and the reader so they can never
+        // disagree (e.g. when `input.index_path` is empty).
+        const std::string normalized_index_path =
+            input.index_path.empty()
+                ? dft::internal::determine_index_path(input.file_path, "")
+                : indexer::internal::normalize_index_root(input.index_path);
+
         // Step 1: Check if index needs to be built/rebuilt
-        bool need_build = !fs::exists(input.idx_path) || input.force_rebuild;
+        bool need_build =
+            !fs::exists(normalized_index_path) || input.force_rebuild;
 
         if (need_build) {
             // Remove old index if forcing rebuild
-            if (input.force_rebuild && fs::exists(input.idx_path)) {
-                fs::remove(input.idx_path);
+            if (input.force_rebuild && fs::exists(normalized_index_path)) {
+                // Force rebuild must discard the manager-owned DB instance
+                // before removing the root directory so the next open is a
+                // true reopen, not a reuse of the previous live handle.
+                rocksdb::RocksDBManager::instance().reset(
+                    normalized_index_path);
+                fs::remove_all(normalized_index_path);
             }
 
             // Build new index
             auto indexer = dftracer::utils::utilities::indexer::internal::
-                IndexerFactory::create(input.file_path, input.idx_path,
+                IndexerFactory::create(input.file_path, normalized_index_path,
                                        input.checkpoint_size, true);
             co_await indexer->build_async();
         } else {
             // Check if existing index needs rebuild
             auto indexer = dftracer::utils::utilities::indexer::internal::
-                IndexerFactory::create(input.file_path, input.idx_path,
+                IndexerFactory::create(input.file_path, normalized_index_path,
                                        input.checkpoint_size, false);
 
             if (indexer->need_rebuild()) {
                 // Rebuild the index
-                fs::remove(input.idx_path);
-                auto new_indexer =
-                    dftracer::utils::utilities::indexer::internal::
-                        IndexerFactory::create(input.file_path, input.idx_path,
-                                               input.checkpoint_size, true);
+                // Drop the cached DB instance before deleting the store.
+                rocksdb::RocksDBManager::instance().reset(
+                    normalized_index_path);
+                fs::remove_all(normalized_index_path);
+                auto new_indexer = dftracer::utils::utilities::indexer::
+                    internal::IndexerFactory::create(
+                        input.file_path, normalized_index_path,
+                        input.checkpoint_size, true);
                 co_await new_indexer->build_async();
             }
         }
 
         // Step 2: Create and return Reader
-        co_return reader::internal::ReaderFactory::create(input.file_path,
-                                                          input.idx_path);
+        co_return reader::internal::ReaderFactory::create(
+            input.file_path, normalized_index_path);
     }
 };
 
diff --git a/include/dftracer/utils/utilities/composites/line_batch_processor_utility.h b/include/dftracer/utils/utilities/composites/line_batch_processor_utility.h
index c5932575..08ebc5da 100644
--- a/include/dftracer/utils/utilities/composites/line_batch_processor_utility.h
+++ b/include/dftracer/utils/utilities/composites/line_batch_processor_utility.h
@@ -40,7 +40,7 @@ using LineBatchProcessUtilityOutput = std::vector<LineOutput>;
  *
  * LineBatchProcessor<MyData> workflow(processor);
  * auto results = workflow.process(LineBatchInput{"/path/to/file.gz",
- * "file.gz.idx"});
+ * "/path/to/.dftindex"});
  * @endcode
  */
 template <typename LineOutput>
@@ -74,10 +74,10 @@ class LineBatchProcessorUtility
         LineBatchProcessUtilityOutput<LineOutput> results;
 
         auto gen = [&]() {
-            if (!input.idx_path.empty()) {
+            if (!input.index_path.empty()) {
                 auto iter_config =
                     fileio::lines::sources::IndexedFileLineIteratorConfig()
-                        .with_file(input.file_path, input.idx_path);
+                        .with_file(input.file_path, input.index_path);
                 if (input.start_line > 0 && input.end_line > 0) {
                     iter_config.with_line_range(input.start_line,
                                                 input.end_line);
@@ -131,10 +131,10 @@ class SimpleLineBatchProcessorUtility
         SimpleLineBatchProcessUtilityOutput<LineOutput> results;
 
         auto gen = [&]() {
-            if (!input.idx_path.empty()) {
+            if (!input.index_path.empty()) {
                 auto iter_config =
                     fileio::lines::sources::IndexedFileLineIteratorConfig()
-                        .with_file(input.file_path, input.idx_path);
+                        .with_file(input.file_path, input.index_path);
                 if (input.start_line > 0 && input.end_line > 0) {
                     iter_config.with_line_range(input.start_line,
                                                 input.end_line);
diff --git a/include/dftracer/utils/utilities/composites/types.h b/include/dftracer/utils/utilities/composites/types.h
index e7d3be0e..2b9e6d0c 100644
--- a/include/dftracer/utils/utilities/composites/types.h
+++ b/include/dftracer/utils/utilities/composites/types.h
@@ -59,7 +59,7 @@ struct DirectoryProcessInput {
  */
 struct IndexedReadInput {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;  // Root-local `.dftindex` path.
     std::size_t checkpoint_size = dftracer::utils::utilities::indexer::
         internal::Indexer::DEFAULT_CHECKPOINT_SIZE;
     bool force_rebuild = false;
@@ -71,7 +71,7 @@ struct IndexedReadInput {
                          indexer::internal::Indexer::DEFAULT_CHECKPOINT_SIZE,
                      bool force = false)
         : file_path(std::move(fpath)),
-          idx_path(std::move(ipath)),
+          index_path(std::move(ipath)),
           checkpoint_size(ckpt_size),
           force_rebuild(force) {}
 
@@ -82,7 +82,7 @@ struct IndexedReadInput {
     }
 
     IndexedReadInput& with_index(std::string idx) {
-        idx_path = std::move(idx);
+        index_path = std::move(idx);
         return *this;
     }
 
@@ -102,7 +102,8 @@ struct IndexedReadInput {
  */
 struct LineBatchInput {
     std::string file_path;
-    std::string idx_path;        // Empty for plain text files
+    std::string index_path;      // Empty for plain text files,
+                                 // or `.dftindex` path for indexed archives.
     std::size_t start_line = 0;  // 0 = from beginning
     std::size_t end_line = 0;    // 0 = to end
     std::size_t checkpoint_size = dftracer::utils::utilities::indexer::
@@ -113,7 +114,7 @@ struct LineBatchInput {
     LineBatchInput(std::string fpath, std::string ipath = "",
                    std::size_t start = 0, std::size_t end = 0)
         : file_path(std::move(fpath)),
-          idx_path(std::move(ipath)),
+          index_path(std::move(ipath)),
           start_line(start),
           end_line(end) {}
 
@@ -124,7 +125,7 @@ struct LineBatchInput {
     }
 
     LineBatchInput& with_index(std::string idx) {
-        idx_path = std::move(idx);
+        index_path = std::move(idx);
         return *this;
     }
 
diff --git a/include/dftracer/utils/utilities/fileio/lines/line_bytes_range.h b/include/dftracer/utils/utilities/fileio/lines/line_bytes_range.h
index 987bf25a..4a6f0f7a 100644
--- a/include/dftracer/utils/utilities/fileio/lines/line_bytes_range.h
+++ b/include/dftracer/utils/utilities/fileio/lines/line_bytes_range.h
@@ -28,7 +28,7 @@ namespace dftracer::utils::utilities::fileio::lines {
  * Usage:
  * @code
  * // From indexed file with byte range
- * auto reader = ReaderFactory::create("file.gz", "file.gz.idx");
+ * auto reader = ReaderFactory::create("file.gz", "/data/.dftindex");
  * LineBytesRange range1 = LineBytesRange::from_indexed_file(reader, 1000,
  * 5000);
  *
diff --git a/include/dftracer/utils/utilities/fileio/lines/line_types.h b/include/dftracer/utils/utilities/fileio/lines/line_types.h
index 8fc937c6..ba4bf62d 100644
--- a/include/dftracer/utils/utilities/fileio/lines/line_types.h
+++ b/include/dftracer/utils/utilities/fileio/lines/line_types.h
@@ -42,13 +42,14 @@ struct Line {
  * Usage:
  * @code
  * auto input = LineReadInput::from_file("data.txt")
- *                  .with_index("data.txt.idx")
+ *                  .with_index("/data/.dftindex")
  *                  .with_range(10, 100);
  * @endcode
  */
 struct LineReadInput {
     std::string file_path;   // Path to the archive file
-    std::string idx_path;    // Path to the index file (empty for plain files)
+    std::string index_path;  // Path to the `.dftindex` store
+                             // (empty for plain files)
     std::size_t start_line;  // Starting line (1-based, inclusive), 0 = start
     std::size_t end_line;    // Ending line (1-based, inclusive), 0 = end
 
@@ -57,7 +58,7 @@ struct LineReadInput {
     LineReadInput(std::string file_path_, std::string idx_path_,
                   std::size_t start_line_, std::size_t end_line_)
         : file_path(std::move(file_path_)),
-          idx_path(std::move(idx_path_)),
+          index_path(std::move(idx_path_)),
           start_line(start_line_),
           end_line(end_line_) {}
 
@@ -68,7 +69,7 @@ struct LineReadInput {
     }
 
     LineReadInput& with_index(std::string idx) {
-        idx_path = std::move(idx);
+        index_path = std::move(idx);
         return *this;
     }
 
@@ -79,7 +80,7 @@ struct LineReadInput {
     }
 
     bool operator==(const LineReadInput& other) const {
-        return file_path == other.file_path && idx_path == other.idx_path &&
+        return file_path == other.file_path && index_path == other.index_path &&
                start_line == other.start_line && end_line == other.end_line;
     }
 
@@ -153,7 +154,7 @@ struct hash<dftracer::utils::utilities::fileio::lines::LineReadInput> {
         const {
         ::dftracer::utils::utilities::hash::HasherUtility hasher;
         hasher.update(req.file_path);
-        hasher.update(req.idx_path);
+        hasher.update(req.index_path);
         hasher.update(req.start_line);
         hasher.update(req.end_line);
         return hasher.get_hash().value;
diff --git a/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_bytes_generator.h b/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_bytes_generator.h
index 9d89d186..98eef888 100644
--- a/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_bytes_generator.h
+++ b/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_bytes_generator.h
@@ -1,6 +1,7 @@
 #ifndef DFTRACER_UTILS_UTILITIES_FILEIO_LINES_SOURCES_ASYNC_PLAIN_FILE_BYTES_GENERATOR_H
 #define DFTRACER_UTILS_UTILITIES_FILEIO_LINES_SOURCES_ASYNC_PLAIN_FILE_BYTES_GENERATOR_H
 
+#include <dftracer/utils/core/common/scoped_fd.h>
 #include <dftracer/utils/core/coro/async_generator.h>
 #include <dftracer/utils/core/io/io.h>
 #include <dftracer/utils/utilities/fileio/lines/line_types.h>
@@ -9,7 +10,6 @@
 #include <vector>
 
 namespace dftracer::utils::utilities::fileio::lines::sources {
-
 /**
  * @brief Async generator that yields lines from plain text files
  *        within a byte range, with line-boundary alignment.
@@ -39,7 +39,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
     if (fd_result < 0) {
         throw std::runtime_error("Cannot open file: " + file_path);
     }
-    int fd = static_cast<int>(fd_result);
+    dftracer::utils::ScopedFd fd(static_cast<int>(fd_result));
 
     std::vector<char> read_buffer(buffer_size);
     std::string line_buffer;
@@ -56,7 +56,8 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
             bool aligned = false;
             while (!aligned) {
                 ssize_t bytes_read = co_await ::dftracer::utils::io::pread(
-                    fd, read_buffer.data(), read_buffer.size(), file_offset);
+                    fd.get(), read_buffer.data(), read_buffer.size(),
+                    file_offset);
 
                 if (bytes_read < 0) {
                     throw std::runtime_error(
@@ -66,7 +67,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
 
                 if (bytes_read == 0) {
                     // Hit EOF before finding a newline — nothing to yield
-                    co_await ::dftracer::utils::io::close(fd);
+                    fd.reset();
                     co_return;
                 }
 
@@ -80,7 +81,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
 
                 if (static_cast<std::size_t>(file_offset) >= end_byte) {
                     // Passed end_byte while aligning — nothing to yield
-                    co_await ::dftracer::utils::io::close(fd);
+                    fd.reset();
                     co_return;
                 }
             }
@@ -96,7 +97,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
             }
 
             ssize_t bytes_read = co_await ::dftracer::utils::io::pread(
-                fd, read_buffer.data(), read_buffer.size(), file_offset);
+                fd.get(), read_buffer.data(), read_buffer.size(), file_offset);
 
             if (bytes_read < 0) {
                 throw std::runtime_error(
@@ -141,7 +142,6 @@ inline coro::AsyncGenerator<Line> async_plain_file_bytes(
         ex = std::current_exception();
     }
 
-    co_await ::dftracer::utils::io::close(fd);
     if (ex) {
         std::rethrow_exception(ex);
     }
diff --git a/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_line_generator.h b/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_line_generator.h
index e8a0365c..2c851eb9 100644
--- a/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_line_generator.h
+++ b/include/dftracer/utils/utilities/fileio/lines/sources/async_plain_file_line_generator.h
@@ -1,6 +1,7 @@
 #ifndef DFTRACER_UTILS_UTILITIES_FILEIO_LINES_SOURCES_ASYNC_PLAIN_FILE_LINE_GENERATOR_H
 #define DFTRACER_UTILS_UTILITIES_FILEIO_LINES_SOURCES_ASYNC_PLAIN_FILE_LINE_GENERATOR_H
 
+#include <dftracer/utils/core/common/scoped_fd.h>
 #include <dftracer/utils/core/coro/async_generator.h>
 #include <dftracer/utils/core/io/io.h>
 #include <dftracer/utils/utilities/fileio/lines/line_types.h>
@@ -9,7 +10,6 @@
 #include <vector>
 
 namespace dftracer::utils::utilities::fileio::lines::sources {
-
 /**
  * @brief Async generator that yields lines from plain text files.
  *
@@ -32,7 +32,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_lines(
     if (fd_result < 0) {
         throw std::runtime_error("Cannot open file: " + file_path);
     }
-    int fd = static_cast<int>(fd_result);
+    dftracer::utils::ScopedFd fd(static_cast<int>(fd_result));
 
     constexpr std::size_t BUFFER_SIZE = 256 * 1024;  // 256KB
     std::vector<char> read_buffer(BUFFER_SIZE);
@@ -48,7 +48,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_lines(
         bool eof = false;
         while (!eof) {
             ssize_t bytes_read = co_await ::dftracer::utils::io::pread(
-                fd, read_buffer.data(), BUFFER_SIZE, file_offset);
+                fd.get(), read_buffer.data(), BUFFER_SIZE, file_offset);
 
             if (bytes_read < 0) {
                 throw std::runtime_error(
@@ -82,7 +82,7 @@ inline coro::AsyncGenerator<Line> async_plain_file_lines(
                                       current_line);
                     }
                     if (end_line > 0 && current_line >= end_line) {
-                        co_await ::dftracer::utils::io::close(fd);
+                        fd.reset();
                         co_return;
                     }
                     line_buffer.clear();
@@ -95,7 +95,6 @@ inline coro::AsyncGenerator<Line> async_plain_file_lines(
         ex = std::current_exception();
     }
 
-    co_await ::dftracer::utils::io::close(fd);
     if (ex) {
         std::rethrow_exception(ex);
     }
diff --git a/include/dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h b/include/dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h
index 7769ba5f..aa3b2a2f 100644
--- a/include/dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h
+++ b/include/dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h
@@ -2,6 +2,7 @@
 #define DFTRACER_UTILS_UTILITIES_FILEIO_LINES_SOURCES_ASYNC_STREAMING_GZ_LINE_GENERATOR_H
 
 #include <dftracer/utils/core/common/byte_view.h>
+#include <dftracer/utils/core/common/scoped_fd.h>
 #include <dftracer/utils/core/coro/async_generator.h>
 #include <dftracer/utils/core/io/io.h>
 #include <dftracer/utils/utilities/compression/zlib/streaming_decompressor_utility.h>
@@ -11,7 +12,6 @@
 #include <vector>
 
 namespace dftracer::utils::utilities::fileio::lines::sources {
-
 /**
  * @brief Async generator that yields lines from .gz files without an index.
  *
@@ -30,7 +30,7 @@ inline coro::AsyncGenerator<Line> async_streaming_gz_lines(
             "Cannot open compressed file: " + file_path +
             " (errno=" + std::to_string(static_cast<int>(-fd_result)) + ")");
     }
-    int fd = static_cast<int>(fd_result);
+    dftracer::utils::ScopedFd fd(static_cast<int>(fd_result));
 
     constexpr std::size_t READ_BUFFER_SIZE = 256 * 1024;  // 256KB
     std::vector<char> read_buffer(READ_BUFFER_SIZE);
@@ -46,7 +46,7 @@ inline coro::AsyncGenerator<Line> async_streaming_gz_lines(
     try {
         while (true) {
             ssize_t bytes_read = co_await ::dftracer::utils::io::pread(
-                fd, read_buffer.data(), READ_BUFFER_SIZE, file_offset);
+                fd.get(), read_buffer.data(), READ_BUFFER_SIZE, file_offset);
 
             if (bytes_read < 0) {
                 throw std::runtime_error(
@@ -94,7 +94,7 @@ inline coro::AsyncGenerator<Line> async_streaming_gz_lines(
                                           current_line);
                         }
                         if (end_line > 0 && current_line >= end_line) {
-                            co_await ::dftracer::utils::io::close(fd);
+                            fd.reset();
                             co_return;
                         }
                         line_buffer.clear();
@@ -110,7 +110,6 @@ inline coro::AsyncGenerator<Line> async_streaming_gz_lines(
         ex = std::current_exception();
     }
 
-    co_await ::dftracer::utils::io::close(fd);
     if (ex) {
         std::rethrow_exception(ex);
     }
diff --git a/include/dftracer/utils/utilities/fileio/lines/streaming_line_reader.h b/include/dftracer/utils/utilities/fileio/lines/streaming_line_reader.h
index c9f8b32f..8e669a53 100644
--- a/include/dftracer/utils/utilities/fileio/lines/streaming_line_reader.h
+++ b/include/dftracer/utils/utilities/fileio/lines/streaming_line_reader.h
@@ -3,6 +3,7 @@
 
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/fileio/lines/line_bytes_range.h>
 #include <dftracer/utils/utilities/fileio/lines/line_range.h>
 #include <dftracer/utils/utilities/fileio/lines/line_types.h>
@@ -25,7 +26,7 @@ namespace dftracer::utils::utilities::fileio::lines {
  * @code
  * auto config = StreamingLineReaderConfig()
  *     .with_file("file.gz")
- *     .with_index("file.gz.idx")
+ *     .with_index("trace-root/.dftindex")
  *     .with_line_range(1, 100);
  *
  * auto range = StreamingLineReader::read(config);
@@ -71,7 +72,7 @@ class StreamingLineReaderConfig {
  * appropriate line iterator. It supports:
  * - Indexed compressed files (.gz, .tar.gz) via Reader
  * - Plain text files
- * - Automatic index file detection
+ * - Automatic `.dftindex` detection for compressed files
  *
  * Usage:
  * @code
@@ -91,10 +92,10 @@ class StreamingLineReaderConfig {
 class StreamingLineReader {
    public:
     /**
-     * @brief Read lines from a file, auto-detecting format and index.
+     * @brief Read lines from a file, auto-detecting format and `.dftindex`.
      *
      * This method automatically:
-     * 1. Detects if an index file exists (.idx)
+     * 1. Detects if a `.dftindex` store exists
      * 2. Creates appropriate reader (indexed or plain)
      * 3. Returns a LineRange for streaming iteration
      *
@@ -105,16 +106,18 @@ class StreamingLineReader {
         const std::string& file_path = config.file_path();
         std::size_t start_line = config.start_line();
         std::size_t end_line = config.end_line();
-        const std::string& idx_path = config.index_path();
-        // Check if index file exists
-        std::string actual_idx_path =
-            idx_path.empty() ? file_path + ".idx" : idx_path;
-        bool has_index = fs::exists(actual_idx_path);
+        const std::string& index_path = config.index_path();
+        std::string actual_index_path = index_path;
+        if (actual_index_path.empty()) {
+            actual_index_path =
+                composites::dft::internal::determine_index_path(file_path, "");
+        }
+        bool has_index = fs::exists(actual_index_path);
 
         DFTRACER_UTILS_LOG_DEBUG(
-            "StreamingLineReader::read - file=%s, idx_path_param=%s, "
-            "actual_idx=%s, has_index=%d",
-            file_path.c_str(), idx_path.c_str(), actual_idx_path.c_str(),
+            "StreamingLineReader::read - file=%s, index_path_param=%s, "
+            "actual_index=%s, has_index=%d",
+            file_path.c_str(), index_path.c_str(), actual_index_path.c_str(),
             has_index);
 
         // Check file extension to determine if it's compressed
@@ -123,7 +126,7 @@ class StreamingLineReader {
         if (is_compressed && has_index) {
             auto iter_config =
                 sources::IndexedFileLineIteratorConfig().with_file(
-                    file_path, actual_idx_path);
+                    file_path, actual_index_path);
             if (start_line > 0 && end_line > 0) {
                 iter_config.with_line_range(start_line, end_line);
             }
@@ -143,7 +146,7 @@ class StreamingLineReader {
      * @brief Read lines from a file using indexed reader.
      *
      * @param file_path Path to the compressed file
-     * @param idx_path Path to the index file
+     * @param config Indexed reader configuration
      * @param start_line Starting line (1-based, inclusive), 0 means start
      * @param end_line Ending line (1-based, inclusive), 0 means end
      * @return LineRange for streaming iteration
@@ -185,24 +188,24 @@ class StreamingLineReader {
     static coro::AsyncGenerator<Line> read_async(
         const StreamingLineReaderConfig& config) {
         const std::string& file_path = config.file_path();
-        const std::string& idx_path = config.index_path();
+        const std::string& index_path = config.index_path();
         bool is_compressed = is_compressed_format(file_path);
 
         // Only use the indexed path when an index was explicitly
-        // provided.  Auto-discovering .idx files would silently
+        // provided. Auto-discovering `.dftindex` would silently
         // override callers that intentionally omit the index to
         // get single-pass streaming decompression.
         bool has_index = false;
-        std::string actual_idx_path;
-        if (!idx_path.empty()) {
-            actual_idx_path = idx_path;
-            has_index = fs::exists(actual_idx_path);
+        std::string actual_index_path;
+        if (!index_path.empty()) {
+            actual_index_path = index_path;
+            has_index = fs::exists(actual_index_path);
         }
 
         if (is_compressed && has_index) {
             auto iter_config =
                 sources::IndexedFileLineIteratorConfig().with_file(
-                    file_path, actual_idx_path);
+                    file_path, actual_index_path);
             if (config.start_line() > 0 || config.end_line() > 0) {
                 iter_config.with_line_range(config.start_line(),
                                             config.end_line());
@@ -238,7 +241,7 @@ class StreamingLineReader {
      * @brief Async read lines from compressed file without an index.
      *
      * Stream-decompresses the file and splits into lines in a single
-     * pass, avoiding the overhead of building a sidecar index.
+     * pass, avoiding the overhead of building a `.dftindex` store.
      */
     static coro::AsyncGenerator<Line> read_streaming_gz_async(
         const std::string& file_path, std::size_t start_line = 0,
diff --git a/include/dftracer/utils/utilities/fileio/types/chunk_spec.h b/include/dftracer/utils/utilities/fileio/types/chunk_spec.h
index c71e7532..7f17b36c 100644
--- a/include/dftracer/utils/utilities/fileio/types/chunk_spec.h
+++ b/include/dftracer/utils/utilities/fileio/types/chunk_spec.h
@@ -16,7 +16,7 @@ namespace dftracer::utils::utilities::fileio {
  */
 struct ChunkSpec {
     std::string file_path;
-    std::string idx_path;    // Empty for plain text files
+    std::string index_path;  // Empty for plain text files
     double size_mb;
     std::size_t start_byte;  // Starting byte offset (0-based)
     std::size_t end_byte;    // Ending byte offset (exclusive)
@@ -26,13 +26,13 @@ struct ChunkSpec {
     ChunkSpec(std::string path, std::string idx, double mb, std::size_t start,
               std::size_t end)
         : file_path(std::move(path)),
-          idx_path(std::move(idx)),
+          index_path(std::move(idx)),
           size_mb(mb),
           start_byte(start),
           end_byte(end) {}
 
     bool operator==(const ChunkSpec& other) const {
-        return file_path == other.file_path && idx_path == other.idx_path &&
+        return file_path == other.file_path && index_path == other.index_path &&
                size_mb == other.size_mb && start_byte == other.start_byte &&
                end_byte == other.end_byte;
     }
@@ -54,7 +54,7 @@ struct hash<dftracer::utils::utilities::fileio::ChunkSpec> {
                                spec) const noexcept {
         ::dftracer::utils::utilities::hash::HasherUtility hasher;
         hasher.update(spec.file_path);
-        hasher.update(spec.idx_path);
+        hasher.update(spec.index_path);
         hasher.update(spec.size_mb);
         hasher.update(spec.start_byte);
         hasher.update(spec.end_byte);
diff --git a/include/dftracer/utils/utilities/indexer/index_builder_utility.h b/include/dftracer/utils/utilities/indexer/index_builder_utility.h
index 1036ca6a..cf86a8af 100644
--- a/include/dftracer/utils/utilities/indexer/index_builder_utility.h
+++ b/include/dftracer/utils/utilities/indexer/index_builder_utility.h
@@ -43,7 +43,7 @@ struct IndexBuildConfig {
 
 struct IndexBuildResult {
     std::string file_path;
-    std::string idx_path;
+    std::string index_path;
     bool success = false;
     bool was_skipped = false;
     bool index_created = false;
diff --git a/include/dftracer/utils/utilities/indexer/index_database.h b/include/dftracer/utils/utilities/indexer/index_database.h
index 2be2cc31..76846a7c 100644
--- a/include/dftracer/utils/utilities/indexer/index_database.h
+++ b/include/dftracer/utils/utilities/indexer/index_database.h
@@ -1,13 +1,16 @@
 #ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INDEX_DATABASE_H
 #define DFTRACER_UTILS_UTILITIES_INDEXER_INDEX_DATABASE_H
 
-#include <dftracer/utils/core/sqlite/database.h>
+#include <dftracer/utils/core/rocksdb/database.h>
+#include <dftracer/utils/core/rocksdb/db_manager.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
+#include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
 
 #include <cstdint>
+#include <memory>
 #include <optional>
 #include <span>
 #include <string>
@@ -18,8 +21,8 @@
 namespace dftracer::utils::utilities::indexer {
 
 /**
- * @brief Unified .idx SQLite database combining checkpoint, bloom filter,
- *        and manifest data in a single sidecar file.
+ * @brief Unified `.dftindex` RocksDB store combining checkpoint, bloom
+ *        filter, manifest, and archive metadata.
  *
  * Schema is additive: call init_base_schema() always, then
  * init_bloom_schema() and/or init_manifest_schema() as needed.
@@ -44,8 +47,27 @@ class IndexDatabase {
     using ChunkDimensionStats = composites::dft::indexing::ChunkDimensionStats;
     using ChunkDimensionStatsResult =
         composites::dft::indexing::ChunkDimensionStatsResult;
-
-    explicit IndexDatabase(const std::string& idx_path);
+    using IndexerCheckpoint = internal::IndexerCheckpoint;
+    struct TarArchiveMetadata {
+        std::string archive_name;
+        std::uint64_t checkpoint_size = 0;
+        std::uint64_t total_lines = 0;
+        std::uint64_t total_uc_size = 0;
+        std::uint64_t total_files = 0;
+    };
+    struct TarFileRecord {
+        std::string file_name;
+        std::uint64_t file_size = 0;
+        std::uint64_t file_mtime = 0;
+        char typeflag = '\0';
+        std::uint64_t data_offset = 0;
+        std::uint64_t uncompressed_offset = 0;
+    };
+
+    explicit IndexDatabase(
+        const std::string& index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode open_mode =
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadWrite);
 
     IndexDatabase(const IndexDatabase&) = delete;
     IndexDatabase& operator=(const IndexDatabase&) = delete;
@@ -66,11 +88,16 @@ class IndexDatabase {
 
     int get_or_create_file_info(std::string_view path, std::uint64_t file_hash);
     int get_file_info_id(std::string_view path) const;
+    std::optional<std::uint64_t> get_file_hash(std::string_view path) const;
 
     // Convenience: resolve file path to file_id (handles logical path)
     int find_file(std::string_view file_path) const;
 
     // Metadata queries
+    void insert_file_metadata(int file_id, std::uint64_t checkpoint_size,
+                              std::uint64_t total_lines,
+                              std::uint64_t total_uc_size);
+    std::uint64_t get_checkpoint_size(int file_id) const;
     std::uint64_t get_num_lines(int file_id) const;
     std::uint64_t get_max_bytes(int file_id) const;
 
@@ -80,12 +107,7 @@ class IndexDatabase {
 
     void begin_transaction();
     void commit_transaction();
-
-    sqlite3* db() const { return db_.get(); }
-    dftracer::utils::sqlite::SqliteDatabase& sql_db() { return db_; }
-    const dftracer::utils::sqlite::SqliteDatabase& sql_db() const {
-        return db_;
-    }
+    void rollback_transaction() noexcept;
 
     // -----------------------------------------------------------------------
     // Bloom insert operations
@@ -111,6 +133,7 @@ class IndexDatabase {
 
     void insert_chunk_statistics(int file_id, std::uint64_t checkpoint_idx,
                                  const ChunkStatistics& stats);
+    void insert_checkpoint(int file_id, const IndexerCheckpoint& checkpoint);
 
     void insert_index_dimension(int file_id, std::string_view dimension);
 
@@ -121,6 +144,12 @@ class IndexDatabase {
     void insert_chunk_dimension_stats(int file_id, std::uint64_t checkpoint_idx,
                                       const ChunkDimensionStats& stats,
                                       std::size_t value_counts_cap = 4096);
+    void insert_tar_archive_metadata(int file_id, std::string_view archive_name,
+                                     std::uint64_t checkpoint_size,
+                                     std::uint64_t total_lines,
+                                     std::uint64_t total_uc_size,
+                                     std::uint64_t total_files);
+    void insert_tar_file(int file_id, const TarFileRecord& record);
 
     // -----------------------------------------------------------------------
     // Bloom query operations
@@ -146,6 +175,19 @@ class IndexDatabase {
 
     std::vector<ChunkStatisticsResult> query_chunk_statistics(
         int file_id) const;
+    bool find_checkpoint(int file_id, std::size_t target_offset,
+                         IndexerCheckpoint& checkpoint) const;
+    std::vector<IndexerCheckpoint> query_checkpoints(int file_id) const;
+    std::vector<IndexerCheckpoint> query_checkpoints_for_line_range(
+        int file_id, std::uint64_t start_line, std::uint64_t end_line) const;
+    std::optional<TarArchiveMetadata> query_tar_archive_metadata(
+        int file_id) const;
+    std::vector<TarFileRecord> query_tar_files(int file_id) const;
+    bool find_tar_file(int file_id, std::string_view file_name,
+                       TarFileRecord& record) const;
+    std::vector<TarFileRecord> query_tar_files_in_range(
+        int file_id, std::uint64_t start_offset,
+        std::uint64_t end_offset) const;
 
     TimeBounds query_time_bounds(int file_id) const;
 
@@ -215,7 +257,12 @@ class IndexDatabase {
     void delete_metadata_lines(int file_id);
 
    private:
-    dftracer::utils::sqlite::SqliteDatabase db_;
+    void delete_file_data(int file_id);
+
+    std::string db_path_;
+    dftracer::utils::rocksdb::RocksDatabase::OpenMode open_mode_;
+    std::shared_ptr<dftracer::utils::rocksdb::RocksDatabase> db_;
+    std::unique_ptr<dftracer::utils::rocksdb::RocksDatabase::Batch> txn_batch_;
 };
 
 }  // namespace dftracer::utils::utilities::indexer
diff --git a/include/dftracer/utils/utilities/indexer/internal/indexer.h b/include/dftracer/utils/utilities/indexer/internal/indexer.h
index 68016cb5..dbc28ff3 100644
--- a/include/dftracer/utils/utilities/indexer/internal/indexer.h
+++ b/include/dftracer/utils/utilities/indexer/internal/indexer.h
@@ -12,7 +12,7 @@ typedef void *dft_indexer_handle_t;
 
 // C API function declarations
 dft_indexer_handle_t dft_indexer_create(const char *gz_path,
-                                        const char *idx_path,
+                                        const char *index_path,
                                         uint64_t checkpoint_size,
                                         int force_rebuild);
 int dft_indexer_build(dft_indexer_handle_t indexer);
@@ -65,7 +65,7 @@ class Indexer {
     virtual void set_visitors(VisitorList visitors) { (void)visitors; }
 
     // Metadata accessors
-    virtual const std::string &get_idx_path() const = 0;
+    virtual const std::string &get_index_path() const = 0;
     virtual const std::string &get_archive_path() const = 0;
     virtual std::uint64_t get_checkpoint_size() const = 0;
     virtual std::uint64_t get_max_bytes() const = 0;
diff --git a/include/dftracer/utils/utilities/indexer/internal/indexer_factory.h b/include/dftracer/utils/utilities/indexer/internal/indexer_factory.h
index 4ad42a21..63ec35ad 100644
--- a/include/dftracer/utils/utilities/indexer/internal/indexer_factory.h
+++ b/include/dftracer/utils/utilities/indexer/internal/indexer_factory.h
@@ -22,15 +22,15 @@ class IndexerFactory {
      * appropriate indexer.
      *
      * @param archive_path Path to the archive file (.gz or .tar.gz)
-     * @param idx_path Path to the index file (optional - will be auto-generated
-     * if empty)
+     * @param index_path Path to the `.dftindex` store (optional - will be
+     * auto-generated if empty)
      * @param checkpoint_size Checkpoint size in bytes
      * @param force Force rebuilding the index even if it exists
      * @return Shared pointer to the appropriate indexer, or nullptr if format
      * not supported
      */
     static std::shared_ptr<Indexer> create(
-        const std::string &archive_path, const std::string &idx_path = "",
+        const std::string &archive_path, const std::string &index_path = "",
         std::uint64_t checkpoint_size =
             constants::indexer::DEFAULT_CHECKPOINT_SIZE,
         bool force = false);
diff --git a/include/dftracer/utils/utilities/indexer/internal/scan_prefix.h b/include/dftracer/utils/utilities/indexer/internal/scan_prefix.h
new file mode 100644
index 00000000..a0118303
--- /dev/null
+++ b/include/dftracer/utils/utilities/indexer/internal/scan_prefix.h
@@ -0,0 +1,38 @@
+#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SCAN_PREFIX_H
+#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SCAN_PREFIX_H
+
+#include <dftracer/utils/utilities/indexer/internal/error.h>
+#include <rocksdb/iterator.h>
+#include <rocksdb/slice.h>
+#include <rocksdb/status.h>
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+namespace dftracer::utils::utilities::indexer::internal {
+
+template <typename IteratorFactory, typename Fn>
+void scan_prefix_iterator(std::string_view error_message,
+                          std::string_view prefix,
+                          IteratorFactory&& make_iterator, Fn&& fn) {
+    auto it = make_iterator();
+    for (it->Seek(::rocksdb::Slice(prefix.data(), prefix.size()));
+         it->Valid() && std::string_view(it->key().data(), it->key().size())
+                            .starts_with(prefix);
+         it->Next()) {
+        fn(*it);
+    }
+
+    const auto status = it->status();
+    if (!status.ok()) {
+        throw IndexerError(
+            IndexerError::Type::DATABASE_ERROR,
+            std::string(error_message) + ": " + status.ToString());
+    }
+}
+
+}  // namespace dftracer::utils::utilities::indexer::internal
+
+#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SCAN_PREFIX_H
diff --git a/include/dftracer/utils/utilities/indexer/provenance_database.h b/include/dftracer/utils/utilities/indexer/provenance_database.h
index 1b6dd75a..974eb771 100644
--- a/include/dftracer/utils/utilities/indexer/provenance_database.h
+++ b/include/dftracer/utils/utilities/indexer/provenance_database.h
@@ -1,10 +1,12 @@
 #ifndef DFTRACER_UTILS_UTILITIES_INDEXER_PROVENANCE_DATABASE_H
 #define DFTRACER_UTILS_UTILITIES_INDEXER_PROVENANCE_DATABASE_H
 
-#include <dftracer/utils/core/sqlite/database.h>
+#include <dftracer/utils/core/rocksdb/database.h>
+#include <dftracer/utils/core/rocksdb/db_manager.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
 
 #include <cstdint>
+#include <memory>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -12,12 +14,11 @@
 namespace dftracer::utils::utilities::indexer {
 
 /**
- * @brief Manages the .pidx SQLite database for provenance indices.
+ * @brief Manages provenance data in the shared `.dftindex` RocksDB store.
  *
- * Sidecar database that records the full reorganization provenance of
+ * The store records the full reorganization provenance of
  * an output file: which source files contributed, which checkpoints,
  * and which line ranges map to which output lines.
- * Path convention: file.pfw.gz -> file.pfw.gz.pidx
  *
  * Schema:
  *   - file_info: output file identity (path + hash)
@@ -34,7 +35,10 @@ class ProvenanceDatabase {
     using ProvenanceSegment =
         composites::dft::indexing::queries::ProvenanceSegment;
 
-    explicit ProvenanceDatabase(const std::string& pidx_path);
+    explicit ProvenanceDatabase(
+        const std::string& provenance_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode open_mode =
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadWrite);
 
     ProvenanceDatabase(const ProvenanceDatabase&) = delete;
     ProvenanceDatabase& operator=(const ProvenanceDatabase&) = delete;
@@ -49,24 +53,24 @@ class ProvenanceDatabase {
 
     int get_file_info_id(const std::string& path) const;
 
-    dftracer::utils::sqlite::SqliteDatabase& db() { return db_; }
-    const dftracer::utils::sqlite::SqliteDatabase& db() const { return db_; }
-
     void begin_transaction();
     void commit_transaction();
+    void rollback_transaction() noexcept;
 
     // -----------------------------------------------------------------------
     // Provenance insert operations
     // -----------------------------------------------------------------------
 
-    void insert_info(std::string_view key, std::string_view value);
+    void insert_info(int file_info_id, std::string_view key,
+                     std::string_view value);
 
     void insert_source(int file_info_id, int source_idx, std::string_view path,
                        int num_checkpoints, std::string_view event_hash = "");
 
-    void insert_group(std::string_view name, std::string_view predicate);
+    void insert_group(int file_info_id, std::string_view name,
+                      std::string_view predicate);
 
-    void insert_segment(int source_idx, int source_checkpoint,
+    void insert_segment(int file_info_id, int source_idx, int source_checkpoint,
                         int output_line_start, int output_line_end,
                         int event_count);
 
@@ -76,22 +80,26 @@ class ProvenanceDatabase {
 
     std::vector<ProvenanceSource> query_sources(int file_info_id) const;
 
-    std::vector<ProvenanceSegment> query_segments(int source_idx) const;
+    std::vector<ProvenanceSegment> query_segments(int file_info_id,
+                                                  int source_idx) const;
 
-    std::vector<ProvenanceSegment> query_all_segments() const;
+    std::vector<ProvenanceSegment> query_all_segments(int file_info_id) const;
 
-    std::string query_info(std::string_view key) const;
+    std::string query_info(int file_info_id, std::string_view key) const;
 
-    std::string query_group_name() const;
+    std::string query_group_name(int file_info_id) const;
 
-    std::string query_group_predicate() const;
+    std::string query_group_predicate(int file_info_id) const;
 
    private:
-    dftracer::utils::sqlite::SqliteDatabase db_;
+    std::string db_path_;
+    dftracer::utils::rocksdb::RocksDatabase::OpenMode open_mode_;
+    std::shared_ptr<dftracer::utils::rocksdb::RocksDatabase> db_;
+    std::unique_ptr<dftracer::utils::rocksdb::RocksDatabase::Batch> txn_batch_;
 };
 
 /**
- * @brief Determine the provenance index (.pidx) path for a given data file.
+ * @brief Determine the shared `.dftindex` provenance root for a data file.
  */
 std::string determine_provenance_index_path(const std::string& data_path,
                                             const std::string& index_dir = "");
diff --git a/include/dftracer/utils/utilities/reader/internal/reader.h b/include/dftracer/utils/utilities/reader/internal/reader.h
index f23f88e6..bd464f00 100644
--- a/include/dftracer/utils/utilities/reader/internal/reader.h
+++ b/include/dftracer/utils/utilities/reader/internal/reader.h
@@ -18,7 +18,8 @@ typedef void *dft_indexer_handle_t;
  * Opaque handle for DFT reader
  */
 typedef void *dft_reader_handle_t;
-dft_reader_handle_t dft_reader_create(const char *gz_path, const char *idx_path,
+dft_reader_handle_t dft_reader_create(const char *gz_path,
+                                      const char *index_path,
                                       size_t index_ckpt_size);
 dft_reader_handle_t dft_reader_create_with_indexer(
     dft_indexer_handle_t indexer);
@@ -73,7 +74,7 @@ class Reader {
     virtual std::size_t get_max_bytes() const = 0;
     virtual std::size_t get_num_lines() const = 0;
     virtual const std::string &get_archive_path() const = 0;
-    virtual const std::string &get_idx_path() const = 0;
+    virtual const std::string &get_index_path() const = 0;
     virtual void set_buffer_size(std::size_t size) = 0;
 
     // Estimate line count for a byte range (for pre-allocation)
diff --git a/include/dftracer/utils/utilities/reader/internal/reader_factory.h b/include/dftracer/utils/utilities/reader/internal/reader_factory.h
index 9986467e..4413440b 100644
--- a/include/dftracer/utils/utilities/reader/internal/reader_factory.h
+++ b/include/dftracer/utils/utilities/reader/internal/reader_factory.h
@@ -19,7 +19,7 @@ class ReaderFactory {
      * Create a reader for any supported archive format (returns Reader)
      */
     static std::shared_ptr<Reader> create(
-        const std::string &archive_path, const std::string &idx_path,
+        const std::string &archive_path, const std::string &index_path,
         std::size_t index_ckpt_size = dftracer::utils::utilities::indexer::
             internal::Indexer::DEFAULT_CHECKPOINT_SIZE);
 
diff --git a/include/dftracer/utils/utilities/reader/trace_reader.h b/include/dftracer/utils/utilities/reader/trace_reader.h
index b2723b0f..ccd2341a 100644
--- a/include/dftracer/utils/utilities/reader/trace_reader.h
+++ b/include/dftracer/utils/utilities/reader/trace_reader.h
@@ -20,7 +20,7 @@ using fileio::lines::Line;
 /// File-level configuration for TraceReader.
 struct TraceReaderConfig {
     std::string file_path;  ///< Path to trace file (.pfw.gz or plain).
-    std::string index_dir;  ///< Directory for .idx sidecar files.
+    std::string index_dir;  ///< Directory containing `.dftindex` roots.
     std::size_t checkpoint_size = 32 * 1024 * 1024;  ///< Checkpoint interval.
     bool auto_build_index = false;  ///< Auto-build index if missing.
     std::size_t index_threshold =
@@ -62,7 +62,7 @@ class TraceReader {
     coro::AsyncGenerator<std::span<const char>> read_raw(
         ReadConfig config = {});
 
-    /// True if an .idx sidecar was found at construction time.
+    /// True if a `.dftindex` store was found at construction time.
     bool has_index() const;
     /// Decompressed size (0 if no index for compressed files).
     std::size_t get_max_bytes();
@@ -72,7 +72,7 @@ class TraceReader {
    private:
     TraceReaderConfig config_;
     bool has_index_ = false;
-    std::string idx_path_;
+    std::string index_path_;
     ArchiveFormat format_ = ArchiveFormat::UNKNOWN;
     std::size_t cached_max_bytes_ = 0;
     std::size_t cached_num_lines_ = 0;
diff --git a/python/dftracer/utils/dftracer_utils_ext.pyi b/python/dftracer/utils/dftracer_utils_ext.pyi
index b8c02533..c9324a74 100644
--- a/python/dftracer/utils/dftracer_utils_ext.pyi
+++ b/python/dftracer/utils/dftracer_utils_ext.pyi
@@ -17,12 +17,12 @@ class IndexerCheckpoint:
     num_lines: int
 
 class Indexer:
-    """Indexer for creating and managing gzip file indices."""
+    """Indexer for creating and managing root-local ``.dftindex`` stores."""
 
     def __init__(
         self,
         gz_path: str,
-        idx_path: Optional[str] = None,
+        index_path: Optional[str] = None,
         checkpoint_size: int = 1048576,
         force_rebuild: bool = False,
         build_bloom: bool = False,
@@ -34,7 +34,8 @@ class Indexer:
 
         Args:
             gz_path: Path to the gzip trace file.
-            idx_path: Path to the index file. If None, uses gz_path + ".idx".
+            index_path: Path to the ``.dftindex`` store. If None, uses the
+                root-local ``.dftindex`` next to ``gz_path``.
             checkpoint_size: Checkpoint size in bytes for index building.
             force_rebuild: If True, rebuild the index even if it exists.
             build_bloom: If True, build bloom filter data in the index.
@@ -56,7 +57,7 @@ class Indexer:
         ...
 
     def exists(self) -> bool:
-        """Check if the index file exists."""
+        """Check if the ``.dftindex`` store exists."""
         ...
 
     def get_max_bytes(self) -> int:
@@ -75,14 +76,22 @@ class Indexer:
         """Find checkpoint for target offset."""
         ...
 
+    def close(self) -> None:
+        """Release this Python wrapper's native indexer handle.
+
+        This does not force-close the shared RocksDB instance for the same
+        ``.dftindex`` path.
+        """
+        ...
+
     @property
     def gz_path(self) -> str:
         """Get gzip path."""
         ...
 
     @property
-    def idx_path(self) -> str:
-        """Get index path."""
+    def index_path(self) -> str:
+        """Get the ``.dftindex`` path."""
         ...
 
     @property
@@ -92,12 +101,12 @@ class Indexer:
 
     @property
     def has_bloom(self) -> bool:
-        """Whether bloom filter data exists in the index sidecar."""
+        """Whether bloom filter data exists in the ``.dftindex`` store."""
         ...
 
     @property
     def has_manifest(self) -> bool:
-        """Whether manifest data exists in the index sidecar."""
+        """Whether manifest data exists in the ``.dftindex`` store."""
         ...
 
     def __enter__(self) -> "Indexer":
@@ -110,7 +119,11 @@ class Indexer:
         exc_val: Optional[BaseException],
         exc_tb: Optional[TracebackType],
     ) -> None:
-        """Exit the runtime context for the with statement."""
+        """Release this Python wrapper on context exit.
+
+        This does not force-close the shared RocksDB instance for the same
+        ``.dftindex`` path.
+        """
         ...
 
 # ========== JSON ==========
@@ -321,7 +334,7 @@ class TraceReader:
 
         Args:
             file_path: Path to the trace file (.pfw.gz or plain text).
-            index_dir: Directory to search for ``.idx`` sidecar files.
+            index_dir: Directory to search for ``.dftindex`` stores.
                 Empty string (default) searches next to the trace file.
             checkpoint_size: Checkpoint interval in bytes for index
                 building (default 32 MB).
@@ -514,7 +527,7 @@ class TraceReader:
 
     @property
     def index_dir(self) -> str:
-        """Directory searched for index sidecar files."""
+        """Directory searched for ``.dftindex`` stores."""
         ...
 
     @property
diff --git a/setup.py b/setup.py
index 5b69509e..1087223d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from setuptools import setup
-
 from setuptools_scm import ScmVersion
 
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7f68597a..14b9493a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,7 +5,8 @@
 add_rpath()
 
 need_zlib()
-need_sqlite3()
+need_lz4()
+need_rocksdb()
 need_argparse()
 need_ghc_filesystem()
 need_cpplogger()
@@ -31,6 +32,7 @@ set(DFTRACER_UTILS_CORE_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/common/constants.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/common/format_detector.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/common/filesystem.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/env.cpp
     # Utilities
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/utils/timer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/utils/string.cpp
@@ -56,12 +58,12 @@ set(DFTRACER_UTILS_CORE_SOURCES
     # Tasks
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/tasks/task.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/tasks/task_result.cpp
-    # SQLite
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/sqlite/error.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/sqlite/database.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/sqlite/statement.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/sqlite/vfs.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/sqlite/async.cpp
+    # RocksDB
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/rocksdb/database.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/rocksdb/filesystem.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/rocksdb/key_codec.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/rocksdb/async.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/core/rocksdb/db_manager.cpp
 )
 
 # Conditionally add io_uring backend sources
@@ -126,37 +128,6 @@ set(DFTRACER_UTILS_UTILITIES_SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_bloom_filter.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_file_bloom_filter.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_statistics.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_index_dimension.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_hash_resolution.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters_batch.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filter.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filters_batch.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_index_dimensions.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_hash_by_resolved.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_resolved_by_hash.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_bloom_filters.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_file_bloom_filter.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_statistics.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_time_bounds.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_statistics.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_dimension_stats.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_dimension_stats.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_dimension_stats.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_hash_resolutions.cpp
-    # Manifest index queries
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_event_range.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_metadata_lines.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_event_ranges.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_metadata_lines.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_event_ranges.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/delete_metadata_lines.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/insert_provenance.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/indexing/queries/query_provenance.cpp
-
     # DFT Reorganization
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/composites/dft/reorganize/reconstruction_planner.cpp
@@ -197,33 +168,12 @@ set(DFTRACER_UTILS_UTILITIES_SOURCES
     # Indexer factory
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/indexer_factory.cpp
     # GZIP indexer
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/delete_file_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_checkpoint_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_metadata_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoints.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_file_id.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_max_bytes.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_num_lines.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_schema_validity.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_stored_file_info.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint_size.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/gzip/constants.cpp
     # TAR indexer
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/tar_parser.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/constants.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/insert_file_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_metadata_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_file_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_checkpoint_record.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/query_archive_id.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_files.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_checkpoints.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/indexer/internal/tar/queries/query_metadata.cpp
     # Reader
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/reader/internal/reader_c.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/dftracer/utils/utilities/reader/internal/stream_c.cpp
@@ -279,14 +229,6 @@ set(PKG_CONFIG_LIBS_PRIVATE "")
 # Add Threads (always required)
 set(PKG_CONFIG_LIBS_PRIVATE "${PKG_CONFIG_LIBS_PRIVATE} -lpthread")
 
-# Only add sqlite3 to pkg-config requirements if it was found on system
-if(SQLite3_FOUND AND NOT SQLite3_CPM)
-  set(PKG_CONFIG_REQUIRES "${PKG_CONFIG_REQUIRES} sqlite3")
-  set(PKG_CONFIG_LIBS_PRIVATE "${PKG_CONFIG_LIBS_PRIVATE} -lsqlite3")
-else()
-  set(PKG_CONFIG_LIBS_PRIVATE "${PKG_CONFIG_LIBS_PRIVATE} -lsqlite3")
-endif()
-
 # Only add zlib to pkg-config requirements if it was found on system
 if(ZLIB_FOUND AND NOT ZLIB_CPM)
   set(PKG_CONFIG_LIBS_PRIVATE "${PKG_CONFIG_LIBS_PRIVATE} -lz")
@@ -373,7 +315,7 @@ foreach(variant shared static)
     # Link dependencies using helper functions
     link_cpp_logger(dftracer_utils_core_${variant} ${VARIANT_UPPER})
     link_yyjson(dftracer_utils_core_${variant} ${VARIANT_UPPER})
-    link_sqlite3(dftracer_utils_core_${variant} ${VARIANT_UPPER})
+    link_rocksdb(dftracer_utils_core_${variant} ${VARIANT_UPPER})
     link_zlib(dftracer_utils_core_${variant} ${VARIANT_UPPER})
 
     # Add stdfs if needed
@@ -822,6 +764,7 @@ if(DFTRACER_UTILS_BUILD_BINARIES)
     set_target_properties(
       ${bin_exec} PROPERTIES OUTPUT_NAME "${bin_exec}" RUNTIME_OUTPUT_DIRECTORY
                                                        ${CMAKE_BINARY_DIR}/bin)
+    target_add_rpath(${bin_exec})
 
     # Link to unified library (which includes both core and utilities)
     target_link_libraries(${bin_exec} PRIVATE dftracer_utils argparse::argparse)
@@ -867,6 +810,7 @@ if(DFTRACER_UTILS_BUILD_EXAMPLES)
 
     add_executable(${example_name} ${example})
     set_target_properties(${example_name} PROPERTIES OUTPUT_NAME "${example_name}")
+    target_add_rpath(${example_name})
     target_link_libraries(
       ${example_name} PRIVATE dftracer_utils argparse::argparse)
 
diff --git a/src/dftracer/utils/binaries/dftracer_aggregator.cpp b/src/dftracer/utils/binaries/dftracer_aggregator.cpp
index 1a86b51a..c511cabe 100644
--- a/src/dftracer/utils/binaries/dftracer_aggregator.cpp
+++ b/src/dftracer/utils/binaries/dftracer_aggregator.cpp
@@ -265,6 +265,17 @@ static coro::CoroTask<int> run_aggregator(argparse::ArgumentParser& program) {
     EventAggregatorUtility merger;
     std::atomic<int> global_chunk_idx{0};
 
+    if (force_rebuild && !input_files.empty()) {
+        const std::string shared_index_path =
+            composites::dft::internal::determine_index_path(input_files.front(),
+                                                            index_dir);
+        if (fs::exists(shared_index_path)) {
+            DFTRACER_UTILS_LOG_INFO("Clearing shared index store: %s",
+                                    shared_index_path.c_str());
+            fs::remove_all(shared_index_path);
+        }
+    }
+
     // Streaming aggregation: file producers -> chunk workers -> merger
     auto streaming_task = make_task(
         [&](CoroScope& ctx) -> coro::CoroTask<void> {
@@ -283,23 +294,24 @@ static coro::CoroTask<int> run_aggregator(argparse::ArgumentParser& program) {
                                     -> coro::CoroTask<void> {
                         [[maybe_unused]] auto producer_guard = ch.guard();
                         // Build index
-                        std::string idx_path =
+                        std::string index_path =
                             composites::dft::internal::determine_index_path(
                                 file_path, index_dir);
                         auto idx_input =
                             indexer::IndexBuildConfig::for_file(file_path)
                                 .with_checkpoint_size(checkpoint_size)
-                                .with_force_rebuild(force_rebuild)
+                                .with_force_rebuild(false)
                                 .with_index_dir(index_dir);
-                        indexer::IndexBuilderUtility{}.process(idx_input);
+                        co_await indexer::IndexBuilderUtility{}.process(
+                            idx_input);
 
                         // Collect metadata
                         auto meta_input =
                             composites::dft::MetadataCollectorUtilityInput::
                                 from_file(file_path)
                                     .with_checkpoint_size(checkpoint_size)
-                                    .with_force_rebuild(force_rebuild)
-                                    .with_index(idx_path);
+                                    .with_force_rebuild(false)
+                                    .with_index(index_path);
                         auto metadata =
                             co_await composites::dft::MetadataCollectorUtility{}
                                 .process(meta_input);
diff --git a/src/dftracer/utils/binaries/dftracer_comparator.cpp b/src/dftracer/utils/binaries/dftracer_comparator.cpp
index 2b2743a5..a9cd9750 100644
--- a/src/dftracer/utils/binaries/dftracer_comparator.cpp
+++ b/src/dftracer/utils/binaries/dftracer_comparator.cpp
@@ -79,7 +79,7 @@ static coro::CoroTask<EventAggregatorUtilityOutput> run_aggregation(
                                     -> coro::CoroTask<void> {
                         [[maybe_unused]] auto producer_guard = ch.guard();
 
-                        std::string idx_path =
+                        std::string index_path =
                             composites::dft::internal::determine_index_path(
                                 file_path, index_dir);
 
@@ -88,7 +88,7 @@ static coro::CoroTask<EventAggregatorUtilityOutput> run_aggregation(
                                 from_file(file_path)
                                     .with_checkpoint_size(checkpoint_size)
                                     .with_force_rebuild(force_rebuild)
-                                    .with_index(idx_path);
+                                    .with_index(index_path);
                         auto metadata =
                             co_await composites::dft::MetadataCollectorUtility{}
                                 .process(meta_input);
@@ -288,8 +288,19 @@ static coro::CoroTask<int> run_comparator(argparse::ArgumentParser& program) {
         co_return 1;
     }
 
-    // Build indexes upfront so parallel aggregation doesn't race on .idx
+    // Build indexes upfront so parallel aggregation doesn't race on
+    // `.dftindex`.
     {
+        if (config.force_rebuild && !baseline_files.empty()) {
+            const std::string shared_index_path =
+                composites::dft::internal::determine_index_path(
+                    baseline_files.front(), config.index_dir);
+            if (fs::exists(shared_index_path)) {
+                DFTRACER_UTILS_LOG_INFO("Clearing shared index store: %s",
+                                        shared_index_path.c_str());
+                fs::remove_all(shared_index_path);
+            }
+        }
         std::unordered_set<std::string> seen;
         std::vector<std::string> all_files;
         for (const auto& f : baseline_files) {
@@ -306,7 +317,7 @@ static coro::CoroTask<int> run_comparator(argparse::ArgumentParser& program) {
             idx_configs.push_back(
                 indexer::IndexBuildConfig::for_file(file_path)
                     .with_checkpoint_size(config.checkpoint_size)
-                    .with_force_rebuild(config.force_rebuild)
+                    .with_force_rebuild(false)
                     .with_index_dir(config.index_dir));
         }
         std::vector<coro::CoroTask<indexer::IndexBuildResult>> idx_tasks;
diff --git a/src/dftracer/utils/binaries/dftracer_event_count.cpp b/src/dftracer/utils/binaries/dftracer_event_count.cpp
index 27b00836..c5e91c21 100644
--- a/src/dftracer/utils/binaries/dftracer_event_count.cpp
+++ b/src/dftracer/utils/binaries/dftracer_event_count.cpp
@@ -127,7 +127,6 @@ static coro::CoroTask<int> run_event_count(argparse::ArgumentParser& program) {
                 auto* total_events_ptr = &total_events;
                 auto* files_processed_ptr = &files_processed;
                 auto* is_approximate_ptr = &is_approximate;
-
                 auto file_chan =
                     coro::make_channel<std::size_t>(executor_threads * 2);
 
@@ -162,19 +161,19 @@ static coro::CoroTask<int> run_event_count(argparse::ArgumentParser& program) {
                             co_await builder.process(config);
 
                             // Read event count from index
-                            std::string idx_path =
+                            std::string index_path =
                                 fp + constants::indexer::EXTENSION;
                             if (!index_dir.empty()) {
                                 auto fname = fs::path(fp).filename();
-                                idx_path =
+                                index_path =
                                     (fs::path(index_dir) / fname).string() +
                                     constants::indexer::EXTENSION;
                             }
 
-                            if (fs::exists(idx_path)) {
+                            if (fs::exists(index_path)) {
                                 try {
                                     utilities::indexer::IndexDatabase db(
-                                        idx_path);
+                                        index_path);
                                     int fid = db.find_file(fp);
                                     if (fid >= 0) {
                                         if (!db.has_bloom_data(fid)) {
diff --git a/src/dftracer/utils/binaries/dftracer_gen_fake_trace.cpp b/src/dftracer/utils/binaries/dftracer_gen_fake_trace.cpp
index 19540409..eb3f4c45 100644
--- a/src/dftracer/utils/binaries/dftracer_gen_fake_trace.cpp
+++ b/src/dftracer/utils/binaries/dftracer_gen_fake_trace.cpp
@@ -241,7 +241,7 @@ static coro::CoroTask<int> run_verify(
         std::string abs_path = fs::absolute(file_path).string();
 
         // 1. Build gzip index
-        std::string idx_path = internal::determine_index_path(abs_path, "");
+        std::string index_path = internal::determine_index_path(abs_path, "");
         auto idx_input = IndexBuildConfig::for_file(abs_path)
                              .with_checkpoint_size(ckpt_size)
                              .with_force_rebuild(true);
@@ -251,7 +251,7 @@ static coro::CoroTask<int> run_verify(
         auto meta_input = MetadataCollectorUtilityInput::from_file(abs_path)
                               .with_checkpoint_size(ckpt_size)
                               .with_force_rebuild(false)
-                              .with_index(idx_path);
+                              .with_index(index_path);
         auto metadata = co_await MetadataCollectorUtility{}.process(meta_input);
 
         if (!metadata.success) {
@@ -307,7 +307,7 @@ static coro::CoroTask<int> run_verify(
             for (const auto& chunk : chunks) {
                 ChunkIndexerInput ci;
                 ci.with_file_path(abs_path)
-                    .with_idx_path(idx_path)
+                    .with_index_path(index_path)
                     .with_checkpoint_size(ckpt_size)
                     .with_checkpoint_idx(chunk.idx)
                     .with_byte_range(chunk.start, chunk.end)
diff --git a/src/dftracer/utils/binaries/dftracer_index.cpp b/src/dftracer/utils/binaries/dftracer_index.cpp
index 47488222..249f262a 100644
--- a/src/dftracer/utils/binaries/dftracer_index.cpp
+++ b/src/dftracer/utils/binaries/dftracer_index.cpp
@@ -130,7 +130,6 @@ static coro::CoroTask<int> run_index(argparse::ArgumentParser& program) {
                 auto* all_dims_ptr = &all_dimensions;
                 auto* files_ptr = &input_files;
                 auto* index_dir_ptr = &index_dir;
-
                 // Bounded fan-out: channel limits concurrent file processing
                 // to avoid memory pressure from unbounded coroutine spawning.
                 auto file_chan =
@@ -247,7 +246,7 @@ int main(int argc, char** argv) {
                                      DFTRACER_UTILS_PACKAGE_VERSION);
     program.add_description(
         "Build per-chunk bloom filter indices for DFTracer trace files. "
-        "Creates .idx sidecar databases enabling fast chunk-skipping "
+        "Creates root-local .dftindex databases enabling fast chunk-skipping "
         "queries.");
 
     program.add_argument("-d", "--directory")
@@ -278,7 +277,7 @@ int main(int argc, char** argv) {
             static_cast<std::size_t>(dftracer_utils_hardware_concurrency()));
 
     program.add_argument("--index-dir")
-        .help("Directory to store index files (default: same as data files)")
+        .help("Directory where .dftindex stores are created")
         .default_value<std::string>("");
 
     program.add_argument("--expected-entries")
@@ -300,7 +299,7 @@ int main(int argc, char** argv) {
 
     program.add_argument("--manifest")
         .help(
-            "Also build .idx manifest index "
+            "Also build manifest data in the .dftindex store "
             "(per-checkpoint event line routing)")
         .flag();
 
diff --git a/src/dftracer/utils/binaries/dftracer_info.cpp b/src/dftracer/utils/binaries/dftracer_info.cpp
index 7c2dc03d..7c4a7191 100644
--- a/src/dftracer/utils/binaries/dftracer_info.cpp
+++ b/src/dftracer/utils/binaries/dftracer_info.cpp
@@ -49,7 +49,7 @@ static std::string format_size(std::uint64_t bytes) {
     return oss.str();
 }
 
-/// Fast path: read metadata from the .idx database.
+/// Fast path: read metadata from the `.dftindex` database.
 /// Returns success=false if index doesn't exist, letting the caller
 /// fall back to direct_scan_info for small/unindexed files.
 static MetadataCollectorUtilityOutput index_based_info(
@@ -60,13 +60,13 @@ static MetadataCollectorUtilityOutput index_based_info(
     meta.file_path = file_path;
 
     try {
-        std::string idx_path = file_path + constants::indexer::EXTENSION;
-        if (!fs::exists(idx_path)) {
+        std::string index_path = file_path + constants::indexer::EXTENSION;
+        if (!fs::exists(index_path)) {
             meta.success = false;
             return meta;
         }
 
-        IndexDatabase db(idx_path);
+        IndexDatabase db(index_path);
         int fid = db.find_file(file_path);
         if (fid < 0) {
             meta.success = false;
@@ -78,6 +78,7 @@ static MetadataCollectorUtilityOutput index_based_info(
         meta.num_lines = db.get_num_lines(fid);
         meta.uncompressed_size = db.get_max_bytes(fid);
         meta.valid_events = db.get_total_events(fid);
+        meta.index_path = index_path;
         meta.has_index = true;
         meta.index_valid = true;
         meta.size_mb =
@@ -97,7 +98,7 @@ static MetadataCollectorUtilityOutput index_based_info(
 }
 
 /// One streaming decompress pass, count lines with JSON validation,
-/// no sidecar index created.
+/// without creating a `.dftindex` store.
 static coro::CoroTask<MetadataCollectorUtilityOutput> direct_scan_info(
     std::string file_path) {
     using dftracer::utils::utilities::fileio::lines::sources::
@@ -214,9 +215,9 @@ static void print_file_info(const MetadataCollectorUtilityOutput& info,
     if (info.format == ArchiveFormat::GZIP ||
         info.format == ArchiveFormat::TAR_GZ) {
         std::printf("\nIndex Information:\n");
-        std::printf("  Index File: %s\n", info.idx_path.empty()
-                                              ? "(auto-generated)"
-                                              : info.idx_path.c_str());
+        std::printf("  Index Store: %s\n", info.index_path.empty()
+                                               ? "(auto-generated)"
+                                               : info.index_path.c_str());
         std::printf("  Index Status: %s\n",
                     info.has_index ? (info.index_valid ? "Valid" : "Invalid")
                                    : "Not Created");
@@ -239,8 +240,22 @@ static void print_file_info(const MetadataCollectorUtilityOutput& info,
                             (unsigned long long)lines_per_checkpoint);
 
                 // Calculate index overhead
-                if (fs::exists(info.idx_path)) {
-                    std::uint64_t index_size = fs::file_size(info.idx_path);
+                if (fs::exists(info.index_path)) {
+                    // The .dftindex store may be a directory, for which
+                    // the result of fs::file_size is
+                    // implementation-defined, so sum its regular files.
+                    std::uint64_t index_size = 0;
+                    if (fs::is_directory(info.index_path)) {
+                        for (const auto& entry :
+                             fs::recursive_directory_iterator(
+                                 info.index_path)) {
+                            if (entry.is_regular_file()) {
+                                index_size += entry.file_size();
+                            }
+                        }
+                    } else {
+                        index_size = fs::file_size(info.index_path);
+                    }
                     double index_overhead =
                         100.0 * static_cast<double>(index_size) /
                         static_cast<double>(info.compressed_size);
@@ -404,7 +419,7 @@ int main(int argc, char** argv) {
         }
     }
 
-    // Small files skip indexing to avoid creating sidecar files on
+    // Small files skip indexing to avoid creating `.dftindex` stores on
     // metadata-sensitive filesystems (e.g. Lustre).
     static constexpr std::size_t INDEX_SIZE_THRESHOLD =
         constants::indexer::DEFAULT_INDEX_SIZE_THRESHOLD;
diff --git a/src/dftracer/utils/binaries/dftracer_organize.cpp b/src/dftracer/utils/binaries/dftracer_organize.cpp
index a08db38b..1a1f9259 100644
--- a/src/dftracer/utils/binaries/dftracer_organize.cpp
+++ b/src/dftracer/utils/binaries/dftracer_organize.cpp
@@ -185,11 +185,11 @@ coro::CoroTask<int> run_organize(const std::string& output_dir,
     std::printf("  Source files processed: %zu\n",
                 router_result.source_files_processed);
 
-    // Step 4: Build sidecars for output chunk files
+    // Step 4: Build `.dftindex` stores for output chunk files.
     if (!router_result.output_files.empty()) {
-        std::printf("Step 4: Building sidecars...\n");
+        std::printf("Step 4: Building .dftindex stores...\n");
         auto pipeline_config = PipelineConfig()
-                                   .with_name("Organize: Build Sidecars")
+                                   .with_name("Organize: Build Index Stores")
                                    .with_compute_threads(executor_threads)
                                    .with_watchdog(false);
 
@@ -197,7 +197,7 @@ coro::CoroTask<int> run_organize(const std::string& output_dir,
 
         auto* output_files_ptr = &router_result.output_files;
 
-        auto sidecar_task = make_task(
+        auto index_store_task = make_task(
             [output_files_ptr, output_dir,
              checkpoint_size](CoroScope& ctx) -> coro::CoroTask<void> {
                 co_await ctx.scope(
@@ -224,8 +224,8 @@ coro::CoroTask<int> run_organize(const std::string& output_dir,
             },
             "BuildSidecars");
 
-        pipeline.set_source(sidecar_task);
-        pipeline.set_destination(sidecar_task);
+        pipeline.set_source(index_store_task);
+        pipeline.set_destination(index_store_task);
         pipeline.execute();
     }
 
@@ -293,7 +293,7 @@ int main(int argc, char** argv) {
             indexer::internal::Indexer::DEFAULT_CHECKPOINT_SIZE));
 
     program.add_argument("--index-dir")
-        .help("Directory for sidecar files")
+        .help("Directory for .dftindex stores")
         .default_value<std::string>("");
 
     program.add_argument("-f", "--force")
diff --git a/src/dftracer/utils/binaries/dftracer_reader.cpp b/src/dftracer/utils/binaries/dftracer_reader.cpp
index a15d203d..2e923f24 100644
--- a/src/dftracer/utils/binaries/dftracer_reader.cpp
+++ b/src/dftracer/utils/binaries/dftracer_reader.cpp
@@ -24,31 +24,33 @@ using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dftracer::utils::utilities::reader::internal;
 
 static coro::CoroTask<int> run_reader(const std::string &gz_path,
-                                      const std::string &idx_path,
+                                      const std::string &index_path,
                                       std::size_t checkpoint_size,
                                       bool force_rebuild, bool check_rebuild,
                                       const std::string &read_mode,
                                       std::size_t read_buffer_size,
                                       int64_t start, int64_t end) {
+    const std::string index_root = normalize_index_root(index_path);
+
     // Create indexer first
     std::shared_ptr<Indexer> indexer;
     try {
-        // check if idx file exists
-        if (!fs::exists(idx_path)) {
+        // Check whether the root-local .dftindex store already exists.
+        if (!fs::exists(index_root)) {
             if (check_rebuild) {
                 DFTRACER_UTILS_LOG_ERROR(
-                    "Index file '%s' does not exist, cannot check",
-                    idx_path.c_str());
+                    "Index store '%s' does not exist, cannot check",
+                    index_root.c_str());
                 co_return 1;
             }
-            DFTRACER_UTILS_LOG_DEBUG("Index file '%s' does not exist",
-                                     idx_path.c_str());
-            DFTRACER_UTILS_LOG_DEBUG("%s", "Will create new index file");
+            DFTRACER_UTILS_LOG_DEBUG("Index store '%s' does not exist",
+                                     index_root.c_str());
+            DFTRACER_UTILS_LOG_DEBUG("%s", "Will create new index store");
             force_rebuild = true;
         }
 
         // Use IndexerFactory to create appropriate indexer
-        indexer = IndexerFactory::create(gz_path, idx_path, checkpoint_size,
+        indexer = IndexerFactory::create(gz_path, index_path, checkpoint_size,
                                          force_rebuild);
 
         if (check_rebuild) {
@@ -60,15 +62,15 @@ static coro::CoroTask<int> run_reader(const std::string &gz_path,
         }
 
         if (force_rebuild) {
-            if (fs::exists(idx_path)) {
-                DFTRACER_UTILS_LOG_DEBUG("Removing existing index: %s",
-                                         idx_path.c_str());
-                fs::remove(idx_path);
+            if (fs::exists(index_root)) {
+                DFTRACER_UTILS_LOG_DEBUG("Removing existing index store: %s",
+                                         index_root.c_str());
+                fs::remove_all(index_root);
             }
-            // Recreate indexer after removing old index
-            indexer = IndexerFactory::create(gz_path, idx_path, checkpoint_size,
-                                             true);
-            DFTRACER_UTILS_LOG_INFO("Building index for file: %s",
+            // Recreate the store after removing the old .dftindex root.
+            indexer = IndexerFactory::create(gz_path, index_path,
+                                             checkpoint_size, true);
+            DFTRACER_UTILS_LOG_INFO("Building index store for file: %s",
                                     gz_path.c_str());
             co_await indexer->build_async();
         }
@@ -184,7 +186,7 @@ int main(int argc, char **argv) {
         .help("Compressed file to process (GZIP, TAR.GZ)")
         .required();
     program.add_argument("-i", "--index")
-        .help("Index file to use")
+        .help("Path to the .dftindex store to use")
         .default_value<std::string>("");
     program.add_argument("-s", "--start")
         .help("Start position in bytes")
@@ -201,9 +203,11 @@ int main(int argc, char **argv) {
         .default_value(
             static_cast<std::size_t>(Indexer::DEFAULT_CHECKPOINT_SIZE));
     program.add_argument("-f", "--force-rebuild")
-        .help("Force rebuild index")
+        .help("Force rebuild the .dftindex store")
+        .flag();
+    program.add_argument("--check")
+        .help("Check if the .dftindex store is valid")
         .flag();
-    program.add_argument("--check").help("Check if index is valid").flag();
     program.add_argument("--read-buffer-size")
         .help("Size of the read buffer in bytes (default: 1MB)")
         .default_value<std::size_t>(1 * 1024 * 1024)
@@ -213,7 +217,7 @@ int main(int argc, char **argv) {
         .default_value<std::string>("bytes")
         .choices("bytes", "line_bytes", "lines");
     program.add_argument("--index-dir")
-        .help("Directory to store index files (default: system temp directory)")
+        .help("Directory to store root-local .dftindex directories")
         .default_value<std::string>("");
 
     try {
@@ -260,11 +264,8 @@ int main(int argc, char **argv) {
     }
     ::close(test_fd);
 
-    std::string idx_path;
-    if (!index_path.empty()) {
-        idx_path = index_path;
-    } else {
-        idx_path = utilities::composites::dft::internal::determine_index_path(
+    if (index_path.empty()) {
+        index_path = utilities::composites::dft::internal::determine_index_path(
             gz_path, index_dir);
     }
 
@@ -277,7 +278,7 @@ int main(int argc, char **argv) {
                                                              : "UNKNOWN");
 #endif
 
-    return run_reader(gz_path, idx_path, checkpoint_size, force_rebuild,
+    return run_reader(gz_path, index_path, checkpoint_size, force_rebuild,
                       check_rebuild, read_mode, read_buffer_size, start, end)
         .get();
 }
diff --git a/src/dftracer/utils/binaries/dftracer_reconstruct.cpp b/src/dftracer/utils/binaries/dftracer_reconstruct.cpp
index d72aaeed..f40d0db1 100644
--- a/src/dftracer/utils/binaries/dftracer_reconstruct.cpp
+++ b/src/dftracer/utils/binaries/dftracer_reconstruct.cpp
@@ -207,7 +207,7 @@ static coro::CoroTask<int> run_reconstruct(const std::string& directory,
                                                       -> coro::CoroTask<void> {
                         co_await s.receive(permits);
                         try {
-                            std::string idx_path =
+                            std::string index_path =
                                 internal::determine_index_path(reorg_file_copy,
                                                                "");
 
@@ -215,14 +215,14 @@ static coro::CoroTask<int> run_reconstruct(const std::string& directory,
                             auto meta_input =
                                 MetadataCollectorUtilityInput::from_file(
                                     reorg_file_copy)
-                                    .with_index(idx_path)
+                                    .with_index(index_path)
                                     .with_checkpoint_size(checkpoint_size);
                             auto meta =
                                 co_await meta_collector.process(meta_input);
 
                             auto reader_input =
                                 IndexedReadInput::from_file(reorg_file_copy)
-                                    .with_index(idx_path)
+                                    .with_index(index_path)
                                     .with_checkpoint_size(checkpoint_size);
                             IndexedFileReaderUtility reader_utility;
                             auto reader =
diff --git a/src/dftracer/utils/binaries/dftracer_server.cpp b/src/dftracer/utils/binaries/dftracer_server.cpp
index d4671143..c740b21c 100644
--- a/src/dftracer/utils/binaries/dftracer_server.cpp
+++ b/src/dftracer/utils/binaries/dftracer_server.cpp
@@ -34,7 +34,7 @@ static coro::CoroTask<int> run_server(argparse::ArgumentParser& program) {
         program.get<std::size_t>("--executor-threads");
 
     // When no explicit index dir is given, default to the trace
-    // directory so sidecar files (.idx) persist across restarts
+    // directory so `.dftindex` stores persist across restarts
     // and don't need to be rebuilt every time.
     if (index_dir.empty()) {
         index_dir = directory;
@@ -132,7 +132,7 @@ int main(int argc, char** argv) {
 
     program.add_argument("--index-dir")
         .help(
-            "Directory for bloom/checkpoint index files (default: same as "
+            "Directory for root-local .dftindex stores (default: same as "
             "--directory)")
         .default_value<std::string>("");
 
diff --git a/src/dftracer/utils/binaries/dftracer_split.cpp b/src/dftracer/utils/binaries/dftracer_split.cpp
index b57f54a8..8be38958 100644
--- a/src/dftracer/utils/binaries/dftracer_split.cpp
+++ b/src/dftracer/utils/binaries/dftracer_split.cpp
@@ -232,6 +232,17 @@ int main(int argc, char** argv) {
 
     DFTRACER_UTILS_LOG_INFO("Found %zu input files", input_files.size());
 
+    if (force && !input_files.empty()) {
+        const std::string shared_index_path =
+            utilities::composites::dft::internal::determine_index_path(
+                input_files.front(), index_dir);
+        if (fs::exists(shared_index_path)) {
+            DFTRACER_UTILS_LOG_INFO("Clearing shared index store: %s",
+                                    shared_index_path.c_str());
+            fs::remove_all(shared_index_path);
+        }
+    }
+
     // Phase 2: Build TaskGraph for file processing
     auto graph = TaskGraph::builder(
         {.name = "DFTracerSplit", .max_concurrency = executor_threads});
@@ -246,7 +257,7 @@ int main(int argc, char** argv) {
             const auto& file_path = (*input_files_ptr)[idx];
 
             // Determine index path
-            std::string idx_path =
+            std::string index_path =
                 utilities::composites::dft::internal::determine_index_path(
                     file_path, index_dir);
 
@@ -254,17 +265,18 @@ int main(int argc, char** argv) {
             auto idx_input =
                 utilities::indexer::IndexBuildConfig::for_file(file_path)
                     .with_checkpoint_size(checkpoint_size)
-                    .with_force_rebuild(force)
+                    .with_force_rebuild(false)
                     .with_index_dir(index_dir);
-            utilities::indexer::IndexBuilderUtility{}.process(idx_input);
+            co_await utilities::indexer::IndexBuilderUtility{}.process(
+                idx_input);
 
             // Collect metadata
             auto meta_input =
                 utilities::composites::dft::MetadataCollectorUtilityInput::
                     from_file(file_path)
                         .with_checkpoint_size(checkpoint_size)
-                        .with_force_rebuild(force)
-                        .with_index(idx_path)
+                        .with_force_rebuild(false)
+                        .with_index(index_path)
                         .with_compute_hash(verify);
 
             co_return co_await utilities::composites::dft::
diff --git a/src/dftracer/utils/binaries/dftracer_stats.cpp b/src/dftracer/utils/binaries/dftracer_stats.cpp
index 4165d5ff..5b9e322d 100644
--- a/src/dftracer/utils/binaries/dftracer_stats.cpp
+++ b/src/dftracer/utils/binaries/dftracer_stats.cpp
@@ -55,7 +55,7 @@ using dftracer::utils::utilities::indexer::IndexBuilderUtility;
 using dftracer::utils::utilities::indexer::IndexDatabase;
 
 // Files below this compressed size are scanned directly without building
-// sidecar index files (.idx).  At 8 MB compressed (~160 MB
+// `.dftindex` stores. At 8 MB compressed (~160 MB
 // uncompressed with typical 20x JSON compression), a file has only a
 // handful of 32 MB checkpoints — the indexing overhead exceeds the
 // benefit of bloom-filter skip.
@@ -456,7 +456,7 @@ static void print_text_detailed(
     std::printf("\n");
 }
 
-// Direct-scan a small .pfw.gz file without any sidecar index.
+// Direct-scan a small .pfw.gz file without any persisted index store.
 // Streams lines via async_streaming_gz_lines, parses each with yyjson,
 // and accumulates stats via ChunkStatistics::update_from_event().
 static coro::CoroTask<TraceStatistics> direct_scan_trace_statistics(
@@ -657,7 +657,7 @@ static coro::CoroTask<DetailedStatistics> direct_scan_detailed_statistics(
 // Per-chunk scanning coroutine for parallel detailed stats.
 // Scans a single chunk and merges results into shared file_detailed.
 static coro::CoroTask<void> scan_chunk_detailed(
-    std::string file_path, std::string idx_path, std::size_t checkpoint_size,
+    std::string file_path, std::string index_path, std::size_t checkpoint_size,
     std::size_t file_size, std::size_t num_ckpts, std::uint64_t ckpt_idx,
     const std::vector<std::string>* filter_names_ptr,
     const std::vector<std::string>* filter_cats_ptr,
@@ -676,7 +676,7 @@ static coro::CoroTask<void> scan_chunk_detailed(
 
     ChunkDetailScanInput scan_input;
     scan_input.file_path = file_path;
-    scan_input.idx_path = idx_path;
+    scan_input.index_path = index_path;
     scan_input.checkpoint_size = checkpoint_size;
     scan_input.start_byte = start_byte;
     scan_input.end_byte = end_byte;
@@ -709,12 +709,13 @@ static coro::CoroTask<void> process_file_detailed(
     DetailedStatistics* aggregate_detailed_ptr, std::mutex* aggregate_mutex_ptr,
     std::mutex* output_mutex_ptr,
     std::vector<std::pair<std::size_t, std::string>>* json_results_ptr) {
-    std::string idx_path = internal::determine_index_path(file_path, index_dir);
+    std::string index_path =
+        internal::determine_index_path(file_path, index_dir);
 
     auto meta_input = MetadataCollectorUtilityInput::from_file(file_path)
                           .with_checkpoint_size(checkpoint_size)
                           .with_force_rebuild(false)
-                          .with_index(idx_path);
+                          .with_index(index_path);
     auto metadata = co_await MetadataCollectorUtility{}.process(meta_input);
 
     if (!metadata.success) {
@@ -731,9 +732,9 @@ static coro::CoroTask<void> process_file_detailed(
     std::vector<std::uint64_t> candidate_checkpoints;
     std::uint64_t total_checkpoints = (num_ckpts == 0) ? 1 : num_ckpts;
 
-    if (query_ptr && fs::exists(idx_path)) {
+    if (query_ptr && fs::exists(index_path)) {
         try {
-            ChunkPrunerInput pruner_input{idx_path, file_path, *query_ptr,
+            ChunkPrunerInput pruner_input{index_path, file_path, *query_ptr,
                                           nullptr};
             ChunkPrunerUtility pruner;
             auto pruner_output = co_await pruner.process(pruner_input);
@@ -766,19 +767,19 @@ static coro::CoroTask<void> process_file_detailed(
         total_checkpoints - candidate_checkpoints.size();
     auto chunk_mutex = std::make_shared<std::mutex>();
 
-    co_await fctx.scope([file_path, idx_path, checkpoint_size, file_size,
+    co_await fctx.scope([file_path, index_path, checkpoint_size, file_size,
                          num_ckpts, filter_names_ptr, filter_cats_ptr,
                          group_by_ptr, file_detailed, chunk_mutex,
                          candidates = std::move(candidate_checkpoints)](
                             CoroScope& chunk_scope) -> coro::CoroTask<void> {
         for (auto ckpt_idx : candidates) {
             chunk_scope.spawn(
-                [file_path, idx_path, checkpoint_size, file_size, num_ckpts,
+                [file_path, index_path, checkpoint_size, file_size, num_ckpts,
                  ckpt_idx, filter_names_ptr, filter_cats_ptr, group_by_ptr,
                  file_detailed,
                  chunk_mutex](CoroScope& /*cctx*/) -> coro::CoroTask<void> {
                     co_return co_await scan_chunk_detailed(
-                        file_path, idx_path, checkpoint_size, file_size,
+                        file_path, index_path, checkpoint_size, file_size,
                         num_ckpts, ckpt_idx, filter_names_ptr, filter_cats_ptr,
                         group_by_ptr, file_detailed, chunk_mutex);
                 });
@@ -788,9 +789,9 @@ static coro::CoroTask<void> process_file_detailed(
 
     // Hash resolution (sequential, all chunks done)
     std::unordered_map<std::string, std::string> hash_resolutions;
-    if (needs_hash_resolution && fs::exists(idx_path)) {
+    if (needs_hash_resolution && fs::exists(index_path)) {
         try {
-            IndexDatabase idx_db(idx_path);
+            IndexDatabase idx_db(index_path);
             auto logical =
                 utilities::indexer::internal::get_logical_path(file_path);
             int file_info_id = idx_db.get_file_info_id(logical);
@@ -996,11 +997,11 @@ static coro::CoroTask<int> run_stats(argparse::ArgumentParser& program) {
     std::vector<std::string> files_needing_index;
     std::vector<std::string> small_files;
     for (const auto& file_path : files) {
-        std::string idx_path =
+        std::string index_path =
             internal::determine_index_path(file_path, index_dir);
-        if (fs::exists(idx_path)) {
+        if (fs::exists(index_path)) {
             try {
-                IndexDatabase db(idx_path);
+                IndexDatabase db(index_path);
                 auto logical =
                     utilities::indexer::internal::get_logical_path(file_path);
                 int fid = db.get_file_info_id(logical);
@@ -1370,8 +1371,8 @@ int main(int argc, char** argv) {
                                      DFTRACER_UTILS_PACKAGE_VERSION);
     program.add_description(
         "Display statistics for DFTracer trace files from pre-built "
-        "index (.idx) databases. Auto-builds indices if missing. "
-        "Zero-cost reads: only SQLite metadata, no decompression.");
+        ".dftindex databases. Auto-builds indexes if missing. "
+        "Zero-cost reads from RocksDB metadata, no decompression.");
 
     program.add_argument("--files")
         .help("Trace files to inspect (.pfw, .pfw.gz)")
@@ -1383,7 +1384,7 @@ int main(int argc, char** argv) {
         .default_value<std::string>("");
 
     program.add_argument("--index-dir")
-        .help("Directory where .idx index files are stored")
+        .help("Directory where .dftindex stores are created")
         .default_value<std::string>("");
 
     program.add_argument("--json").help("Output in JSON format").flag();
@@ -1407,7 +1408,7 @@ int main(int argc, char** argv) {
         .default_value(static_cast<std::uint64_t>(10));
 
     program.add_argument("--no-auto-index")
-        .help("Disable automatic index building for files missing .idx")
+        .help("Disable automatic index building for files missing .dftindex")
         .flag();
 
     program.add_argument("--checkpoint-size")
diff --git a/src/dftracer/utils/binaries/dftracer_tar.cpp b/src/dftracer/utils/binaries/dftracer_tar.cpp
index 7209b67a..d3ca22ba 100644
--- a/src/dftracer/utils/binaries/dftracer_tar.cpp
+++ b/src/dftracer/utils/binaries/dftracer_tar.cpp
@@ -36,8 +36,8 @@ static coro::CoroTask<int> run_tar(const std::string& archive_path,
 
         DFTRACER_UTILS_LOG_INFO("Detected format: %s",
                                 indexer->get_format_name());
-        DFTRACER_UTILS_LOG_INFO("Index file: %s",
-                                indexer->get_idx_path().c_str());
+        DFTRACER_UTILS_LOG_INFO("Index store: %s",
+                                indexer->get_index_path().c_str());
 
         // Build index if needed
         if (force_rebuild || indexer->need_rebuild()) {
@@ -57,7 +57,7 @@ static coro::CoroTask<int> run_tar(const std::string& archive_path,
             printf("Archive Information:\n");
             printf("  Format: %s\n", indexer->get_format_name());
             printf("  Path: %s\n", indexer->get_archive_path().c_str());
-            printf("  Index: %s\n", indexer->get_idx_path().c_str());
+            printf("  Index Store: %s\n", indexer->get_index_path().c_str());
             printf("  Total size: %" PRIu64 " bytes\n",
                    static_cast<std::uint64_t>(indexer->get_max_bytes()));
             printf("  Total lines: %" PRIu64 "\n", indexer->get_num_lines());
@@ -106,7 +106,7 @@ int main(int argc, char** argv) {
         "DFTracer utility for indexing and analyzing TAR.GZ archives");
     program.add_argument("file").help("TAR.GZ file to process").required();
     program.add_argument("-i", "--index")
-        .help("Index file to use (auto-generated if not specified)")
+        .help("Path to the .dftindex store to use (auto-generated if omitted)")
         .default_value<std::string>("");
     program.add_argument("-c", "--checkpoint-size")
         .help("Checkpoint size for indexing in bytes")
@@ -114,7 +114,7 @@ int main(int argc, char** argv) {
         .default_value(
             static_cast<std::size_t>(Indexer::DEFAULT_CHECKPOINT_SIZE));
     program.add_argument("-f", "--force-rebuild")
-        .help("Force rebuild index")
+        .help("Force rebuild the .dftindex store")
         .flag();
     program.add_argument("--list-files")
         .help("List all files in the TAR archive")
diff --git a/src/dftracer/utils/binaries/dftracer_view.cpp b/src/dftracer/utils/binaries/dftracer_view.cpp
index 61bc5e2c..49fcb15f 100644
--- a/src/dftracer/utils/binaries/dftracer_view.cpp
+++ b/src/dftracer/utils/binaries/dftracer_view.cpp
@@ -75,11 +75,11 @@ static coro::CoroTask<void> index_single_file(const std::string& file_path,
 }
 
 static coro::CoroTask<void> read_single_chunk(
-    const std::string& file_path, const std::string& idx_path,
+    const std::string& file_path, const std::string& index_path,
     const ViewChunkCandidate& candidate, const ViewContext& vctx, CoroScope&) {
     ViewReaderInput reader_input;
     reader_input.with_file_path(file_path)
-        .with_idx_path(idx_path)
+        .with_index_path(index_path)
         .with_checkpoint_size(vctx.checkpoint_size)
         .with_byte_range(candidate.start_byte, candidate.end_byte)
         .with_checkpoint_idx(candidate.checkpoint_idx)
@@ -117,14 +117,14 @@ static coro::CoroTask<void> read_single_chunk(
 static coro::CoroTask<void> process_single_file(const std::string& file_path,
                                                 const ViewContext& vctx,
                                                 CoroScope& fctx) {
-    std::string idx_path =
+    std::string index_path =
         internal::determine_index_path(file_path, vctx.index_dir);
 
     // Collect metadata
     auto meta_input = MetadataCollectorUtilityInput::from_file(file_path)
                           .with_checkpoint_size(vctx.checkpoint_size)
                           .with_force_rebuild(false)
-                          .with_index(idx_path);
+                          .with_index(index_path);
     auto metadata = co_await MetadataCollectorUtility{}.process(meta_input);
 
     if (!metadata.success) {
@@ -138,7 +138,7 @@ static coro::CoroTask<void> process_single_file(const std::string& file_path,
     ViewBuilderInput builder_input;
     builder_input.with_view(vctx.view)
         .with_file_path(file_path)
-        .with_idx_path(fs::exists(idx_path) ? idx_path : "")
+        .with_index_path(fs::exists(index_path) ? index_path : "")
         .with_uncompressed_size(metadata.uncompressed_size)
         .with_num_checkpoints(metadata.num_checkpoints);
 
@@ -159,13 +159,13 @@ static coro::CoroTask<void> process_single_file(const std::string& file_path,
 
     // Process each candidate chunk
     auto& candidates = build_output.candidates;
-    co_await fctx.scope([&file_path, &idx_path, &vctx, &candidates](
+    co_await fctx.scope([&file_path, &index_path, &vctx, &candidates](
                             CoroScope& chunk_scope) -> coro::CoroTask<void> {
         for (const auto& candidate : candidates) {
-            chunk_scope.spawn([&file_path, &idx_path, &candidate,
+            chunk_scope.spawn([&file_path, &index_path, &candidate,
                                &vctx](CoroScope& cctx) -> coro::CoroTask<void> {
-                co_await read_single_chunk(file_path, idx_path, candidate, vctx,
-                                           cctx);
+                co_await read_single_chunk(file_path, index_path, candidate,
+                                           vctx, cctx);
             });
         }
         co_return;
@@ -331,9 +331,9 @@ static coro::CoroTask<int> run_view(argparse::ArgumentParser& program) {
 
     std::vector<std::string> files_needing_index;
     for (const auto& file_path : files) {
-        std::string idx_path =
+        std::string index_path =
             internal::determine_index_path(file_path, index_dir);
-        if (!fs::exists(idx_path)) {
+        if (!fs::exists(index_path)) {
             files_needing_index.push_back(file_path);
         }
     }
@@ -341,7 +341,8 @@ static coro::CoroTask<int> run_view(argparse::ArgumentParser& program) {
     if (!files_needing_index.empty()) {
         if (no_auto_index) {
             DFTRACER_UTILS_LOG_ERROR(
-                "Missing .idx index for %zu file(s) and --no-auto-index is "
+                "Missing .dftindex store for %zu file(s) and --no-auto-index "
+                "is "
                 "set. Run dftracer_index first.",
                 files_needing_index.size());
             for (const auto& f : files_needing_index) {
@@ -539,11 +540,11 @@ int main(int argc, char** argv) {
 
     // Indexing options
     program.add_argument("--index-dir")
-        .help("Directory where .idx index files are stored")
+        .help("Directory where .dftindex stores are created")
         .default_value<std::string>("");
 
     program.add_argument("--no-auto-index")
-        .help("Disable automatic index building for files missing .idx")
+        .help("Disable automatic index building for files missing .dftindex")
         .flag();
 
     program.add_argument("--checkpoint-size")
diff --git a/src/dftracer/utils/core/env.cpp b/src/dftracer/utils/core/env.cpp
new file mode 100644
index 00000000..703751f3
--- /dev/null
+++ b/src/dftracer/utils/core/env.cpp
@@ -0,0 +1,57 @@
+#include <dftracer/utils/core/env.h>
+
+#include <charconv>
+#include <cstdlib>
+#include <string>
+
+namespace dftracer::utils {
+
+/// Raw lookup; unset and empty variables both yield std::nullopt.
+template <>
+std::optional<std::string_view> Env::get<std::string_view>(
+    std::string_view name) {
+    // getenv needs a NUL-terminated name, which a string_view cannot promise.
+    const std::string terminated_name(name);
+    const char* const raw = std::getenv(terminated_name.c_str());
+    if (raw != nullptr && *raw != '\0') return std::string_view(raw);
+    return std::nullopt;
+}
+
+/// Integer lookup: the entire value must parse as a base-10 int.
+template <>
+std::optional<int> Env::get<int>(std::string_view name) {
+    const auto raw = get<std::string_view>(name);
+    if (!raw) {
+        return std::nullopt;
+    }
+
+    int number = 0;
+    const char* const first = raw->data();
+    const char* const last = first + raw->size();
+    const auto outcome = std::from_chars(first, last, number);
+    // Reject conversion errors and trailing garbage alike.
+    if (outcome.ec == std::errc{} && outcome.ptr == last) return number;
+    return std::nullopt;
+}
+
+/// Open-file limit for RocksDB, resolved from the environment on first use.
+///
+/// DFTRACER_UTILS_ROCKSDB_MAX_OPEN_FILES wins when it parses as -1 or as a
+/// positive integer; unset, malformed, or other values yield the default, 32.
+int Env::rocksdb_max_open_files() {
+    static const int resolved_limit = []() -> int {
+        constexpr int fallback_limit = 32;
+        constexpr std::string_view variable =
+            "DFTRACER_UTILS_ROCKSDB_MAX_OPEN_FILES";
+
+        // Zero and negatives other than -1 are treated as misconfiguration.
+        const std::optional<int> requested = get<int>(variable);
+        const bool usable =
+            requested.has_value() && (*requested == -1 || *requested > 0);
+        return usable ? *requested : fallback_limit;
+    }();
+
+    return resolved_limit;
+}
+
+}  // namespace dftracer::utils
diff --git a/src/dftracer/utils/core/io/epoll_thread_pool_backend.cpp b/src/dftracer/utils/core/io/epoll_thread_pool_backend.cpp
index 3a86ebc8..1f733957 100644
--- a/src/dftracer/utils/core/io/epoll_thread_pool_backend.cpp
+++ b/src/dftracer/utils/core/io/epoll_thread_pool_backend.cpp
@@ -161,6 +161,23 @@ IoAwaitable EpollThreadPoolBackend::submit_pread(int fd, void* buf,
                               &executor_, &pool_);
 }
 
+void EpollThreadPoolBackend::submit_pread_callback(int fd, void* buf,
+                                                   std::size_t len,
+                                                   off_t offset,
+                                                   IoCompletionFn completion,
+                                                   void* context) {
+    auto* request = new IoRequest{};  // released by execute_request()
+    request->pool = &pool_;
+    request->completion = completion;
+    request->completion_ctx = context;
+    request->op = IoOp::PREAD;
+    request->fd = fd;
+    request->offset = offset;
+    request->buf = buf;
+    request->len = len;
+    pool_.submit([request] { execute_request(request); });
+}
+
 IoAwaitable EpollThreadPoolBackend::submit_pwrite(int fd, const void* buf,
                                                   std::size_t len,
                                                   off_t offset) {
@@ -364,8 +381,12 @@ void EpollThreadPoolBackend::execute_request(IoRequest* req) {
     }
     if (result < 0) result = -errno;
 
-    req->awaitable->result_ = result;
-    req->executor->enqueue(req->awaitable->handle_);
+    if (req->awaitable != nullptr) {
+        req->awaitable->result_ = result;
+        req->executor->enqueue(req->awaitable->handle_);
+    } else if (req->completion != nullptr) {
+        req->completion(req->completion_ctx, result);
+    }
     delete req;
 }
 
diff --git a/src/dftracer/utils/core/io/epoll_thread_pool_backend.h b/src/dftracer/utils/core/io/epoll_thread_pool_backend.h
index 11cb2db4..cd9b50dd 100644
--- a/src/dftracer/utils/core/io/epoll_thread_pool_backend.h
+++ b/src/dftracer/utils/core/io/epoll_thread_pool_backend.h
@@ -37,6 +37,9 @@ class EpollThreadPoolBackend : public IoBackend {
     IoAwaitable submit_write(int fd, const void* buf, std::size_t len) override;
     IoAwaitable submit_pread(int fd, void* buf, std::size_t len,
                              off_t offset) override;
+    void submit_pread_callback(int fd, void* buf, std::size_t len, off_t offset,
+                               IoCompletionFn completion,
+                               void* context) override;
     IoAwaitable submit_pwrite(int fd, const void* buf, std::size_t len,
                               off_t offset) override;
     IoAwaitable submit_open(const char* path, int flags, mode_t mode) override;
diff --git a/src/dftracer/utils/core/io/io_backend_sync.cpp b/src/dftracer/utils/core/io/io_backend_sync.cpp
index 3dc32d88..23662bf8 100644
--- a/src/dftracer/utils/core/io/io_backend_sync.cpp
+++ b/src/dftracer/utils/core/io/io_backend_sync.cpp
@@ -9,8 +9,8 @@ namespace dftracer::utils::io {
 ssize_t IoBackend::submit_read_sync(int fd, void *buf, std::size_t len,
                                     off_t offset) {
     // For sync wrappers, we cannot use the normal IoAwaitable coroutine
-    // path. Instead, we directly call the POSIX syscall. The VFS runs
-    // on a dedicated SQLite thread (not an executor worker), so blocking
+    // path. Instead, we directly call the POSIX syscall. This synchronous
+    // path is used from dedicated blocking contexts, so blocking
     // is acceptable.
     ssize_t result = ::pread(fd, buf, len, offset);
     if (result < 0) result = -errno;
diff --git a/src/dftracer/utils/core/io/io_uring_backend.cpp b/src/dftracer/utils/core/io/io_uring_backend.cpp
index 70670172..f4c67bf1 100644
--- a/src/dftracer/utils/core/io/io_uring_backend.cpp
+++ b/src/dftracer/utils/core/io/io_uring_backend.cpp
@@ -266,6 +266,8 @@ void IoUringBackend::completion_loop() {
             if (req->awaitable) {
                 req->awaitable->result_ = cqe->res;
                 executor_.enqueue(req->awaitable->handle_);
+            } else if (req->completion != nullptr) {
+                req->completion(req->completion_ctx, cqe->res);
             }
             delete req;
         }
@@ -305,26 +307,44 @@ void IoUringBackend::submit_fn(SubmitContext* ctx, IoAwaitable* awaitable) {
         case IoUringSubmitCtx::Op::FTRUNCATE: {
             ssize_t sync_result = ::ftruncate(uring_ctx->fd, uring_ctx->offset);
             if (sync_result < 0) sync_result = -errno;
-            awaitable->result_ = sync_result;
+            if (awaitable != nullptr) {
+                awaitable->result_ = sync_result;
+            } else if (uring_ctx->completion != nullptr) {
+                uring_ctx->completion(uring_ctx->completion_ctx, sync_result);
+            }
             delete uring_ctx;
-            backend->executor_.enqueue(awaitable->handle_);
+            if (awaitable != nullptr) {
+                backend->executor_.enqueue(awaitable->handle_);
+            }
             return;
         }
         case IoUringSubmitCtx::Op::FSTAT: {
             ssize_t sync_result = ::fstat(uring_ctx->fd, uring_ctx->stat_buf);
             if (sync_result < 0) sync_result = -errno;
-            awaitable->result_ = sync_result;
+            if (awaitable != nullptr) {
+                awaitable->result_ = sync_result;
+            } else if (uring_ctx->completion != nullptr) {
+                uring_ctx->completion(uring_ctx->completion_ctx, sync_result);
+            }
             delete uring_ctx;
-            backend->executor_.enqueue(awaitable->handle_);
+            if (awaitable != nullptr) {
+                backend->executor_.enqueue(awaitable->handle_);
+            }
             return;
         }
         case IoUringSubmitCtx::Op::LSEEK: {
             ssize_t sync_result =
                 ::lseek(uring_ctx->fd, uring_ctx->offset, uring_ctx->whence);
             if (sync_result < 0) sync_result = -errno;
-            awaitable->result_ = sync_result;
+            if (awaitable != nullptr) {
+                awaitable->result_ = sync_result;
+            } else if (uring_ctx->completion != nullptr) {
+                uring_ctx->completion(uring_ctx->completion_ctx, sync_result);
+            }
             delete uring_ctx;
-            backend->executor_.enqueue(awaitable->handle_);
+            if (awaitable != nullptr) {
+                backend->executor_.enqueue(awaitable->handle_);
+            }
             return;
         }
         case IoUringSubmitCtx::Op::SENDFILE: {
@@ -332,9 +352,15 @@ void IoUringBackend::submit_fn(SubmitContext* ctx, IoAwaitable* awaitable) {
             ssize_t sync_result = ::sendfile(uring_ctx->dest_fd, uring_ctx->fd,
                                              &off, uring_ctx->len);
             if (sync_result < 0) sync_result = -errno;
-            awaitable->result_ = sync_result;
+            if (awaitable != nullptr) {
+                awaitable->result_ = sync_result;
+            } else if (uring_ctx->completion != nullptr) {
+                uring_ctx->completion(uring_ctx->completion_ctx, sync_result);
+            }
             delete uring_ctx;
-            backend->executor_.enqueue(awaitable->handle_);
+            if (awaitable != nullptr) {
+                backend->executor_.enqueue(awaitable->handle_);
+            }
             return;
         }
         default:
@@ -344,6 +370,8 @@ void IoUringBackend::submit_fn(SubmitContext* ctx, IoAwaitable* awaitable) {
     // Create the request object that will be stored in SQE user_data
     auto* req = new IoUringRequest{};
     req->awaitable = awaitable;
+    req->completion = uring_ctx->completion;
+    req->completion_ctx = uring_ctx->completion_ctx;
 
     std::lock_guard<std::mutex> lock(backend->submit_mutex_);
 
@@ -412,10 +440,16 @@ void IoUringBackend::submit_fn(SubmitContext* ctx, IoAwaitable* awaitable) {
                 break;
         }
         if (result < 0) result = -errno;
-        awaitable->result_ = result;
+        if (awaitable != nullptr) {
+            awaitable->result_ = result;
+        } else if (uring_ctx->completion != nullptr) {
+            uring_ctx->completion(uring_ctx->completion_ctx, result);
+        }
         delete req;
         delete uring_ctx;
-        backend->executor_.enqueue(awaitable->handle_);
+        if (awaitable != nullptr) {
+            backend->executor_.enqueue(awaitable->handle_);
+        }
         return;
     }
 
@@ -538,6 +572,18 @@ IoAwaitable IoUringBackend::submit_pread(int fd, void* buf, std::size_t len,
                               nullptr, 0, 0, this);
 }
 
+void IoUringBackend::submit_pread_callback(int fd, void* buf, std::size_t len,
+                                           off_t offset,
+                                           IoCompletionFn completion,
+                                           void* context) {
+    auto submission = make_uring_request(IoUringSubmitCtx::Op::PREAD, fd, buf,
+                                         len, offset, nullptr, 0, 0, this);
+    auto* ctx = static_cast<IoUringSubmitCtx*>(submission.submit_ctx_);
+    ctx->completion_ctx = context;
+    ctx->completion = completion;
+    submit_fn(ctx, nullptr);  // no awaitable: result goes to completion
+}
+
 IoAwaitable IoUringBackend::submit_pwrite(int fd, const void* buf,
                                           std::size_t len, off_t offset) {
     return make_uring_request(IoUringSubmitCtx::Op::PWRITE, fd,
diff --git a/src/dftracer/utils/core/io/io_uring_backend.h b/src/dftracer/utils/core/io/io_uring_backend.h
index 3c8e5d3f..fc156c84 100644
--- a/src/dftracer/utils/core/io/io_uring_backend.h
+++ b/src/dftracer/utils/core/io/io_uring_backend.h
@@ -31,6 +31,8 @@ struct IoUringRequest {
     }
 
     IoAwaitable* awaitable = nullptr;
+    IoCompletionFn completion = nullptr;
+    void* completion_ctx = nullptr;
 };
 
 /// io_uring I/O backend using raw syscalls (no liburing dependency).
@@ -52,6 +54,9 @@ class IoUringBackend : public IoBackend {
     IoAwaitable submit_write(int fd, const void* buf, std::size_t len) override;
     IoAwaitable submit_pread(int fd, void* buf, std::size_t len,
                              off_t offset) override;
+    void submit_pread_callback(int fd, void* buf, std::size_t len, off_t offset,
+                               IoCompletionFn completion,
+                               void* context) override;
     IoAwaitable submit_pwrite(int fd, const void* buf, std::size_t len,
                               off_t offset) override;
     IoAwaitable submit_open(const char* path, int flags, mode_t mode) override;
@@ -147,6 +152,8 @@ struct IoUringSubmitCtx : SubmitContext {
     int whence = 0;
     int dest_fd = -1;
     IoUringBackend* backend = nullptr;
+    IoCompletionFn completion = nullptr;
+    void* completion_ctx = nullptr;
 };
 
 }  // namespace dftracer::utils::io
diff --git a/src/dftracer/utils/core/io/kqueue_thread_pool_backend.cpp b/src/dftracer/utils/core/io/kqueue_thread_pool_backend.cpp
index 195810da..811a4afc 100644
--- a/src/dftracer/utils/core/io/kqueue_thread_pool_backend.cpp
+++ b/src/dftracer/utils/core/io/kqueue_thread_pool_backend.cpp
@@ -147,6 +147,23 @@ IoAwaitable KqueueThreadPoolBackend::submit_pread(int fd, void* buf,
                                &executor_, &pool_);
 }
 
+void KqueueThreadPoolBackend::submit_pread_callback(int fd, void* buf,
+                                                    std::size_t len,
+                                                    off_t offset,
+                                                    IoCompletionFn completion,
+                                                    void* context) {
+    auto* request = new IoRequest{};  // released by execute_request()
+    request->pool = &pool_;
+    request->completion = completion;
+    request->completion_ctx = context;
+    request->op = IoOp::PREAD;
+    request->fd = fd;
+    request->offset = offset;
+    request->buf = buf;
+    request->len = len;
+    pool_.submit([request] { execute_request(request); });
+}
+
 IoAwaitable KqueueThreadPoolBackend::submit_pwrite(int fd, const void* buf,
                                                    std::size_t len,
                                                    off_t offset) {
@@ -383,8 +400,12 @@ void KqueueThreadPoolBackend::execute_request(IoRequest* req) {
     }
     if (result < 0) result = -errno;
 
-    req->awaitable->result_ = result;
-    req->executor->enqueue(req->awaitable->handle_);
+    if (req->awaitable != nullptr) {
+        req->awaitable->result_ = result;
+        req->executor->enqueue(req->awaitable->handle_);
+    } else if (req->completion != nullptr) {
+        req->completion(req->completion_ctx, result);
+    }
     delete req;
 }
 
diff --git a/src/dftracer/utils/core/io/kqueue_thread_pool_backend.h b/src/dftracer/utils/core/io/kqueue_thread_pool_backend.h
index 78bf2933..34aae394 100644
--- a/src/dftracer/utils/core/io/kqueue_thread_pool_backend.h
+++ b/src/dftracer/utils/core/io/kqueue_thread_pool_backend.h
@@ -39,6 +39,9 @@ class KqueueThreadPoolBackend : public IoBackend {
     IoAwaitable submit_write(int fd, const void* buf, std::size_t len) override;
     IoAwaitable submit_pread(int fd, void* buf, std::size_t len,
                              off_t offset) override;
+    void submit_pread_callback(int fd, void* buf, std::size_t len, off_t offset,
+                               IoCompletionFn completion,
+                               void* context) override;
     IoAwaitable submit_pwrite(int fd, const void* buf, std::size_t len,
                               off_t offset) override;
     IoAwaitable submit_open(const char* path, int flags, mode_t mode) override;
diff --git a/src/dftracer/utils/core/io/thread_pool_backend.cpp b/src/dftracer/utils/core/io/thread_pool_backend.cpp
index b1a23b49..7a7df728 100644
--- a/src/dftracer/utils/core/io/thread_pool_backend.cpp
+++ b/src/dftracer/utils/core/io/thread_pool_backend.cpp
@@ -63,6 +63,22 @@ IoAwaitable ThreadPoolBackend::submit_pread(int fd, void* buf, std::size_t len,
                         &executor_, &pool_);
 }
 
+void ThreadPoolBackend::submit_pread_callback(int fd, void* buf,
+                                              std::size_t len, off_t offset,
+                                              IoCompletionFn completion,
+                                              void* context) {
+    auto* req = new IoRequest{};  // deleted by execute_request()
+    req->op = IoOp::PREAD;
+    req->fd = fd;
+    req->buf = buf;
+    req->len = len;
+    req->offset = offset;
+    req->completion = completion;  // called as completion(context, result)
+    req->completion_ctx = context;  // awaitable stays null: callback path
+    req->pool = &pool_;
+    pool_.submit([req] { execute_request(req); });
+}
+
 IoAwaitable ThreadPoolBackend::submit_pwrite(int fd, const void* buf,
                                              std::size_t len, off_t offset) {
     return make_request(IoOp::PWRITE, fd, const_cast<void*>(buf), len, offset,
@@ -300,8 +316,12 @@ void ThreadPoolBackend::execute_request(IoRequest* req) {
     }
     if (result < 0) result = -errno;
 
-    req->awaitable->result_ = result;
-    req->executor->enqueue(req->awaitable->handle_);
+    if (req->awaitable != nullptr) {
+        req->awaitable->result_ = result;
+        req->executor->enqueue(req->awaitable->handle_);
+    } else if (req->completion != nullptr) {
+        req->completion(req->completion_ctx, result);
+    }
     delete req;
 }
 
@@ -313,4 +333,4 @@ int ThreadPoolBackend::flush() { return static_cast<int>(pool_.flush()); }
 
 std::string ThreadPoolBackend::name() const { return "threadpool"; }
 
-}  // namespace dftracer::utils::io
\ No newline at end of file
+}  // namespace dftracer::utils::io
diff --git a/src/dftracer/utils/core/io/thread_pool_backend.h b/src/dftracer/utils/core/io/thread_pool_backend.h
index 6d548069..cce372e2 100644
--- a/src/dftracer/utils/core/io/thread_pool_backend.h
+++ b/src/dftracer/utils/core/io/thread_pool_backend.h
@@ -65,6 +65,8 @@ struct IoRequest : SubmitContext {
     int whence = 0;
     int dest_fd = -1;
     IoAwaitable* awaitable = nullptr;
+    IoCompletionFn completion = nullptr;
+    void* completion_ctx = nullptr;
     Executor* executor = nullptr;
     IoThreadPool* pool = nullptr;
 };
@@ -83,6 +85,9 @@ class ThreadPoolBackend : public IoBackend {
     IoAwaitable submit_write(int fd, const void* buf, std::size_t len) override;
     IoAwaitable submit_pread(int fd, void* buf, std::size_t len,
                              off_t offset) override;
+    void submit_pread_callback(int fd, void* buf, std::size_t len, off_t offset,
+                               IoCompletionFn completion,
+                               void* context) override;
     IoAwaitable submit_pwrite(int fd, const void* buf, std::size_t len,
                               off_t offset) override;
     IoAwaitable submit_open(const char* path, int flags, mode_t mode) override;
diff --git a/src/dftracer/utils/core/pipeline/executor.cpp b/src/dftracer/utils/core/pipeline/executor.cpp
index 9eb68422..12029f51 100644
--- a/src/dftracer/utils/core/pipeline/executor.cpp
+++ b/src/dftracer/utils/core/pipeline/executor.cpp
@@ -4,7 +4,6 @@
 #include <dftracer/utils/core/io/io_backend_factory.h>
 #include <dftracer/utils/core/io/io_thread_pool.h>
 #include <dftracer/utils/core/pipeline/executor.h>
-#include <dftracer/utils/core/sqlite/vfs.h>
 #include <dftracer/utils/core/tasks/coro_scope.h>
 #include <dftracer/utils/core/tasks/task.h>
 
@@ -33,9 +32,7 @@ Executor* Executor::set_current(Executor* e) noexcept {
     return old;
 }
 
-io::IoThreadPool* Executor::sqlite_pool() noexcept {
-    return sqlite_pool_.get();
-}
+io::IoThreadPool* Executor::db_pool() noexcept { return db_pool_.get(); }
 
 // Thread-local list of coroutine handles to destroy after the current
 // resume() returns.  FinalAwaiter pushes here instead of the shared
@@ -70,7 +67,7 @@ Executor::Executor(const ExecutorConfig& config)
       io_pool_size_(config.io_pool_size),
       io_backend_type_(config.io_backend_type),
       io_batch_threshold_(config.io_batch_threshold),
-      sqlite_pool_size_(config.sqlite_pool_size) {
+      db_pool_size_(config.db_pool_size) {
     if (num_threads_ == 0) {
         num_threads_ = 2;  // Fallback if hardware_concurrency returns 0
     }
@@ -102,10 +99,9 @@ void Executor::start() {
     io_backend_ = io::create_io_backend(*this, io_pool_size_, io_backend_type_,
                                         io_batch_threshold_);
     io_backend_->start();
-    sqlite::register_dftracer_sqlite_vfs(io_backend_.get(), this);
 
-    sqlite_pool_ = std::make_unique<io::IoThreadPool>(sqlite_pool_size_);
-    sqlite_pool_->start();
+    db_pool_ = std::make_unique<io::IoThreadPool>(db_pool_size_);
+    db_pool_->start();
 
     // Create all worker contexts first so workers_ is stable before any
     // worker thread can try to iterate/steal from it.
@@ -147,13 +143,11 @@ void Executor::shutdown() {
     // completion thread may still call enqueue() -> wake_all_workers()
     // which accesses WorkerContext cv/mutex, so workers_ must remain
     // alive until the completion thread has exited.
-    if (sqlite_pool_) {
-        sqlite_pool_->stop();
-        sqlite_pool_.reset();
+    if (db_pool_) {
+        db_pool_->stop();
+        db_pool_.reset();
     }
 
-    sqlite::unregister_dftracer_sqlite_vfs();
-
     if (io_backend_) {
         io_backend_->stop();
         io_backend_.reset();
@@ -242,6 +236,7 @@ void Executor::worker_thread(WorkerContext* context) {
             // thread.  Safe: resume() has fully returned, so the frame
             // is suspended at final_suspend and no code references it.
             drain_thread_local_destroys();
+            drain_destroy_queue();
         }
         // No work available -- sleep until signaled.
         else {
@@ -258,9 +253,11 @@ void Executor::worker_thread(WorkerContext* context) {
             if (io_backend_) {
                 auto reaped = io_backend_->poll(0);
                 if (reaped > 0) {
+                    drain_destroy_queue();
                     continue;
                 }
             }
+            drain_destroy_queue();
             std::unique_lock<std::mutex> lock(context->queue_mutex);
             context->cv.wait(lock, [this, observed_signal] {
                 return !running_.load(std::memory_order_acquire) ||
diff --git a/src/dftracer/utils/core/pipeline/pipeline.cpp b/src/dftracer/utils/core/pipeline/pipeline.cpp
index 5fa3ec2b..b546974d 100644
--- a/src/dftracer/utils/core/pipeline/pipeline.cpp
+++ b/src/dftracer/utils/core/pipeline/pipeline.cpp
@@ -20,7 +20,7 @@ Pipeline::Pipeline(const PipelineConfig& config)
     exec_cfg.io_pool_size = config.io_thread_count;
     exec_cfg.io_backend_type = config.io_backend_type;
     exec_cfg.io_batch_threshold = config.io_batch_threshold;
-    exec_cfg.sqlite_pool_size = config.sqlite_pool_size;
+    exec_cfg.db_pool_size = config.db_pool_size;
 
     std::unique_ptr<Watchdog> watchdog;
     if (config.enable_watchdog) {
diff --git a/src/dftracer/utils/core/rocksdb/async.cpp b/src/dftracer/utils/core/rocksdb/async.cpp
new file mode 100644
index 00000000..6d3b016b
--- /dev/null
+++ b/src/dftracer/utils/core/rocksdb/async.cpp
@@ -0,0 +1,32 @@
+#include <dftracer/utils/core/io/io_thread_pool.h>
+#include <dftracer/utils/core/pipeline/executor.h>
+#include <dftracer/utils/core/rocksdb/async.h>
+
+namespace dftracer::utils::rocksdb {
+
+// Looks up the database thread pool owned by the executor returned by
+// Executor::current(). Yields nullptr when there is no current executor,
+// so callers must check the result before submitting work.
+io::IoThreadPool* get_db_pool() {
+    auto* const active = Executor::current();
+    return active == nullptr ? nullptr : active->db_pool();
+}
+
+void db_async_submit(io::IoThreadPool* pool, std::function<void()> fn) {
+    pool->submit(std::move(fn));  // no null check: `pool` must be valid
+}
+
+// Continues coroutine `h`. `executor` is an opaque Executor*: when non-null
+// the handle is enqueued on it, otherwise the coroutine is resumed inline
+// by the calling thread.
+void db_async_resume_on(void* executor, std::coroutine_handle<> h) {
+    auto* const target = static_cast<Executor*>(executor);
+    if (target == nullptr) return h.resume();
+    target->enqueue(h);
+}
+
+void* get_current_executor_opaque() {
+    return static_cast<void*>(Executor::current());
+}
+
+}  // namespace dftracer::utils::rocksdb
diff --git a/src/dftracer/utils/core/rocksdb/database.cpp b/src/dftracer/utils/core/rocksdb/database.cpp
new file mode 100644
index 00000000..1b227a67
--- /dev/null
+++ b/src/dftracer/utils/core/rocksdb/database.cpp
@@ -0,0 +1,275 @@
+#include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/core/env.h>
+#include <dftracer/utils/core/rocksdb/database.h>
+#include <dftracer/utils/core/rocksdb/filesystem.h>
+#include <rocksdb/slice.h>
+
+#include <algorithm>
+#include <atomic>
+#include <cstdlib>
+#include <stdexcept>
+#include <utility>
+
+namespace dftracer::utils::rocksdb {
+
+namespace {
+
+std::atomic<bool>& process_exiting_flag() {
+    static std::atomic<bool> flag{false};
+    return flag;
+}
+
+const ::rocksdb::ReadOptions& read_options() {
+    static const ::rocksdb::ReadOptions options;
+    return options;
+}
+
+const ::rocksdb::WriteOptions& write_options() {
+    static const ::rocksdb::WriteOptions options;
+    return options;
+}
+
+// Tears down a partially opened database: destroys its column-family
+// handles, closes and deletes `db`, then nulls `db` and empties `handles`.
+void cleanup_failed_open(::rocksdb::DB*& db,
+                         std::vector<::rocksdb::ColumnFamilyHandle*>& handles) {
+    if (db != nullptr) {
+        for (auto* cf : handles) {
+            if (cf != nullptr) db->DestroyColumnFamilyHandle(cf);
+        }
+        static_cast<void>(db->Close());  // best effort; status ignored
+        delete db;
+        db = nullptr;
+    }
+    handles.clear();
+}
+
+}  // namespace
+
+void mark_process_exiting_for_rocksdb() {
+    process_exiting_flag().store(true, std::memory_order_relaxed);
+}
+
+RocksDatabase::RocksDatabase() = default;
+
+RocksDatabase::RocksDatabase(const std::string& db_path, OpenMode open_mode) {
+    open(db_path, open_mode);
+}
+
+RocksDatabase::~RocksDatabase() { close(); }
+
+RocksDatabase::RocksDatabase(RocksDatabase&& other) noexcept
+    : db_path_(std::move(other.db_path_)),
+      open_mode_(other.open_mode_),
+      file_system_(std::move(other.file_system_)),
+      env_(std::move(other.env_)),
+      db_(std::exchange(other.db_, nullptr)),
+      column_families_(std::move(other.column_families_)) {}
+
+RocksDatabase& RocksDatabase::operator=(RocksDatabase&& other) noexcept {
+    if (this != &other) {
+        close();
+        db_path_ = std::move(other.db_path_);
+        open_mode_ = other.open_mode_;
+        file_system_ = std::move(other.file_system_);
+        env_ = std::move(other.env_);
+        db_ = std::exchange(other.db_, nullptr);
+        column_families_ = std::move(other.column_families_);
+    }
+    return *this;
+}
+
+std::vector<std::string> RocksDatabase::default_column_families() {
+    return {"default",    "checkpoints", "metadata",   "chunk_bloom",
+            "file_bloom", "chunk_stats", "dimensions", "chunk_dim_stats",
+            "manifest",   "provenance",  "archives",   "tar_files"};
+}
+
+::rocksdb::Options RocksDatabase::default_options() {
+    ::rocksdb::Options options;
+    options.create_if_missing = true;
+    options.create_missing_column_families = true;
+    options.allow_concurrent_memtable_write = true;
+    options.enable_pipelined_write = true;
+    options.max_open_files = Env::rocksdb_max_open_files();
+    return options;
+}
+
+::rocksdb::ColumnFamilyOptions RocksDatabase::default_column_family_options() {
+    ::rocksdb::ColumnFamilyOptions options;
+    options.compression = ::rocksdb::kLZ4Compression;
+    options.bottommost_compression = ::rocksdb::kZlibCompression;
+    return options;
+}
+
+bool RocksDatabase::open(const std::string& db_path, OpenMode open_mode) {
+    close();  // release any database this object already holds
+    db_path_ = db_path;
+    open_mode_ = open_mode;
+    // create_directories reports into `ec`, which is not inspected here.
+    std::error_code ec;
+    if (open_mode_ == OpenMode::ReadWrite) {
+        fs::create_directories(fs::path(db_path_), ec);
+    }
+    // Read-only opens must not create the database or column families.
+    auto db_options = default_options();
+    if (open_mode_ == OpenMode::ReadOnly) {
+        db_options.create_if_missing = false;
+        db_options.create_missing_column_families = false;
+    }
+    file_system_ = make_dftracer_file_system();
+    env_ = make_dftracer_env(file_system_);
+    db_options.env = env_.get();
+    auto cf_options = default_column_family_options();
+    // List existing column families; read-write opens add missing defaults.
+    std::vector<std::string> column_family_names;
+    auto list_status = ::rocksdb::DB::ListColumnFamilies(db_options, db_path_,
+                                                         &column_family_names);
+    if (!list_status.ok()) {
+        if (open_mode_ == OpenMode::ReadOnly) {
+            throw std::runtime_error(
+                "Failed to list RocksDB column families at '" + db_path_ +
+                "': " + list_status.ToString());
+        }
+        column_family_names = default_column_families();  // listing failed
+    } else {
+        if (open_mode_ == OpenMode::ReadWrite) {
+            for (const auto& name : default_column_families()) {
+                if (std::find(column_family_names.begin(),
+                              column_family_names.end(),
+                              name) == column_family_names.end()) {
+                    column_family_names.push_back(name);
+                }
+            }
+        }
+    }
+    // Every column family is opened with the same cf_options.
+    std::vector<::rocksdb::ColumnFamilyDescriptor> descriptors;
+    descriptors.reserve(column_family_names.size());
+    for (const auto& name : column_family_names) {
+        descriptors.emplace_back(name, cf_options);
+    }
+    // handles[i] is stored under descriptors[i].name after a successful open.
+    std::vector<::rocksdb::ColumnFamilyHandle*> handles;
+    auto status =
+        open_mode_ == OpenMode::ReadOnly
+            ? ::rocksdb::DB::OpenForReadOnly(db_options, db_path_, descriptors,
+                                             &handles, &db_, false)
+            : ::rocksdb::DB::Open(db_options, db_path_, descriptors, &handles,
+                                  &db_);
+    if (!status.ok()) {
+        cleanup_failed_open(db_, handles);
+        throw std::runtime_error("Failed to open RocksDB at '" + db_path_ +
+                                 "': " + status.ToString());
+    }
+    // Publish the handles by name for column_family_handle() lookups.
+    column_families_.clear();
+    for (std::size_t i = 0; i < descriptors.size(); ++i) {
+        column_families_.emplace(descriptors[i].name, handles[i]);
+    }
+
+    return true;
+}
+
+void RocksDatabase::close() {
+    if (db_ == nullptr) {
+        column_families_.clear();
+        return;
+    }
+    // After mark_process_exiting_for_rocksdb() the DB is dropped unclosed.
+    if (process_exiting_flag().load(std::memory_order_relaxed)) {
+        db_ = nullptr;  // intentionally neither closed nor deleted
+        column_families_.clear();
+        env_.reset();  // NOTE(review): leaked DB may still reference env_
+        file_system_.reset();
+        db_path_.clear();
+        return;
+    }
+    // Destroy every column family handle before the DB itself goes away.
+    for (auto& entry : column_families_) {
+        if (entry.second != nullptr) {
+            db_->DestroyColumnFamilyHandle(entry.second);
+            entry.second = nullptr;
+        }
+    }
+    column_families_.clear();
+    // Detach db_ first, then close and delete; the Close() status is ignored.
+    auto* db = db_;
+    db_ = nullptr;
+    static_cast<void>(db->Close());
+    delete db;
+    env_.reset();  // env and file system are released after the DB
+    file_system_.reset();
+    db_path_.clear();
+}
+
+bool RocksDatabase::is_open() const noexcept { return db_ != nullptr; }
+
+bool RocksDatabase::is_read_only() const noexcept {
+    return open_mode_ == OpenMode::ReadOnly;
+}
+
+const std::string& RocksDatabase::path() const noexcept { return db_path_; }
+
+::rocksdb::DB* RocksDatabase::get() const noexcept { return db_; }
+
+// Maps a column family name ("" selects "default") to its handle; throws
+// std::invalid_argument if the name is unknown or its handle is null.
+::rocksdb::ColumnFamilyHandle* RocksDatabase::column_family_handle(
+    std::string_view column_family) const {
+    const std::string name(column_family.empty() ? "default" : column_family);
+    const auto hit = column_families_.find(name);
+    const bool ok = hit != column_families_.end() && hit->second != nullptr;
+    if (ok) return hit->second;
+    throw std::invalid_argument("Unknown RocksDB column family: " + name);
+}
+
+::rocksdb::Status RocksDatabase::put(std::string_view key,
+                                     std::string_view value,
+                                     std::string_view column_family) {
+    return db_->Put(write_options(), column_family_handle(column_family),
+                    ::rocksdb::Slice(key.data(), key.size()),
+                    ::rocksdb::Slice(value.data(), value.size()));
+}
+
+::rocksdb::Status RocksDatabase::get(std::string_view key, std::string* value,
+                                     std::string_view column_family) const {
+    return db_->Get(read_options(), column_family_handle(column_family),
+                    ::rocksdb::Slice(key.data(), key.size()), value);
+}
+
+::rocksdb::Status RocksDatabase::del(std::string_view key,
+                                     std::string_view column_family) {
+    return db_->Delete(write_options(), column_family_handle(column_family),
+                       ::rocksdb::Slice(key.data(), key.size()));
+}
+
+::rocksdb::Status RocksDatabase::put(Batch& batch,
+                                     std::string_view column_family,
+                                     std::string_view key,
+                                     std::string_view value) {
+    return batch.Put(column_family_handle(column_family),
+                     ::rocksdb::Slice(key.data(), key.size()),
+                     ::rocksdb::Slice(value.data(), value.size()));
+}
+
+::rocksdb::Status RocksDatabase::del(Batch& batch,
+                                     std::string_view column_family,
+                                     std::string_view key) {
+    return batch.Delete(column_family_handle(column_family),
+                        ::rocksdb::Slice(key.data(), key.size()));
+}
+
+RocksDatabase::Batch RocksDatabase::begin_batch() const { return Batch(); }
+
+::rocksdb::Status RocksDatabase::commit_batch(Batch& batch) {
+    return db_->Write(write_options(), &batch);
+}
+
+std::unique_ptr<::rocksdb::Iterator> RocksDatabase::new_iterator(
+    std::string_view column_family) const {
+    return std::unique_ptr<::rocksdb::Iterator>(
+        db_->NewIterator(read_options(), column_family_handle(column_family)));
+}
+
+}  // namespace dftracer::utils::rocksdb
diff --git a/src/dftracer/utils/core/rocksdb/db_manager.cpp b/src/dftracer/utils/core/rocksdb/db_manager.cpp
new file mode 100644
index 00000000..a9ae5c57
--- /dev/null
+++ b/src/dftracer/utils/core/rocksdb/db_manager.cpp
@@ -0,0 +1,143 @@
+#include <dftracer/utils/core/rocksdb/db_manager.h>
+
+#include <stdexcept>
+
+namespace dftracer::utils::rocksdb {
+
+RocksDBManager& RocksDBManager::instance() {
+    static RocksDBManager manager;
+    return manager;
+}
+
+std::shared_ptr<RocksDatabase> RocksDBManager::get_or_open(
+    const std::string& db_path, RocksDatabase::OpenMode open_mode) {
+    for (;;) {
+        bool needs_upgrade = false;
+        bool do_open = false;
+        // Phase 1 (under mutex_): reuse a live entry or claim the open.
+        {
+            std::unique_lock<std::mutex> lock(mutex_);
+
+            for (;;) {
+                if (auto it = databases_.find(db_path);
+                    it != databases_.end()) {
+                    auto current = it->second.lock();
+                    if (!current) {
+                        databases_.erase(it);  // weak_ptr expired
+                        continue;
+                    }
+                    if (!(current->is_read_only() &&
+                          open_mode == RocksDatabase::OpenMode::ReadWrite)) {
+                        return current;  // cached mode is sufficient
+                    }
+                    // Read-only entry but read-write requested: try upgrade.
+                    if (opening_.contains(db_path)) {
+                        cv_.wait(lock,
+                                 [&] { return !opening_.contains(db_path); });
+                        continue;
+                    }
+                    // use_count() == 1 means `current` is the only owner.
+                    if (current.use_count() != 1) {
+                        throw std::runtime_error(
+                            "Cannot upgrade RocksDB instance at '" + db_path +
+                            "' from read-only to read-write while it is still "
+                            "in use");
+                    }
+                    // Claim the open; it happens after the lock is released.
+                    needs_upgrade = true;
+                    opening_.insert(db_path);
+                    do_open = true;
+                    break;  // last owner `current` is released at scope exit
+                }
+                // No entry: wait for an in-flight open or claim it ourselves.
+                if (opening_.contains(db_path)) {
+                    cv_.wait(lock, [&] { return !opening_.contains(db_path); });
+                    continue;
+                }
+
+                opening_.insert(db_path);
+                do_open = true;
+                break;
+            }
+        }
+        // do_open is always true here: the inner loop exits only via break.
+        if (!do_open) {
+            continue;
+        }
+        // Phase 2 (no lock held): open the database; undo the claim on error.
+        std::shared_ptr<RocksDatabase> database;
+        try {
+            database = std::make_shared<RocksDatabase>(
+                db_path,
+                needs_upgrade ? RocksDatabase::OpenMode::ReadWrite : open_mode);
+        } catch (...) {
+            std::lock_guard<std::mutex> lock(mutex_);
+            opening_.erase(db_path);
+            cv_.notify_all();
+            throw;
+        }
+        // Phase 3 (under mutex_): publish the result and wake all waiters.
+        {
+            std::lock_guard<std::mutex> lock(mutex_);
+            auto it = databases_.find(db_path);
+
+            if (it == databases_.end()) {
+                databases_[db_path] = database;
+                opening_.erase(db_path);
+                cv_.notify_all();
+                return database;
+            }
+
+            auto current = it->second.lock();
+            if (!current) {
+                databases_[db_path] = database;
+                opening_.erase(db_path);
+                cv_.notify_all();
+                return database;
+            }
+            // A live entry exists: hand it out when its mode is sufficient.
+            if (!(current->is_read_only() &&
+                  open_mode == RocksDatabase::OpenMode::ReadWrite)) {
+                opening_.erase(db_path);
+                cv_.notify_all();
+                return current;  // the new `database` is dropped unused
+            }
+
+            if (current.use_count() != 1) {
+                opening_.erase(db_path);
+                cv_.notify_all();
+                throw std::runtime_error(
+                    "Cannot upgrade RocksDB instance at '" + db_path +
+                    "' from read-only to read-write while it is still in use");
+            }
+            // Replace the sole-owner read-only entry with the new database.
+            databases_[db_path] = database;
+            opening_.erase(db_path);
+            cv_.notify_all();
+            return database;
+        }
+    }
+}
+
+// Drops the cached entry for `db_path`, if there is one, after waiting for
+// any in-flight open of that path to finish. Only the manager's map entry
+// is removed.
+void RocksDBManager::reset(const std::string& db_path) {
+    std::unique_lock<std::mutex> guard(mutex_);
+    while (opening_.contains(db_path)) {
+        cv_.wait(guard);
+    }
+
+    // Erasing by key is a no-op when the path was never registered.
+    databases_.erase(db_path);
+}
+
+// Blocks until no open is in flight, then forgets every cached entry.
+// The mutex stays held for both steps.
+void RocksDBManager::shutdown() {
+    std::unique_lock<std::mutex> guard(mutex_);
+    cv_.wait(guard, [this] { return opening_.empty(); });
+    databases_.clear();
+}
+
+}  // namespace dftracer::utils::rocksdb
diff --git a/src/dftracer/utils/core/rocksdb/filesystem.cpp b/src/dftracer/utils/core/rocksdb/filesystem.cpp
new file mode 100644
index 00000000..1d31f791
--- /dev/null
+++ b/src/dftracer/utils/core/rocksdb/filesystem.cpp
@@ -0,0 +1,849 @@
+#include <dftracer/utils/core/common/object_pool.h>
+#include <dftracer/utils/core/io/io_backend.h>
+#include <dftracer/utils/core/io/io_thread_pool.h>
+#include <dftracer/utils/core/pipeline/executor.h>
+#include <dftracer/utils/core/rocksdb/filesystem.h>
+#include <fcntl.h>
+#include <rocksdb/env.h>
+#include <rocksdb/file_system.h>
+#include <rocksdb/io_status.h>
+#include <rocksdb/slice.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cerrno>
+#include <condition_variable>
+#include <cstdint>
+#include <cstring>
+#include <mutex>
+#include <string>
+#include <string_view>
+
+namespace dftracer::utils::rocksdb {
+
+namespace {
+
+// I/O backend of the current executor; nullptr when there is no current
+// executor or it reports that it has no backend.
+io::IoBackend* current_io_backend() {
+    auto* const exec = Executor::current();
+    const bool usable = exec != nullptr && exec->has_io_backend();
+    return usable ? &exec->io_backend() : nullptr;
+}
+
+class DfTracerFileSystem;
+
+struct AsyncReadHandle {
+    explicit AsyncReadHandle(DfTracerFileSystem* owner_) : owner(owner_) {}
+
+    static void* operator new(std::size_t size) {
+        return ObjectPool::instance().allocate(size);
+    }
+
+    static void operator delete(void* ptr, std::size_t size) noexcept {
+        ObjectPool::instance().deallocate(ptr, size);
+    }
+
+    DfTracerFileSystem* owner;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool finished = false;
+    bool callback_delivered = false;
+    bool aborted = false;
+    bool running = false;
+    std::string path;
+    std::uint64_t offset = 0;
+    std::size_t len = 0;
+    char* scratch = nullptr;
+    ::rocksdb::Slice result;
+    ::rocksdb::IOStatus status;
+    std::function<void(::rocksdb::FSReadRequest&, void*)> callback;
+    void* callback_arg = nullptr;
+};
+
+::rocksdb::IOStatus io_error(std::string_view op, std::string_view path) {
+    return ::rocksdb::IOStatus::IOError(
+        std::string(path), std::string(op) + ": " + std::strerror(errno));
+}
+
+// Reads at `offset` via the executor's I/O backend, else plain ::pread.
+ssize_t pread_sync(int fd, void* buf, std::size_t len, off_t offset) {
+    auto* const backend = current_io_backend();
+    if (backend == nullptr) return ::pread(fd, buf, len, offset);
+    return backend->submit_read_sync(fd, buf, len, offset);
+}
+
+// Writes at `offset` via the executor's I/O backend, else plain ::pwrite.
+ssize_t pwrite_sync(int fd, const void* buf, std::size_t len, off_t offset) {
+    auto* const backend = current_io_backend();
+    if (backend == nullptr) return ::pwrite(fd, buf, len, offset);
+    return backend->submit_write_sync(fd, buf, len, offset);
+}
+
+// Syncs `fd` via the executor's I/O backend, else plain ::fsync.
+int fsync_sync(int fd) {
+    auto* const backend = current_io_backend();
+    if (backend == nullptr) return ::fsync(fd);
+    return backend->submit_fsync_sync(fd);
+}
+
+// Resizes `fd` via the executor's I/O backend, else plain ::ftruncate.
+int ftruncate_sync(int fd, off_t length) {
+    auto* const backend = current_io_backend();
+    if (backend == nullptr) return ::ftruncate(fd, length);
+    return backend->submit_ftruncate_sync(fd, length);
+}
+
+// Stats `fd` via the executor's I/O backend, else plain ::fstat.
+int fstat_sync(int fd, struct stat* st) {
+    auto* const backend = current_io_backend();
+    if (backend == nullptr) return ::fstat(fd, st);
+    return backend->submit_fstat_sync(fd, st);
+}
+
+class DfTracerSequentialFile final : public ::rocksdb::FSSequentialFile {
+   public:
+    DfTracerSequentialFile(std::string path, int fd)
+        : path_(std::move(path)), fd_(fd) {}
+
+    ~DfTracerSequentialFile() override {
+        if (fd_ >= 0) {
+            ::close(fd_);
+        }
+    }
+
+    // Reads up to `n` bytes at the tracked offset and advances it.
+    ::rocksdb::IOStatus Read(std::size_t n, const ::rocksdb::IOOptions&,
+                             ::rocksdb::Slice* result, char* scratch,
+                             ::rocksdb::IODebugContext*) override {
+        std::lock_guard<std::mutex> guard(mutex_);
+        const ssize_t got =
+            pread_sync(fd_, scratch, n, static_cast<off_t>(offset_));
+        if (got < 0) return io_error("read", path_);
+        const auto count = static_cast<std::size_t>(got);
+        offset_ += count;
+        *result = ::rocksdb::Slice(scratch, count);
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus Skip(std::uint64_t n) override {
+        std::lock_guard<std::mutex> lock(mutex_);
+        offset_ += n;
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus InvalidateCache(std::size_t, std::size_t) override {
+        return ::rocksdb::IOStatus::OK();
+    }
+
+   private:
+    std::string path_;
+    int fd_;
+    std::uint64_t offset_ = 0;
+    std::mutex mutex_;
+};
+
+class DfTracerRandomAccessFile final : public ::rocksdb::FSRandomAccessFile {
+   public:
+    DfTracerRandomAccessFile(DfTracerFileSystem* owner, std::string path,
+                             int fd)
+        : owner_(owner), path_(std::move(path)), fd_(fd) {}
+
+    ~DfTracerRandomAccessFile() override {
+        if (fd_ >= 0) {
+            ::close(fd_);
+        }
+    }
+
+    // Reads up to `n` bytes at `offset` into `scratch`; `*result` views them.
+    // NOTE(review): io_error() formats errno; confirm pread_sync() sets it.
+    ::rocksdb::IOStatus Read(std::uint64_t offset, std::size_t n,
+                             const ::rocksdb::IOOptions&,
+                             ::rocksdb::Slice* result, char* scratch,
+                             ::rocksdb::IODebugContext*) const override {
+        const ssize_t got =
+            pread_sync(fd_, scratch, n, static_cast<off_t>(offset));
+        if (got < 0) return io_error("pread", path_);
+        *result = ::rocksdb::Slice(scratch, static_cast<std::size_t>(got));
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus Prefetch(std::uint64_t, std::size_t,
+                                 const ::rocksdb::IOOptions&,
+                                 ::rocksdb::IODebugContext*) override {
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus ReadAsync(
+        ::rocksdb::FSReadRequest& req, const ::rocksdb::IOOptions& opts,
+        std::function<void(::rocksdb::FSReadRequest&, void*)> cb, void* cb_arg,
+        void** io_handle, ::rocksdb::IOHandleDeleter* del_fn,
+        ::rocksdb::IODebugContext* dbg) override;
+
+    ::rocksdb::IOStatus InvalidateCache(std::size_t, std::size_t) override {
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus GetFileSize(std::uint64_t* result) override {
+        struct stat st{};
+        if (fstat_sync(fd_, &st) != 0) {
+            return io_error("fstat", path_);
+        }
+        *result = static_cast<std::uint64_t>(st.st_size);
+        return ::rocksdb::IOStatus::OK();
+    }
+
+   private:
+    DfTracerFileSystem* owner_;
+    std::string path_;
+    int fd_;
+};
+
+class DfTracerWritableFile final : public ::rocksdb::FSWritableFile {
+   public:
+    using ::rocksdb::FSWritableFile::Append;
+    using ::rocksdb::FSWritableFile::PositionedAppend;
+
+    DfTracerWritableFile(std::string path, int fd,
+                         const ::rocksdb::FileOptions& options)
+        : ::rocksdb::FSWritableFile(options), path_(std::move(path)), fd_(fd) {
+        struct stat st{};
+        if (fstat_sync(fd_, &st) == 0) {
+            size_ = static_cast<std::uint64_t>(st.st_size);
+        }
+    }
+
+    ~DfTracerWritableFile() override {
+        if (fd_ >= 0) {
+            static_cast<void>(close_fd());
+        }
+    }
+
+    ::rocksdb::IOStatus Append(const ::rocksdb::Slice& data,
+                               const ::rocksdb::IOOptions&,
+                               ::rocksdb::IODebugContext*) override {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return write_at(data, size_);
+    }
+
+    ::rocksdb::IOStatus PositionedAppend(const ::rocksdb::Slice& data,
+                                         std::uint64_t offset,
+                                         const ::rocksdb::IOOptions&,
+                                         ::rocksdb::IODebugContext*) override {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return write_at(data, offset);
+    }
+
+    ::rocksdb::IOStatus Truncate(std::uint64_t size,
+                                 const ::rocksdb::IOOptions&,
+                                 ::rocksdb::IODebugContext*) override {
+        std::lock_guard<std::mutex> lock(mutex_);
+        if (ftruncate_sync(fd_, static_cast<off_t>(size)) != 0) {
+            return io_error("ftruncate", path_);
+        }
+        size_ = size;
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    ::rocksdb::IOStatus Close(const ::rocksdb::IOOptions&,
+                              ::rocksdb::IODebugContext*) override {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return close_fd();
+    }
+
+    ::rocksdb::IOStatus Flush(const ::rocksdb::IOOptions&,
+                              ::rocksdb::IODebugContext*) override {
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    // Calls fsync_sync() on the descriptor while holding mutex_. A file that
+    // is already closed (fd_ < 0) is reported as successfully synced; a
+    // failing fsync_sync() is turned into an "fsync" I/O error for path_.
+    ::rocksdb::IOStatus Sync(const ::rocksdb::IOOptions&,
+                             ::rocksdb::IODebugContext*) override {
+        const std::lock_guard<std::mutex> guard(mutex_);
+        if (fd_ >= 0 && fsync_sync(fd_) != 0) {
+            return io_error("fsync", path_);
+        }
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Sync() serialises on mutex_, so it may be called from several threads.
+    bool IsSyncThreadSafe() const override { return true; }
+    // Returns the tracked logical size (size_); the file is not re-stat'ed.
+    std::uint64_t GetFileSize(const ::rocksdb::IOOptions&,
+                              ::rocksdb::IODebugContext*) override {
+        const std::lock_guard<std::mutex> guard(mutex_);
+        return size_;
+    }
+    // No-op: both arguments are ignored and success is always reported.
+    ::rocksdb::IOStatus InvalidateCache(std::size_t, std::size_t) override {
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Ignores the requested range and syncs the whole file through Sync().
+    ::rocksdb::IOStatus RangeSync(std::uint64_t, std::uint64_t,
+                                  const ::rocksdb::IOOptions& options,
+                                  ::rocksdb::IODebugContext* dbg) override {
+        return Sync(options, dbg);
+    }
+
+   private:
+    ::rocksdb::IOStatus write_at(const ::rocksdb::Slice& data,
+                                 std::uint64_t offset) {  // no locking here
+        const ssize_t bytes = pwrite_sync(fd_, data.data(), data.size(),
+                                          static_cast<off_t>(offset));
+        if (bytes < 0 || static_cast<std::size_t>(bytes) != data.size()) {
+            return io_error("pwrite", path_);  // error or short write
+        }
+        size_ = std::max(size_, offset + static_cast<std::uint64_t>(bytes));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Closes the descriptor at most once. fd_ is reset even if close(2) fails,
+    // because the descriptor is then in an unspecified state and closing it
+    // again (e.g. from the destructor) could hit an unrelated, reused fd.
+    ::rocksdb::IOStatus close_fd() {
+        const int fd = fd_;
+        fd_ = -1;
+        if (fd >= 0 && ::close(fd) != 0) {
+            return io_error("close", path_);
+        }
+        return ::rocksdb::IOStatus::OK();
+    }
+
+    std::string path_;
+    int fd_;
+    std::uint64_t size_ = 0;
+    mutable std::mutex mutex_;
+};
+// FileSystem decorator whose overrides hand their work to the wrapped target_.
+class LocalFileSystemWrapper : public ::rocksdb::FileSystem {
+   public:
+    explicit LocalFileSystemWrapper(
+        const std::shared_ptr<::rocksdb::FileSystem>& target)
+        : target_(target) {}
+
+    ::rocksdb::FileSystem* target() const { return target_.get(); }
+    // File and directory factories, forwarded unchanged to target_.
+    ::rocksdb::IOStatus NewSequentialFile(
+        const std::string& f, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSSequentialFile>* r,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewSequentialFile(f, file_opts, r, dbg);
+    }
+
+    ::rocksdb::IOStatus NewRandomAccessFile(
+        const std::string& f, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSRandomAccessFile>* r,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewRandomAccessFile(f, file_opts, r, dbg);
+    }
+
+    ::rocksdb::IOStatus NewWritableFile(
+        const std::string& f, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* r,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewWritableFile(f, file_opts, r, dbg);
+    }
+
+    ::rocksdb::IOStatus ReopenWritableFile(
+        const std::string& fname, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* result,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->ReopenWritableFile(fname, file_opts, result, dbg);
+    }
+
+    ::rocksdb::IOStatus ReuseWritableFile(
+        const std::string& fname, const std::string& old_fname,
+        const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* r,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->ReuseWritableFile(fname, old_fname, file_opts, r, dbg);
+    }
+
+    ::rocksdb::IOStatus NewRandomRWFile(
+        const std::string& fname, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSRandomRWFile>* result,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewRandomRWFile(fname, file_opts, result, dbg);
+    }
+
+    ::rocksdb::IOStatus NewMemoryMappedFileBuffer(
+        const std::string& fname,
+        std::unique_ptr<::rocksdb::MemoryMappedFileBuffer>* result) override {
+        return target_->NewMemoryMappedFileBuffer(fname, result);
+    }
+
+    ::rocksdb::IOStatus NewDirectory(
+        const std::string& name, const ::rocksdb::IOOptions& io_opts,
+        std::unique_ptr<::rocksdb::FSDirectory>* result,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewDirectory(name, io_opts, result, dbg);
+    }
+    // Namespace and metadata operations, forwarded unchanged to target_.
+    ::rocksdb::IOStatus FileExists(const std::string& f,
+                                   const ::rocksdb::IOOptions& io_opts,
+                                   ::rocksdb::IODebugContext* dbg) override {
+        return target_->FileExists(f, io_opts, dbg);
+    }
+
+    ::rocksdb::IOStatus GetChildren(const std::string& dir,
+                                    const ::rocksdb::IOOptions& io_opts,
+                                    std::vector<std::string>* r,
+                                    ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetChildren(dir, io_opts, r, dbg);
+    }
+
+    ::rocksdb::IOStatus GetChildrenFileAttributes(
+        const std::string& dir, const ::rocksdb::IOOptions& options,
+        std::vector<::rocksdb::FileAttributes>* result,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetChildrenFileAttributes(dir, options, result, dbg);
+    }
+
+    ::rocksdb::IOStatus DeleteFile(const std::string& f,
+                                   const ::rocksdb::IOOptions& options,
+                                   ::rocksdb::IODebugContext* dbg) override {
+        return target_->DeleteFile(f, options, dbg);
+    }
+
+    ::rocksdb::IOStatus Truncate(const std::string& fname, size_t size,
+                                 const ::rocksdb::IOOptions& options,
+                                 ::rocksdb::IODebugContext* dbg) override {
+        return target_->Truncate(fname, size, options, dbg);
+    }
+
+    ::rocksdb::IOStatus CreateDir(const std::string& d,
+                                  const ::rocksdb::IOOptions& options,
+                                  ::rocksdb::IODebugContext* dbg) override {
+        return target_->CreateDir(d, options, dbg);
+    }
+
+    ::rocksdb::IOStatus CreateDirIfMissing(
+        const std::string& d, const ::rocksdb::IOOptions& options,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->CreateDirIfMissing(d, options, dbg);
+    }
+
+    ::rocksdb::IOStatus DeleteDir(const std::string& d,
+                                  const ::rocksdb::IOOptions& options,
+                                  ::rocksdb::IODebugContext* dbg) override {
+        return target_->DeleteDir(d, options, dbg);
+    }
+
+    ::rocksdb::IOStatus GetFileSize(const std::string& f,
+                                    const ::rocksdb::IOOptions& options,
+                                    uint64_t* s,
+                                    ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetFileSize(f, options, s, dbg);
+    }
+
+    ::rocksdb::IOStatus GetFileModificationTime(
+        const std::string& fname, const ::rocksdb::IOOptions& options,
+        uint64_t* file_mtime, ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetFileModificationTime(fname, options, file_mtime,
+                                                dbg);
+    }
+
+    ::rocksdb::IOStatus GetAbsolutePath(
+        const std::string& db_path, const ::rocksdb::IOOptions& options,
+        std::string* output_path, ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetAbsolutePath(db_path, options, output_path, dbg);
+    }
+
+    ::rocksdb::IOStatus RenameFile(const std::string& s, const std::string& t,
+                                   const ::rocksdb::IOOptions& options,
+                                   ::rocksdb::IODebugContext* dbg) override {
+        return target_->RenameFile(s, t, options, dbg);
+    }
+
+    ::rocksdb::IOStatus LinkFile(const std::string& s, const std::string& t,
+                                 const ::rocksdb::IOOptions& options,
+                                 ::rocksdb::IODebugContext* dbg) override {
+        return target_->LinkFile(s, t, options, dbg);
+    }
+
+    ::rocksdb::IOStatus NumFileLinks(const std::string& fname,
+                                     const ::rocksdb::IOOptions& options,
+                                     uint64_t* count,
+                                     ::rocksdb::IODebugContext* dbg) override {
+        return target_->NumFileLinks(fname, options, count, dbg);
+    }
+
+    ::rocksdb::IOStatus AreFilesSame(const std::string& first,
+                                     const std::string& second,
+                                     const ::rocksdb::IOOptions& options,
+                                     bool* res,
+                                     ::rocksdb::IODebugContext* dbg) override {
+        return target_->AreFilesSame(first, second, options, res, dbg);
+    }
+    // File locking, test directory and logger creation: forwarded as well.
+    ::rocksdb::IOStatus LockFile(const std::string& f,
+                                 const ::rocksdb::IOOptions& options,
+                                 ::rocksdb::FileLock** l,
+                                 ::rocksdb::IODebugContext* dbg) override {
+        return target_->LockFile(f, options, l, dbg);
+    }
+
+    ::rocksdb::IOStatus UnlockFile(::rocksdb::FileLock* l,
+                                   const ::rocksdb::IOOptions& options,
+                                   ::rocksdb::IODebugContext* dbg) override {
+        return target_->UnlockFile(l, options, dbg);
+    }
+
+    ::rocksdb::IOStatus GetTestDirectory(
+        const ::rocksdb::IOOptions& options, std::string* path,
+        ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetTestDirectory(options, path, dbg);
+    }
+
+    ::rocksdb::IOStatus NewLogger(const std::string& fname,
+                                  const ::rocksdb::IOOptions& options,
+                                  std::shared_ptr<::rocksdb::Logger>* result,
+                                  ::rocksdb::IODebugContext* dbg) override {
+        return target_->NewLogger(fname, options, result, dbg);
+    }
+    // FileOptions tuning hooks delegate to target_.
+    void SanitizeFileOptions(::rocksdb::FileOptions* opts) const override {
+        target_->SanitizeFileOptions(opts);
+    }
+
+    ::rocksdb::FileOptions OptimizeForLogRead(
+        const ::rocksdb::FileOptions& file_options) const override {
+        return target_->OptimizeForLogRead(file_options);
+    }
+
+    ::rocksdb::FileOptions OptimizeForManifestRead(
+        const ::rocksdb::FileOptions& file_options) const override {
+        return target_->OptimizeForManifestRead(file_options);
+    }
+
+    ::rocksdb::FileOptions OptimizeForLogWrite(
+        const ::rocksdb::FileOptions& file_options,
+        const ::rocksdb::DBOptions& db_options) const override {
+        return target_->OptimizeForLogWrite(file_options, db_options);
+    }
+
+    ::rocksdb::FileOptions OptimizeForManifestWrite(
+        const ::rocksdb::FileOptions& file_options) const override {
+        return target_->OptimizeForManifestWrite(file_options);
+    }
+
+    ::rocksdb::FileOptions OptimizeForCompactionTableWrite(
+        const ::rocksdb::FileOptions& file_options,
+        const ::rocksdb::ImmutableDBOptions& immutable_opts) const override {
+        return target_->OptimizeForCompactionTableWrite(file_options,
+                                                        immutable_opts);
+    }
+
+    ::rocksdb::FileOptions OptimizeForCompactionTableRead(
+        const ::rocksdb::FileOptions& file_options,
+        const ::rocksdb::ImmutableDBOptions& db_options) const override {
+        return target_->OptimizeForCompactionTableRead(file_options,
+                                                       db_options);
+    }
+
+    ::rocksdb::FileOptions OptimizeForBlobFileRead(
+        const ::rocksdb::FileOptions& file_options,
+        const ::rocksdb::ImmutableDBOptions& db_options) const override {
+        return target_->OptimizeForBlobFileRead(file_options, db_options);
+    }
+
+    ::rocksdb::IOStatus GetFreeSpace(const std::string& path,
+                                     const ::rocksdb::IOOptions& options,
+                                     uint64_t* diskfree,
+                                     ::rocksdb::IODebugContext* dbg) override {
+        return target_->GetFreeSpace(path, options, diskfree, dbg);
+    }
+
+    ::rocksdb::IOStatus IsDirectory(const std::string& path,
+                                    const ::rocksdb::IOOptions& options,
+                                    bool* is_dir,
+                                    ::rocksdb::IODebugContext* dbg) override {
+        return target_->IsDirectory(path, options, is_dir, dbg);
+    }
+    // Exposes the wrapped file system as the inner Customizable.
+    const ::rocksdb::Customizable* Inner() const override {
+        return target_.get();
+    }
+
+    ::rocksdb::Status PrepareOptions(
+        const ::rocksdb::ConfigOptions& options) override {
+        return target_->PrepareOptions(options);
+    }
+    // Uses the FileSystem base implementation rather than target_'s.
+    std::string SerializeOptions(const ::rocksdb::ConfigOptions& config_options,
+                                 const std::string& header) const override {
+        return ::rocksdb::FileSystem::SerializeOptions(config_options, header);
+    }
+    // Async I/O and cache hooks delegate to target_.
+    ::rocksdb::IOStatus Poll(std::vector<void*>& io_handles,
+                             size_t min_completions) override {
+        return target_->Poll(io_handles, min_completions);
+    }
+
+    ::rocksdb::IOStatus AbortIO(std::vector<void*>& io_handles) override {
+        return target_->AbortIO(io_handles);
+    }
+
+    void DiscardCacheForDirectory(const std::string& path) override {
+        target_->DiscardCacheForDirectory(path);
+    }
+
+    void SupportedOps(int64_t& supported_ops) override {
+        target_->SupportedOps(supported_ops);
+    }
+
+   protected:
+    std::shared_ptr<::rocksdb::FileSystem> target_;
+};
+// RocksDB FileSystem on raw POSIX descriptors with backend/pool async reads.
+class DfTracerFileSystem final : public LocalFileSystemWrapper {
+   public:
+    explicit DfTracerFileSystem(
+        const std::shared_ptr<::rocksdb::FileSystem>& target)
+        : LocalFileSystemWrapper(target), fallback_pool_(4) {
+        fallback_pool_.start();
+    }
+
+    ~DfTracerFileSystem() override { fallback_pool_.stop(); }
+
+    static const char* kClassName() { return "DfTracerFileSystem"; }
+
+    const char* Name() const override { return kClassName(); }
+
+    bool IsInstanceOf(const std::string& name) const override {
+        return name == kClassName() ||
+               LocalFileSystemWrapper::IsInstanceOf(name);
+    }
+
+    // Reports async-read and prefetch support; target_ is not consulted.
+    void SupportedOps(int64_t& supported_ops) override {
+        supported_ops = (1 << ::rocksdb::FSSupportedOps::kAsyncIO) |
+                        (1 << ::rocksdb::FSSupportedOps::kFSPrefetch);
+    }
+    // Opens fname read-only and wraps it in a DfTracerSequentialFile.
+    ::rocksdb::IOStatus NewSequentialFile(
+        const std::string& fname, const ::rocksdb::FileOptions&,
+        std::unique_ptr<::rocksdb::FSSequentialFile>* result,
+        ::rocksdb::IODebugContext*) override {
+        int fd = ::open(fname.c_str(), O_RDONLY | O_CLOEXEC);
+        if (fd < 0) {
+            return io_error("open", fname);
+        }
+        result->reset(new DfTracerSequentialFile(fname, fd));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Opens fname read-only; the file keeps `this` for its async reads.
+    ::rocksdb::IOStatus NewRandomAccessFile(
+        const std::string& fname, const ::rocksdb::FileOptions&,
+        std::unique_ptr<::rocksdb::FSRandomAccessFile>* result,
+        ::rocksdb::IODebugContext*) override {
+        int fd = ::open(fname.c_str(), O_RDONLY | O_CLOEXEC);
+        if (fd < 0) {
+            return io_error("open", fname);
+        }
+        result->reset(new DfTracerRandomAccessFile(this, fname, fd));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Creates fname (mode 0644) or truncates an existing file, read-write.
+    ::rocksdb::IOStatus NewWritableFile(
+        const std::string& fname, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* result,
+        ::rocksdb::IODebugContext*) override {
+        int fd =
+            ::open(fname.c_str(), O_CREAT | O_TRUNC | O_RDWR | O_CLOEXEC, 0644);
+        if (fd < 0) {
+            return io_error("open", fname);
+        }
+        result->reset(new DfTracerWritableFile(fname, fd, file_opts));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Like NewWritableFile, but existing contents are kept (no O_TRUNC).
+    ::rocksdb::IOStatus ReopenWritableFile(
+        const std::string& fname, const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* result,
+        ::rocksdb::IODebugContext*) override {
+        int fd = ::open(fname.c_str(), O_CREAT | O_RDWR | O_CLOEXEC, 0644);
+        if (fd < 0) {
+            return io_error("open", fname);
+        }
+        result->reset(new DfTracerWritableFile(fname, fd, file_opts));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Renames old_fname over fname and reopens the result for writing.
+    ::rocksdb::IOStatus ReuseWritableFile(
+        const std::string& fname, const std::string& old_fname,
+        const ::rocksdb::FileOptions& file_opts,
+        std::unique_ptr<::rocksdb::FSWritableFile>* result,
+        ::rocksdb::IODebugContext*) override {
+        // rename(2) replaces an existing fname atomically; no unlink needed.
+        if (::rename(old_fname.c_str(), fname.c_str()) != 0) {
+            return io_error("rename", old_fname);
+        }
+        return ReopenWritableFile(fname, file_opts, result, nullptr);
+    }
+    // Waits for min_completions handles to finish, then delivers callbacks.
+    ::rocksdb::IOStatus Poll(std::vector<void*>& io_handles,
+                             size_t min_completions) override {
+        const size_t target = std::min(min_completions, io_handles.size());
+        if (target == 0) {
+            return ::rocksdb::IOStatus::OK();
+        }
+        std::unique_lock<std::mutex> lock(completions_mutex_);
+        completions_cv_.wait(lock, [&] {
+            size_t completed = 0;
+            for (void* io_handle : io_handles) {
+                auto* handle = static_cast<AsyncReadHandle*>(io_handle);
+                std::lock_guard<std::mutex> handle_lock(handle->mutex);
+                // Finished handles count even if already delivered, so a
+                // repeated Poll on them cannot block forever.
+                completed += handle->finished ? 1U : 0U;
+            }
+            return completed >= target;
+        });
+        lock.unlock();
+        for (void* io_handle : io_handles) {
+            auto* handle = static_cast<AsyncReadHandle*>(io_handle);
+            std::unique_lock<std::mutex> handle_lock(handle->mutex);
+            if (!handle->finished || handle->callback_delivered ||
+                handle->aborted) {
+                continue;
+            }
+            handle->callback_delivered = true;
+            auto callback = handle->callback;
+            auto callback_arg = handle->callback_arg;
+            ::rocksdb::FSReadRequest req;
+            req.offset = handle->offset;
+            req.len = handle->len;
+            req.scratch = handle->scratch;
+            req.result = handle->result;
+            req.status = handle->status;
+            handle_lock.unlock();
+            callback(req, callback_arg);
+        }
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Marks all handles aborted, then waits for each read to finish. Reads are
+    // not cancelled; Poll() skips aborted handles, so no callback fires.
+    ::rocksdb::IOStatus AbortIO(std::vector<void*>& io_handles) override {
+        for (void* io_handle : io_handles) {
+            auto* handle = static_cast<AsyncReadHandle*>(io_handle);
+            std::lock_guard<std::mutex> lock(handle->mutex);
+            handle->aborted = true;
+        }
+        for (void* io_handle : io_handles) {
+            auto* handle = static_cast<AsyncReadHandle*>(io_handle);
+            std::unique_lock<std::mutex> lock(handle->mutex);
+            handle->cv.wait(lock, [&] { return handle->finished; });
+            handle->callback_delivered = true;
+        }
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Starts the read on the current I/O backend, else on fallback_pool_.
+    void submit_async_read(AsyncReadHandle* handle, int fd, std::string path,
+                           ::rocksdb::IODebugContext* dbg) {
+        {
+            std::lock_guard<std::mutex> lock(handle->mutex);
+            handle->running = true;
+            handle->path = path;
+        }
+        if (auto* backend = current_io_backend(); backend != nullptr) {
+            backend->submit_pread_callback(fd, handle->scratch, handle->len,
+                                           static_cast<off_t>(handle->offset),
+                                           &DfTracerFileSystem::on_pread_done,
+                                           handle);
+            return;
+        }
+
+        fallback_pool_.submit([this, handle, fd, path = std::move(path), dbg] {
+            ::rocksdb::Slice result;
+            auto status = read_async_impl(fd, path, handle->offset, handle->len,
+                                          &result, handle->scratch, dbg);
+            complete_async_read(handle, status, result);
+        });
+    }
+    // One blocking pread(2) into scratch; a short read is returned as-is.
+    static ::rocksdb::IOStatus read_async_impl(
+        int fd, std::string_view path, std::uint64_t offset, std::size_t n,
+        ::rocksdb::Slice* result, char* scratch, ::rocksdb::IODebugContext*) {
+        const ssize_t bytes =
+            ::pread(fd, scratch, n, static_cast<off_t>(offset));
+        if (bytes < 0) {
+            return io_error("pread", path);
+        }
+        *result = ::rocksdb::Slice(scratch, static_cast<std::size_t>(bytes));
+        return ::rocksdb::IOStatus::OK();
+    }
+    // Deleter that ReadAsync hands out through *del_fn.
+    static void delete_async_read_handle(void* io_handle) {
+        delete static_cast<AsyncReadHandle*>(io_handle);
+    }
+
+   private:
+    // Publishes the read outcome on the handle and wakes AbortIO/Poll waiters.
+    void complete_async_read(AsyncReadHandle* handle,
+                             const ::rocksdb::IOStatus& status,
+                             const ::rocksdb::Slice& result) {
+        {
+            std::lock_guard<std::mutex> lock(handle->mutex);
+            handle->result = result;
+            handle->status = status;
+            handle->running = false;
+            handle->finished = true;
+            // Notify under the handle mutex: after it is released a waiter
+            // may let RocksDB delete the handle, so it is not touched again.
+            handle->cv.notify_all();
+        }
+        std::lock_guard<std::mutex> lock(completions_mutex_);
+        completions_cv_.notify_all();
+    }
+    // Backend completion hook: result < 0 is -errno, otherwise a byte count.
+    static void on_pread_done(void* context, ssize_t result) noexcept {
+        auto* handle = static_cast<AsyncReadHandle*>(context);
+        ::rocksdb::IOStatus status = ::rocksdb::IOStatus::OK();
+        ::rocksdb::Slice slice;
+        if (result < 0) {
+            errno = static_cast<int>(-result);
+            status = io_error("pread", handle->path);
+        } else {
+            slice = ::rocksdb::Slice(handle->scratch,
+                                     static_cast<std::size_t>(result));
+        }
+        handle->owner->complete_async_read(handle, status, slice);
+    }
+
+    io::IoThreadPool fallback_pool_;
+    std::mutex completions_mutex_;
+    std::condition_variable completions_cv_;
+};
+// Queues an async read of req; the handle is released through *del_fn.
+::rocksdb::IOStatus DfTracerRandomAccessFile::ReadAsync(
+    ::rocksdb::FSReadRequest& req, const ::rocksdb::IOOptions&,
+    std::function<void(::rocksdb::FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, ::rocksdb::IOHandleDeleter* del_fn,
+    ::rocksdb::IODebugContext* dbg) {
+    auto* pending = new AsyncReadHandle(owner_);
+    pending->offset = req.offset;
+    pending->len = req.len;
+    pending->scratch = req.scratch;
+    pending->callback = std::move(cb);
+    pending->callback_arg = cb_arg;
+    *io_handle = static_cast<void*>(pending);
+    *del_fn = &DfTracerFileSystem::delete_async_read_handle;
+    owner_->submit_async_read(pending, fd_, path_, dbg);
+    return ::rocksdb::IOStatus::OK();
+}
+
+}  // namespace
+// Builds a DfTracerFileSystem layered on RocksDB's default FileSystem.
+std::shared_ptr<::rocksdb::FileSystem> make_dftracer_file_system() {
+    auto base = ::rocksdb::FileSystem::Default();
+    return std::make_shared<DfTracerFileSystem>(base);
+}
+// Wraps file_system in an Env created by ::rocksdb::NewCompositeEnv().
+std::unique_ptr<::rocksdb::Env> make_dftracer_env(
+    const std::shared_ptr<::rocksdb::FileSystem>& file_system) {
+    return ::rocksdb::NewCompositeEnv(file_system);
+}
+
+}  // namespace dftracer::utils::rocksdb
diff --git a/src/dftracer/utils/core/rocksdb/key_codec.cpp b/src/dftracer/utils/core/rocksdb/key_codec.cpp
new file mode 100644
index 00000000..15240cdd
--- /dev/null
+++ b/src/dftracer/utils/core/rocksdb/key_codec.cpp
@@ -0,0 +1,88 @@
+#include <dftracer/utils/core/rocksdb/key_codec.h>
+
+#include <stdexcept>
+
+namespace dftracer::utils::rocksdb {
+
+namespace {
+// Decodes exactly sizeof(T) big-endian bytes; throws on any other length.
+template <typename T>
+T decode_big_endian(std::string_view bytes) {
+    if (bytes.size() != sizeof(T)) {
+        throw std::invalid_argument(
+            "KeyCodec: invalid big-endian integer width");
+    }
+    T decoded{};
+    for (const char raw : bytes) {
+        const auto octet = static_cast<unsigned char>(raw);
+        decoded = static_cast<T>((decoded << 8U) | octet);
+    }
+    return decoded;
+}
+
+}  // namespace
+// Returns value encoded as 4 big-endian bytes.
+std::string KeyCodec::encode_be32(std::uint32_t value) {
+    std::string encoded;
+    encoded.reserve(sizeof(value));
+    append_be32(encoded, value);
+    return encoded;
+}
+// Returns value encoded as 8 big-endian bytes.
+std::string KeyCodec::encode_be64(std::uint64_t value) {
+    std::string encoded;
+    encoded.reserve(sizeof(value));
+    append_be64(encoded, value);
+    return encoded;
+}
+// Decodes 4 big-endian bytes; throws std::invalid_argument on other sizes.
+std::uint32_t KeyCodec::decode_be32(std::string_view bytes) {
+    return decode_big_endian<std::uint32_t>(bytes);
+}
+// Decodes 8 big-endian bytes; throws std::invalid_argument on other sizes.
+std::uint64_t KeyCodec::decode_be64(std::string_view bytes) {
+    return decode_big_endian<std::uint64_t>(bytes);
+}
+// Appends value to out as 4 bytes, most significant byte first.
+void KeyCodec::append_be32(std::string& out, std::uint32_t value) {
+    for (int bits = 24; bits >= 0; bits -= 8) {
+        out.push_back(static_cast<char>((value >> bits) & 0xFFU));
+    }
+}
+// Appends value to out as 8 bytes, most significant byte first.
+void KeyCodec::append_be64(std::string& out, std::uint64_t value) {
+    for (int bits = 56; bits >= 0; bits -= 8) {
+        out.push_back(static_cast<char>((value >> bits) & 0xFFU));
+    }
+}
+// Appends tag verbatim to the key under construction.
+KeyBuilder& KeyBuilder::append_tag(std::string_view tag) {
+    key_ += tag;
+    return *this;
+}
+// Appends a single NUL byte as the field separator.
+KeyBuilder& KeyBuilder::append_separator() {
+    key_ += '\0';
+    return *this;
+}
+// Appends value verbatim; embedded NUL bytes are not escaped.
+KeyBuilder& KeyBuilder::append_string(std::string_view value) {
+    key_ += value;
+    return *this;
+}
+// Appends value as 4 big-endian bytes via KeyCodec::append_be32.
+KeyBuilder& KeyBuilder::append_be32(std::uint32_t value) {
+    KeyCodec::append_be32(key_, value);
+    return *this;
+}
+// Appends value as 8 big-endian bytes via KeyCodec::append_be64.
+KeyBuilder& KeyBuilder::append_be64(std::uint64_t value) {
+    KeyCodec::append_be64(key_, value);
+    return *this;
+}
+// Returns a copy of the key built so far; the builder is left unchanged.
+std::string KeyBuilder::build() const { return key_; }
+// Discards everything appended so far.
+void KeyBuilder::clear() { key_.clear(); }
+
+}  // namespace dftracer::utils::rocksdb
diff --git a/src/dftracer/utils/core/runtime.cpp b/src/dftracer/utils/core/runtime.cpp
index 8199f820..b09c113e 100644
--- a/src/dftracer/utils/core/runtime.cpp
+++ b/src/dftracer/utils/core/runtime.cpp
@@ -64,8 +64,12 @@ TaskHandle Runtime::submit(coro::CoroTask<void> task, std::string name) {
            std::shared_ptr<std::atomic<TaskIndex>> task_id) -> coro::Coro {
         try {
             co_await std::move(t);
+            t = coro::CoroTask<void>{
+                std::coroutine_handle<coro::CoroTask<void>::promise_type>{}};
             exec->mark_coro_completed(task_id->load(std::memory_order_acquire));
         } catch (...) {
+            t = coro::CoroTask<void>{
+                std::coroutine_handle<coro::CoroTask<void>::promise_type>{}};
             exec->mark_coro_completed(task_id->load(std::memory_order_acquire));
             p->set_exception(std::current_exception());
             co_return;
diff --git a/src/dftracer/utils/core/sqlite/async.cpp b/src/dftracer/utils/core/sqlite/async.cpp
deleted file mode 100644
index 04ba7a77..00000000
--- a/src/dftracer/utils/core/sqlite/async.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <dftracer/utils/core/io/io_thread_pool.h>
-#include <dftracer/utils/core/pipeline/executor.h>
-#include <dftracer/utils/core/sqlite/async.h>
-
-namespace dftracer::utils::sqlite {
-
-io::IoThreadPool *get_sqlite_pool() {
-    auto *exec = Executor::current();
-    if (exec == nullptr) {
-        return nullptr;
-    }
-    return exec->sqlite_pool();
-}
-
-void sqlite_async_submit(io::IoThreadPool *pool, std::function<void()> fn) {
-    pool->submit(std::move(fn));
-}
-
-void sqlite_async_resume_on(void *executor, std::coroutine_handle<> h) {
-    auto *exec = static_cast<Executor *>(executor);
-    if (exec != nullptr) {
-        exec->enqueue(h);
-    } else {
-        h.resume();
-    }
-}
-
-void *get_current_executor_opaque() {
-    return static_cast<void *>(Executor::current());
-}
-
-}  // namespace dftracer::utils::sqlite
diff --git a/src/dftracer/utils/core/sqlite/database.cpp b/src/dftracer/utils/core/sqlite/database.cpp
deleted file mode 100644
index 0e8d24a7..00000000
--- a/src/dftracer/utils/core/sqlite/database.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-#include <dftracer/utils/core/common/filesystem.h>
-#include <dftracer/utils/core/sqlite/database.h>
-#include <dftracer/utils/core/sqlite/error.h>
-
-#include <utility>
-
-namespace dftracer::utils::sqlite {
-
-SqliteDatabase::SqliteDatabase() : db_path_(""), db_(nullptr) {}
-
-SqliteDatabase::SqliteDatabase(const std::string &db_path)
-    : db_path_(db_path), db_(nullptr) {
-    open(db_path);
-}
-
-SqliteDatabase::~SqliteDatabase() { close(); }
-
-SqliteDatabase::SqliteDatabase(SqliteDatabase &&other) noexcept
-    : db_path_(std::move(other.db_path_)), db_(other.db_) {
-    other.db_ = nullptr;
-}
-
-SqliteDatabase &SqliteDatabase::operator=(SqliteDatabase &&other) noexcept {
-    if (this != &other) {
-        close();
-        db_path_ = std::move(other.db_path_);
-        db_ = other.db_;
-        other.db_ = nullptr;
-    }
-    return *this;
-}
-
-bool SqliteDatabase::open(const std::string &db_path) {
-    if (is_open()) {
-        close();
-    }
-
-    db_path_ = db_path;
-
-    // Ensure parent directory exists (SQLite cannot create it)
-    std::error_code ec;
-    fs::create_directories(fs::path(db_path_).parent_path(), ec);
-
-    if (sqlite3_open(db_path_.c_str(), &db_) != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::OPEN_ERROR,
-            "Failed to open database: " + std::string(sqlite3_errmsg(db_)));
-    }
-    return true;
-}
-
-void SqliteDatabase::close() {
-    if (db_) {
-        sqlite3_close(db_);
-        db_ = nullptr;
-    }
-}
-
-sqlite3 *SqliteDatabase::get() const { return db_; }
-
-bool SqliteDatabase::is_open() const { return db_ != nullptr; }
-
-bool SqliteDatabase::open_with_vfs(const std::string &db_path,
-                                   const char *vfs_name) {
-    if (is_open()) {
-        close();
-    }
-    db_path_ = db_path;
-
-    std::error_code ec;
-    fs::create_directories(fs::path(db_path_).parent_path(), ec);
-
-    int rc =
-        sqlite3_open_v2(db_path_.c_str(), &db_,
-                        SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, vfs_name);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(SqliteError::Type::OPEN_ERROR,
-                          "Failed to open database with VFS '" +
-                              std::string(vfs_name) +
-                              "': " + std::string(sqlite3_errmsg(db_)));
-    }
-    return true;
-}
-
-}  // namespace dftracer::utils::sqlite
diff --git a/src/dftracer/utils/core/sqlite/error.cpp b/src/dftracer/utils/core/sqlite/error.cpp
deleted file mode 100644
index 9945564e..00000000
--- a/src/dftracer/utils/core/sqlite/error.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <dftracer/utils/core/sqlite/error.h>
-
-namespace dftracer::utils::sqlite {
-std::string SqliteError::format_message(Type type, const std::string &message) {
-    const char *prefix = "";
-    switch (type) {
-        case DATABASE_ERROR:
-            prefix = "SQLite database error";
-            break;
-        case STATEMENT_ERROR:
-            prefix = "SQLite statement error";
-            break;
-        case OPEN_ERROR:
-            prefix = "SQLite open error";
-            break;
-        case VFS_ERROR:
-            prefix = "SQLite VFS error";
-            break;
-        case UNKNOWN_ERROR:
-            prefix = "SQLite unknown error";
-            break;
-    }
-    return std::string(prefix) + ": " + message;
-}
-}  // namespace dftracer::utils::sqlite
diff --git a/src/dftracer/utils/core/sqlite/statement.cpp b/src/dftracer/utils/core/sqlite/statement.cpp
deleted file mode 100644
index b01c9b92..00000000
--- a/src/dftracer/utils/core/sqlite/statement.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <dftracer/utils/core/sqlite/database.h>
-#include <dftracer/utils/core/sqlite/error.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-
-#include <cstddef>
-#include <span>
-
-namespace dftracer::utils::sqlite {
-
-SqliteStmt::SqliteStmt(const SqliteDatabase &db, const char *sql) {
-    sqlite3 *raw_db = db.get();
-    if (sqlite3_prepare_v2(raw_db, sql, -1, &stmt_, nullptr) != SQLITE_OK) {
-        stmt_ = nullptr;
-        throw SqliteError(SqliteError::Type::STATEMENT_ERROR,
-                          "Failed to prepare SQL statement: " +
-                              std::string(sqlite3_errmsg(raw_db)));
-    }
-}
-
-SqliteStmt::SqliteStmt(sqlite3 *db, const char *sql) {
-    if (sqlite3_prepare_v2(db, sql, -1, &stmt_, nullptr) != SQLITE_OK) {
-        stmt_ = nullptr;
-        throw SqliteError(SqliteError::Type::STATEMENT_ERROR,
-                          "Failed to prepare SQL statement: " +
-                              std::string(sqlite3_errmsg(db)));
-    }
-}
-
-SqliteStmt::~SqliteStmt() {
-    if (stmt_) {
-        sqlite3_finalize(stmt_);
-    }
-}
-
-SqliteStmt::operator sqlite3_stmt *() { return stmt_; }
-
-sqlite3_stmt *SqliteStmt::get() { return stmt_; }
-
-void SqliteStmt::reset() { sqlite3_reset(stmt_); }
-
-void SqliteStmt::bind_int(int index, int value) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_int(stmt_, index, value);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind int parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_int64(int index, int64_t value) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_int64(stmt_, index, value);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind int64 parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_double(int index, double value) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_double(stmt_, index, value);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(SqliteError::Type::STATEMENT_ERROR,
-                          "Failed to bind double parameter at index " +
-                              std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_text(int index, const std::string &text) {
-    validate_parameter_index(index);
-    int rc =
-        sqlite3_bind_text(stmt_, index, text.c_str(),
-                          static_cast<int>(text.length()), SQLITE_TRANSIENT);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind text parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_text(int index, std::string_view text) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_text(stmt_, index, text.data(),
-                               static_cast<int>(text.size()), SQLITE_TRANSIENT);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind text parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_text(int index, const char *text, int length,
-                           void (*destructor)(void *)) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_text(stmt_, index, text, length, destructor);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind text parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_blob(int index, const void *blob, int length) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_blob(stmt_, index, blob, length, SQLITE_TRANSIENT);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind blob parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_blob(int index, std::span<const std::byte> data) {
-    bind_blob(index, data.data(), static_cast<int>(data.size()));
-}
-
-void SqliteStmt::bind_blob(int index, std::span<const unsigned char> data) {
-    bind_blob(index, data.data(), static_cast<int>(data.size()));
-}
-
-void SqliteStmt::bind_blob_static(int index, const void *blob, int length) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_blob(stmt_, index, blob, length, SQLITE_STATIC);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind blob parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_text_static(int index, std::string_view text) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_text(stmt_, index, text.data(),
-                               static_cast<int>(text.size()), SQLITE_STATIC);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind text parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::bind_null(int index) {
-    validate_parameter_index(index);
-    int rc = sqlite3_bind_null(stmt_, index);
-    if (rc != SQLITE_OK) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Failed to bind null parameter at index " + std::to_string(index));
-    }
-}
-
-void SqliteStmt::clear_bindings() { sqlite3_clear_bindings(stmt_); }
-
-int SqliteStmt::bind_parameter_count() {
-    return sqlite3_bind_parameter_count(stmt_);
-}
-
-void SqliteStmt::validate_parameter_index(int index) {
-    if (index < 1) {
-        throw SqliteError(
-            SqliteError::Type::STATEMENT_ERROR,
-            "Parameter index must be >= 1 (got " + std::to_string(index) + ")");
-    }
-    int param_count = sqlite3_bind_parameter_count(stmt_);
-    if (index > param_count) {
-        throw SqliteError(SqliteError::Type::STATEMENT_ERROR,
-                          "Parameter index " + std::to_string(index) +
-                              " exceeds parameter count " +
-                              std::to_string(param_count));
-    }
-}
-
-}  // namespace dftracer::utils::sqlite
diff --git a/src/dftracer/utils/core/sqlite/vfs.cpp b/src/dftracer/utils/core/sqlite/vfs.cpp
deleted file mode 100644
index 40fc122d..00000000
--- a/src/dftracer/utils/core/sqlite/vfs.cpp
+++ /dev/null
@@ -1,620 +0,0 @@
-#include <dftracer/utils/core/io/io_backend.h>
-#include <dftracer/utils/core/pipeline/executor.h>
-#include <dftracer/utils/core/sqlite/vfs.h>
-#include <fcntl.h>
-#include <sqlite3.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <cerrno>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-namespace dftracer::utils::sqlite {
-
-// Forward declarations of all VFS methods
-static int dftracer_sqlite_vfs_open(sqlite3_vfs *pVfs, const char *zName,
-                                    sqlite3_file *pFile, int flags,
-                                    int *pOutFlags);
-static int dftracer_sqlite_vfs_delete(sqlite3_vfs *pVfs, const char *zPath,
-                                      int dirSync);
-static int dftracer_sqlite_vfs_access(sqlite3_vfs *pVfs, const char *zPath,
-                                      int flags, int *pResOut);
-static int dftracer_sqlite_vfs_fullpathname(sqlite3_vfs *pVfs,
-                                            const char *zName, int nOut,
-                                            char *zOut);
-static int dftracer_sqlite_vfs_get_last_error(sqlite3_vfs *pVfs, int nBuf,
-                                              char *zBuf);
-
-// Forward declarations of all io_methods
-static int dftracer_sqlite_vfs_close(sqlite3_file *pFile);
-static int dftracer_sqlite_vfs_read(sqlite3_file *pFile, void *buf, int amt,
-                                    sqlite3_int64 offset);
-static int dftracer_sqlite_vfs_write(sqlite3_file *pFile, const void *buf,
-                                     int amt, sqlite3_int64 offset);
-static int dftracer_sqlite_vfs_truncate(sqlite3_file *pFile,
-                                        sqlite3_int64 size);
-static int dftracer_sqlite_vfs_sync(sqlite3_file *pFile, int flags);
-static int dftracer_sqlite_vfs_file_size(sqlite3_file *pFile,
-                                         sqlite3_int64 *pSize);
-static int dftracer_sqlite_vfs_lock(sqlite3_file *pFile, int eLock);
-static int dftracer_sqlite_vfs_unlock(sqlite3_file *pFile, int eLock);
-static int dftracer_sqlite_vfs_check_reserved_lock(sqlite3_file *pFile,
-                                                   int *pResOut);
-static int dftracer_sqlite_vfs_file_control(sqlite3_file *pFile, int op,
-                                            void *pArg);
-static int dftracer_sqlite_vfs_sector_size(sqlite3_file *pFile);
-static int dftracer_sqlite_vfs_device_characteristics(sqlite3_file *pFile);
-static int dftracer_sqlite_vfs_shm_map(sqlite3_file *pFile, int iRegion,
-                                       int szRegion, int bExtend,
-                                       void volatile **pp);
-static int dftracer_sqlite_vfs_shm_lock(sqlite3_file *pFile, int offset, int n,
-                                        int flags);
-static void dftracer_sqlite_vfs_shm_barrier(sqlite3_file *pFile);
-static int dftracer_sqlite_vfs_shm_unmap(sqlite3_file *pFile, int deleteFlag);
-static int dftracer_sqlite_vfs_fetch(sqlite3_file *pFile, sqlite3_int64 offset,
-                                     int amt, void **pp);
-static int dftracer_sqlite_vfs_unfetch(sqlite3_file *pFile,
-                                       sqlite3_int64 offset, void *p);
-
-// Static io_methods struct (iVersion=3 for WAL + mmap)
-static sqlite3_io_methods dftracer_sqlite_vfs_io_methods = {
-    3,                                           // iVersion
-    dftracer_sqlite_vfs_close,                   // xClose
-    dftracer_sqlite_vfs_read,                    // xRead
-    dftracer_sqlite_vfs_write,                   // xWrite
-    dftracer_sqlite_vfs_truncate,                // xTruncate
-    dftracer_sqlite_vfs_sync,                    // xSync
-    dftracer_sqlite_vfs_file_size,               // xFileSize
-    dftracer_sqlite_vfs_lock,                    // xLock
-    dftracer_sqlite_vfs_unlock,                  // xUnlock
-    dftracer_sqlite_vfs_check_reserved_lock,     // xCheckReservedLock
-    dftracer_sqlite_vfs_file_control,            // xFileControl
-    dftracer_sqlite_vfs_sector_size,             // xSectorSize
-    dftracer_sqlite_vfs_device_characteristics,  // xDeviceCharacteristics
-    dftracer_sqlite_vfs_shm_map,                 // xShmMap
-    dftracer_sqlite_vfs_shm_lock,                // xShmLock
-    dftracer_sqlite_vfs_shm_barrier,             // xShmBarrier
-    dftracer_sqlite_vfs_shm_unmap,               // xShmUnmap
-    dftracer_sqlite_vfs_fetch,                   // xFetch
-    dftracer_sqlite_vfs_unfetch,                 // xUnfetch
-};
-
-// Static VFS instance and app data
-static sqlite3_vfs dftracer_vfs_instance;
-static DfTracerSqliteVfsAppData *dftracer_vfs_app_data = nullptr;
-static bool dftracer_vfs_registered = false;
-
-// ============================================================================
-// sqlite3_io_methods implementations
-// ============================================================================
-
-static int dftracer_sqlite_vfs_close(sqlite3_file *pFile) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    // Clean up SHM resources
-    for (int i = 0; i < vf->n_shm_region; ++i) {
-        if (vf->shm_regions[i] != nullptr) {
-            ::munmap(vf->shm_regions[i], 32768);
-            vf->shm_regions[i] = nullptr;
-        }
-    }
-    if (vf->shm_fd >= 0) {
-        ::close(vf->shm_fd);
-        vf->shm_fd = -1;
-    }
-
-    if (vf->fd >= 0) {
-        ::close(vf->fd);
-        vf->fd = -1;
-    }
-
-    vf->path[0] = '\0';
-
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_read(sqlite3_file *pFile, void *buf, int amt,
-                                    sqlite3_int64 offset) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (vf->backend == nullptr) {
-        ssize_t n = ::pread(vf->fd, buf, static_cast<std::size_t>(amt),
-                            static_cast<off_t>(offset));
-        if (n == amt) return SQLITE_OK;
-        if (n >= 0) {
-            std::memset(static_cast<char *>(buf) + n, 0, amt - n);
-            return SQLITE_IOERR_SHORT_READ;
-        }
-        return SQLITE_IOERR_READ;
-    }
-
-    ssize_t result = vf->backend->submit_read_sync(
-        vf->fd, buf, static_cast<std::size_t>(amt), static_cast<off_t>(offset));
-
-    if (result == amt) return SQLITE_OK;
-    if (result >= 0) {
-        std::memset(static_cast<char *>(buf) + result, 0, amt - result);
-        return SQLITE_IOERR_SHORT_READ;
-    }
-    return SQLITE_IOERR_READ;
-}
-
-static int dftracer_sqlite_vfs_write(sqlite3_file *pFile, const void *buf,
-                                     int amt, sqlite3_int64 offset) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (vf->backend == nullptr) {
-        ssize_t n = ::pwrite(vf->fd, buf, static_cast<std::size_t>(amt),
-                             static_cast<off_t>(offset));
-        if (n == amt) return SQLITE_OK;
-        return SQLITE_IOERR_WRITE;
-    }
-
-    ssize_t result = vf->backend->submit_write_sync(
-        vf->fd, buf, static_cast<std::size_t>(amt), static_cast<off_t>(offset));
-
-    if (result == amt) return SQLITE_OK;
-    return SQLITE_IOERR_WRITE;
-}
-
-static int dftracer_sqlite_vfs_truncate(sqlite3_file *pFile,
-                                        sqlite3_int64 size) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (vf->backend == nullptr) {
-        if (::ftruncate(vf->fd, static_cast<off_t>(size)) != 0) {
-            return SQLITE_IOERR_TRUNCATE;
-        }
-        return SQLITE_OK;
-    }
-
-    int rc =
-        vf->backend->submit_ftruncate_sync(vf->fd, static_cast<off_t>(size));
-    if (rc != 0) return SQLITE_IOERR_TRUNCATE;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_sync(sqlite3_file *pFile, int /*flags*/) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (vf->backend == nullptr) {
-        if (::fsync(vf->fd) != 0) return SQLITE_IOERR_FSYNC;
-        return SQLITE_OK;
-    }
-
-    int rc = vf->backend->submit_fsync_sync(vf->fd);
-    if (rc != 0) return SQLITE_IOERR_FSYNC;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_file_size(sqlite3_file *pFile,
-                                         sqlite3_int64 *pSize) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-    struct stat st;
-
-    if (vf->backend == nullptr) {
-        if (::fstat(vf->fd, &st) != 0) return SQLITE_IOERR_FSTAT;
-        *pSize = st.st_size;
-        return SQLITE_OK;
-    }
-
-    int rc = vf->backend->submit_fstat_sync(vf->fd, &st);
-    if (rc != 0) return SQLITE_IOERR_FSTAT;
-    *pSize = st.st_size;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_lock(sqlite3_file *pFile, int eLock) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    struct flock fl;
-    std::memset(&fl, 0, sizeof(fl));
-
-    if (eLock == SQLITE_LOCK_NONE) {
-        return SQLITE_OK;
-    }
-
-    if (eLock == SQLITE_LOCK_SHARED) {
-        fl.l_type = F_RDLCK;
-    } else {
-        fl.l_type = F_WRLCK;
-    }
-    fl.l_whence = SEEK_SET;
-    fl.l_start = 0;
-    fl.l_len = 0;
-
-    if (::fcntl(vf->fd, F_SETLK, &fl) == -1) {
-        if (errno == EACCES || errno == EAGAIN) {
-            return SQLITE_BUSY;
-        }
-        return SQLITE_IOERR_LOCK;
-    }
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_unlock(sqlite3_file *pFile, int /*eLock*/) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    struct flock fl;
-    std::memset(&fl, 0, sizeof(fl));
-    fl.l_type = F_UNLCK;
-    fl.l_whence = SEEK_SET;
-    fl.l_start = 0;
-    fl.l_len = 0;
-
-    if (::fcntl(vf->fd, F_SETLK, &fl) == -1) {
-        return SQLITE_IOERR_UNLOCK;
-    }
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_check_reserved_lock(sqlite3_file *pFile,
-                                                   int *pResOut) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    struct flock fl;
-    std::memset(&fl, 0, sizeof(fl));
-    fl.l_type = F_WRLCK;
-    fl.l_whence = SEEK_SET;
-    fl.l_start = 0;
-    fl.l_len = 1;
-
-    if (::fcntl(vf->fd, F_GETLK, &fl) == -1) {
-        *pResOut = 0;
-        return SQLITE_IOERR_CHECKRESERVEDLOCK;
-    }
-
-    *pResOut = (fl.l_type != F_UNLCK) ? 1 : 0;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_file_control(sqlite3_file * /*pFile*/, int op,
-                                            void * /*pArg*/) {
-    if (op == SQLITE_FCNTL_LOCKSTATE) {
-        return SQLITE_OK;
-    }
-    return SQLITE_NOTFOUND;
-}
-
-static int dftracer_sqlite_vfs_sector_size(sqlite3_file * /*pFile*/) {
-    return 4096;
-}
-
-static int dftracer_sqlite_vfs_device_characteristics(
-    sqlite3_file * /*pFile*/) {
-    return SQLITE_IOCAP_ATOMIC512 | SQLITE_IOCAP_SAFE_APPEND;
-}
-
-// ============================================================================
-// SHM methods (WAL shared memory)
-// ============================================================================
-
-static int dftracer_sqlite_vfs_shm_map(sqlite3_file *pFile, int iRegion,
-                                       int szRegion, int bExtend,
-                                       void volatile **pp) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (iRegion >= 32) {
-        *pp = nullptr;
-        return SQLITE_IOERR;
-    }
-
-    // Open SHM file if not yet opened
-    if (vf->shm_fd < 0) {
-        char shm_path[1024];
-        std::snprintf(shm_path, sizeof(shm_path), "%s-shm", vf->path);
-        int oflags = O_RDWR | O_CREAT;
-        vf->shm_fd = ::open(shm_path, oflags, 0644);
-        if (vf->shm_fd < 0) {
-            *pp = nullptr;
-            return SQLITE_IOERR;
-        }
-    }
-
-    // Extend file if needed
-    off_t required_size =
-        static_cast<off_t>(iRegion + 1) * static_cast<off_t>(szRegion);
-    struct stat st;
-    if (::fstat(vf->shm_fd, &st) != 0) {
-        *pp = nullptr;
-        return SQLITE_IOERR;
-    }
-    if (st.st_size < required_size) {
-        if (!bExtend) {
-            *pp = nullptr;
-            return SQLITE_OK;
-        }
-        if (::ftruncate(vf->shm_fd, required_size) != 0) {
-            *pp = nullptr;
-            return SQLITE_IOERR;
-        }
-    }
-
-    // Map the region if not already mapped
-    if (iRegion >= vf->n_shm_region || vf->shm_regions[iRegion] == nullptr) {
-        off_t map_offset =
-            static_cast<off_t>(iRegion) * static_cast<off_t>(szRegion);
-        void *mapped =
-            ::mmap(nullptr, static_cast<std::size_t>(szRegion),
-                   PROT_READ | PROT_WRITE, MAP_SHARED, vf->shm_fd, map_offset);
-        if (mapped == MAP_FAILED) {
-            *pp = nullptr;
-            return SQLITE_IOERR;
-        }
-        vf->shm_regions[iRegion] = mapped;
-        if (iRegion >= vf->n_shm_region) {
-            vf->n_shm_region = iRegion + 1;
-        }
-    }
-
-    *pp = vf->shm_regions[iRegion];
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_shm_lock(sqlite3_file *pFile, int offset, int n,
-                                        int flags) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    if (vf->shm_fd < 0) return SQLITE_IOERR;
-
-    struct flock fl;
-    std::memset(&fl, 0, sizeof(fl));
-
-    if (flags & SQLITE_SHM_UNLOCK) {
-        fl.l_type = F_UNLCK;
-    } else if (flags & SQLITE_SHM_EXCLUSIVE) {
-        fl.l_type = F_WRLCK;
-    } else {
-        fl.l_type = F_RDLCK;
-    }
-    fl.l_whence = SEEK_SET;
-    fl.l_start = offset;
-    fl.l_len = n;
-
-    if (::fcntl(vf->shm_fd, F_SETLK, &fl) == -1) {
-        if (errno == EACCES || errno == EAGAIN) {
-            return SQLITE_BUSY;
-        }
-        return SQLITE_IOERR;
-    }
-    return SQLITE_OK;
-}
-
-static void dftracer_sqlite_vfs_shm_barrier(sqlite3_file * /*pFile*/) {
-    __atomic_thread_fence(__ATOMIC_SEQ_CST);
-}
-
-static int dftracer_sqlite_vfs_shm_unmap(sqlite3_file *pFile, int deleteFlag) {
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    for (int i = 0; i < vf->n_shm_region; ++i) {
-        if (vf->shm_regions[i] != nullptr) {
-            ::munmap(vf->shm_regions[i], 32768);
-            vf->shm_regions[i] = nullptr;
-        }
-    }
-    vf->n_shm_region = 0;
-
-    if (vf->shm_fd >= 0) {
-        ::close(vf->shm_fd);
-        if (deleteFlag) {
-            char shm_path[1024];
-            std::snprintf(shm_path, sizeof(shm_path), "%s-shm", vf->path);
-            ::unlink(shm_path);
-        }
-        vf->shm_fd = -1;
-    }
-
-    return SQLITE_OK;
-}
-
-// ============================================================================
-// mmap methods (version 3)
-// ============================================================================
-
-static int dftracer_sqlite_vfs_fetch(sqlite3_file * /*pFile*/,
-                                     sqlite3_int64 /*offset*/, int /*amt*/,
-                                     void **pp) {
-    // Disable mmap — returning nullptr tells SQLite to use
-    // xRead instead. This avoids tracking mmap sizes for munmap.
-    *pp = nullptr;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_unfetch(sqlite3_file * /*pFile*/,
-                                       sqlite3_int64 /*offset*/, void *p) {
-    (void)p;
-    return SQLITE_OK;
-}
-
-// ============================================================================
-// sqlite3_vfs implementations
-// ============================================================================
-
-static int dftracer_sqlite_vfs_open(sqlite3_vfs *pVfs, const char *zName,
-                                    sqlite3_file *pFile, int flags,
-                                    int *pOutFlags) {
-    auto *app = static_cast<DfTracerSqliteVfsAppData *>(pVfs->pAppData);
-    auto *vf = reinterpret_cast<DfTracerSqliteVfsFile *>(pFile);
-
-    // Zero the sqlite3_file base (C struct, safe to memset)
-    std::memset(&vf->base, 0, sizeof(vf->base));
-    vf->backend = nullptr;
-    vf->executor = nullptr;
-    vf->fd = -1;
-    vf->read_only = false;
-    vf->shm_fd = -1;
-    vf->n_shm_region = 0;
-    for (int i = 0; i < 32; ++i) {
-        vf->shm_regions[i] = nullptr;
-    }
-
-    std::snprintf(vf->path, sizeof(vf->path), "%s", zName ? zName : "");
-
-    vf->backend = app ? app->backend : nullptr;
-    vf->executor = app ? app->executor : nullptr;
-    vf->read_only = (flags & SQLITE_OPEN_READONLY) != 0;
-
-    // Build open flags
-    int oflags = 0;
-    if (flags & SQLITE_OPEN_EXCLUSIVE) {
-        oflags |= O_EXCL;
-    }
-    if (flags & SQLITE_OPEN_CREATE) {
-        oflags |= O_CREAT;
-    }
-    if (flags & SQLITE_OPEN_READONLY) {
-        oflags = O_RDONLY;
-    } else if (flags & SQLITE_OPEN_READWRITE) {
-        oflags |= O_RDWR;
-    }
-
-    // Handle temp/journal files without a name
-    if (zName == nullptr) {
-        const char *tmpdir = std::getenv("TMPDIR");
-        if (!tmpdir) tmpdir = "/tmp";
-        std::snprintf(vf->path, sizeof(vf->path), "%s/dftracer_sqlite_XXXXXX",
-                      tmpdir);
-        vf->fd = ::mkstemp(vf->path);
-        if (vf->fd < 0) {
-            return SQLITE_CANTOPEN;
-        }
-        ::unlink(vf->path);
-    } else {
-        vf->fd = ::open(zName, oflags, 0644);
-        if (vf->fd < 0) {
-            return SQLITE_CANTOPEN;
-        }
-    }
-
-    if (pOutFlags != nullptr) {
-        *pOutFlags = flags;
-    }
-
-    pFile->pMethods = &dftracer_sqlite_vfs_io_methods;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_delete(sqlite3_vfs * /*pVfs*/, const char *zPath,
-                                      int dirSync) {
-    if (::unlink(zPath) != 0) {
-        if (errno == ENOENT) return SQLITE_OK;
-        return SQLITE_IOERR_DELETE;
-    }
-
-    if (dirSync) {
-        // Sync the parent directory
-        char dir[1024];
-        std::snprintf(dir, sizeof(dir), "%s", zPath);
-        char *slash = std::strrchr(dir, '/');
-        if (slash) {
-            if (slash == dir) {
-                dir[1] = '\0';  // root "/"
-            } else {
-                *slash = '\0';
-            }
-        } else {
-            dir[0] = '.';
-            dir[1] = '\0';
-        }
-        int dfd = ::open(dir, O_RDONLY);
-        if (dfd >= 0) {
-            ::fsync(dfd);
-            ::close(dfd);
-        }
-    }
-
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_access(sqlite3_vfs * /*pVfs*/, const char *zPath,
-                                      int flags, int *pResOut) {
-    int mode = F_OK;
-    if (flags == SQLITE_ACCESS_READWRITE) {
-        mode = R_OK | W_OK;
-    } else if (flags == SQLITE_ACCESS_READ) {
-        mode = R_OK;
-    }
-
-    *pResOut = (::access(zPath, mode) == 0) ? 1 : 0;
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_fullpathname(sqlite3_vfs * /*pVfs*/,
-                                            const char *zName, int nOut,
-                                            char *zOut) {
-    char *resolved = ::realpath(zName, nullptr);
-    if (resolved != nullptr) {
-        std::strncpy(zOut, resolved, static_cast<std::size_t>(nOut));
-        zOut[nOut - 1] = '\0';
-        ::free(resolved);
-    } else {
-        // If realpath fails (file doesn't exist yet), copy as-is
-        std::strncpy(zOut, zName, static_cast<std::size_t>(nOut));
-        zOut[nOut - 1] = '\0';
-    }
-    return SQLITE_OK;
-}
-
-static int dftracer_sqlite_vfs_get_last_error(sqlite3_vfs * /*pVfs*/, int nBuf,
-                                              char *zBuf) {
-    if (nBuf > 0 && zBuf != nullptr) {
-        std::strncpy(zBuf, std::strerror(errno),
-                     static_cast<std::size_t>(nBuf));
-        zBuf[nBuf - 1] = '\0';
-    }
-    return errno;
-}
-
-// ============================================================================
-// VFS Registration
-// ============================================================================
-
-void register_dftracer_sqlite_vfs(io::IoBackend *backend, Executor *executor) {
-    if (dftracer_vfs_registered) return;
-
-    sqlite3_vfs *default_vfs = sqlite3_vfs_find(nullptr);
-
-    dftracer_vfs_app_data = new DfTracerSqliteVfsAppData{backend, executor};
-
-    std::memset(&dftracer_vfs_instance, 0, sizeof(dftracer_vfs_instance));
-    dftracer_vfs_instance.iVersion = 3;
-    dftracer_vfs_instance.szOsFile =
-        static_cast<int>(sizeof(DfTracerSqliteVfsFile));
-    dftracer_vfs_instance.mxPathname = VFS_MAX_PATHNAME;
-    dftracer_vfs_instance.pNext = nullptr;
-    dftracer_vfs_instance.zName = "dftracer_sqlite";
-    dftracer_vfs_instance.pAppData = dftracer_vfs_app_data;
-    dftracer_vfs_instance.xOpen = dftracer_sqlite_vfs_open;
-    dftracer_vfs_instance.xDelete = dftracer_sqlite_vfs_delete;
-    dftracer_vfs_instance.xAccess = dftracer_sqlite_vfs_access;
-    dftracer_vfs_instance.xFullPathname = dftracer_sqlite_vfs_fullpathname;
-    dftracer_vfs_instance.xGetLastError = dftracer_sqlite_vfs_get_last_error;
-
-    // Delegate time/random/sleep to default VFS
-    if (default_vfs != nullptr) {
-        dftracer_vfs_instance.xRandomness = default_vfs->xRandomness;
-        dftracer_vfs_instance.xSleep = default_vfs->xSleep;
-        dftracer_vfs_instance.xCurrentTime = default_vfs->xCurrentTime;
-        dftracer_vfs_instance.xCurrentTimeInt64 =
-            default_vfs->xCurrentTimeInt64;
-    }
-
-    sqlite3_vfs_register(&dftracer_vfs_instance, 0);
-    dftracer_vfs_registered = true;
-}
-
-void unregister_dftracer_sqlite_vfs() {
-    if (!dftracer_vfs_registered) return;
-
-    sqlite3_vfs_unregister(&dftracer_vfs_instance);
-
-    delete dftracer_vfs_app_data;
-    dftracer_vfs_app_data = nullptr;
-    dftracer_vfs_registered = false;
-}
-
-}  // namespace dftracer::utils::sqlite
diff --git a/src/dftracer/utils/python/indexer.cpp b/src/dftracer/utils/python/indexer.cpp
index c82a3525..071a6986 100644
--- a/src/dftracer/utils/python/indexer.cpp
+++ b/src/dftracer/utils/python/indexer.cpp
@@ -2,6 +2,7 @@
 #include <dftracer/utils/python/indexer.h>
 #include <dftracer/utils/python/indexer_checkpoint.h>
 #include <dftracer/utils/python/runtime.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/index_builder_utility.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
@@ -11,14 +12,27 @@
 
 static void Indexer_dealloc(IndexerObject *self) {
     if (self->handle) {
+        // The Python wrapper owns only the native indexer handle. The
+        // underlying RocksDB instance remains manager-owned and may continue to
+        // live process-wide for the same .dftindex path.
         dft_indexer_destroy(self->handle);
+        self->handle = NULL;
     }
     Py_XDECREF(self->gz_path);
-    Py_XDECREF(self->idx_path);
+    Py_XDECREF(self->index_path);
     Py_XDECREF(self->runtime_obj);
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
+static void Indexer_release_handle(IndexerObject *self) {
+    // Nothing to release when the wrapper holds no native handle.
+    if (!self->handle) return;
+    // Tear down only this wrapper's native indexer state; shared RocksDB
+    // lifetime is managed separately by RocksDBManager.
+    dft_indexer_destroy(self->handle);
+    self->handle = NULL;
+}
+
 static PyObject *Indexer_new(PyTypeObject *type, PyObject *args,
                              PyObject *kwds) {
     IndexerObject *self;
@@ -26,7 +40,7 @@ static PyObject *Indexer_new(PyTypeObject *type, PyObject *args,
     if (self != NULL) {
         self->handle = NULL;
         self->gz_path = NULL;
-        self->idx_path = NULL;
+        self->index_path = NULL;
         self->checkpoint_size = 0;
         self->build_bloom = 0;
         self->build_manifest = 0;
@@ -39,11 +53,11 @@ static PyObject *Indexer_new(PyTypeObject *type, PyObject *args,
 
 static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
     static const char *kwlist[] = {
-        "gz_path",         "idx_path",    "checkpoint_size",
+        "gz_path",         "index_path",  "checkpoint_size",
         "force_rebuild",   "build_bloom", "build_manifest",
         "index_threshold", "runtime",     NULL};
     const char *gz_path;
-    const char *idx_path = NULL;
+    const char *index_path = NULL;
     std::uint64_t checkpoint_size =
         dftracer::utils::constants::indexer::DEFAULT_CHECKPOINT_SIZE;
     int force_rebuild = 0;
@@ -54,7 +68,7 @@ static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
     PyObject *runtime_arg = NULL;
 
     if (!PyArg_ParseTupleAndKeywords(
-            args, kwds, "s|snpppnO", (char **)kwlist, &gz_path, &idx_path,
+            args, kwds, "s|snpppnO", (char **)kwlist, &gz_path, &index_path,
             &checkpoint_size, &force_rebuild, &build_bloom, &build_manifest,
             &index_threshold, &runtime_arg)) {
         return -1;
@@ -82,15 +96,15 @@ static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
         return -1;
     }
 
-    if (idx_path) {
-        self->idx_path = PyUnicode_FromString(idx_path);
+    if (index_path) {
+        self->index_path = PyUnicode_FromString(index_path);
     } else {
-        PyObject *gz_path_obj = PyUnicode_FromString(gz_path);
-        self->idx_path = PyUnicode_FromFormat("%U.idx", gz_path_obj);
-        Py_DECREF(gz_path_obj);
+        const std::string index_path = dftracer::utils::utilities::composites::
+            dft::internal::determine_index_path(gz_path, "");
+        self->index_path = PyUnicode_FromString(index_path.c_str());
     }
 
-    if (!self->idx_path) {
+    if (!self->index_path) {
         Py_DECREF(self->gz_path);
         return -1;
     }
@@ -100,12 +114,12 @@ static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
     self->build_manifest = build_manifest;
     self->index_threshold = index_threshold;
 
-    const char *idx_path_str = PyUnicode_AsUTF8(self->idx_path);
-    if (!idx_path_str) {
+    const char *index_path_str = PyUnicode_AsUTF8(self->index_path);
+    if (!index_path_str) {
         return -1;
     }
 
-    self->handle = dft_indexer_create(gz_path, idx_path_str, checkpoint_size,
+    self->handle = dft_indexer_create(gz_path, index_path_str, checkpoint_size,
                                       force_rebuild);
     if (!self->handle) {
         PyErr_SetString(PyExc_RuntimeError, "Failed to create indexer");
@@ -133,7 +147,7 @@ static PyObject *Indexer_build(IndexerObject *self,
     using namespace dftracer::utils::utilities::indexer;
 
     const char *gz = PyUnicode_AsUTF8(self->gz_path);
-    const char *idx = PyUnicode_AsUTF8(self->idx_path);
+    const char *idx = PyUnicode_AsUTF8(self->index_path);
     if (!gz || !idx) {
         return NULL;
     }
@@ -296,7 +310,7 @@ static PyObject *Indexer_get_checkpoints(IndexerObject *self,
 }
 
 static PyObject *Indexer_has_bloom(IndexerObject *self, void *closure) {
-    const char *idx = PyUnicode_AsUTF8(self->idx_path);
+    const char *idx = PyUnicode_AsUTF8(self->index_path);
     const char *gz = PyUnicode_AsUTF8(self->gz_path);
     if (!idx || !gz) {
         Py_RETURN_FALSE;
@@ -304,7 +318,8 @@ static PyObject *Indexer_has_bloom(IndexerObject *self, void *closure) {
     try {
         using namespace dftracer::utils::utilities::indexer;
         using namespace dftracer::utils::utilities::indexer::internal;
-        IndexDatabase db(idx);
+        IndexDatabase db(
+            idx, dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
         std::string logical = get_logical_path(gz);
         int fid = db.get_file_info_id(logical);
         if (fid >= 0 && db.has_bloom_data(fid)) {
@@ -316,7 +331,7 @@ static PyObject *Indexer_has_bloom(IndexerObject *self, void *closure) {
 }
 
 static PyObject *Indexer_has_manifest(IndexerObject *self, void *closure) {
-    const char *idx = PyUnicode_AsUTF8(self->idx_path);
+    const char *idx = PyUnicode_AsUTF8(self->index_path);
     const char *gz = PyUnicode_AsUTF8(self->gz_path);
     if (!idx || !gz) {
         Py_RETURN_FALSE;
@@ -324,7 +339,8 @@ static PyObject *Indexer_has_manifest(IndexerObject *self, void *closure) {
     try {
         using namespace dftracer::utils::utilities::indexer;
         using namespace dftracer::utils::utilities::indexer::internal;
-        IndexDatabase db(idx);
+        IndexDatabase db(
+            idx, dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
         std::string logical = get_logical_path(gz);
         int fid = db.get_file_info_id(logical);
         if (fid >= 0 && db.has_manifest_data(fid)) {
@@ -340,9 +356,17 @@ static PyObject *Indexer_gz_path(IndexerObject *self, void *closure) {
     return self->gz_path;
 }
 
-static PyObject *Indexer_idx_path(IndexerObject *self, void *closure) {
-    Py_INCREF(self->idx_path);
-    return self->idx_path;
+// Getter for the `index_path` attribute; returns a new reference.
+//
+// Indexer_new() leaves index_path NULL until __init__ stores a path, so an
+// instance whose __init__ has not completed yields None here instead of
+// handing NULL to Py_INCREF.
+static PyObject *Indexer_index_path(IndexerObject *self, void *closure) {
+    if (!self->index_path) {
+        Py_RETURN_NONE;
+    }
+    Py_INCREF(self->index_path);
+    return self->index_path;
 }
 
 static PyObject *Indexer_checkpoint_size(IndexerObject *self, void *closure) {
@@ -355,7 +371,19 @@ static PyObject *Indexer_enter(IndexerObject *self,
     return (PyObject *)self;
 }
 
+// Python-visible close(): releases the native indexer handle held by this
+// wrapper. Repeated calls are harmless because Indexer_release_handle()
+// skips a handle that is already cleared.
+static PyObject *Indexer_close(IndexerObject *self,
+                               PyObject *Py_UNUSED(ignored)) {
+    Indexer_release_handle(self);
+    Py_RETURN_NONE;
+}
+
+// Python-visible __exit__(): releases the handle the same way close() does.
+// `args` is not inspected and None is returned.
 static PyObject *Indexer_exit(IndexerObject *self, PyObject *args) {
+    Indexer_release_handle(self);
     Py_RETURN_NONE;
 }
 
@@ -368,7 +391,7 @@ static PyMethodDef Indexer_methods[] = {
     {"need_rebuild", (PyCFunction)Indexer_need_rebuild, METH_NOARGS,
      "Check if a rebuild is needed."},
     {"exists", (PyCFunction)Indexer_exists, METH_NOARGS,
-     "Check if the index file exists."},
+     "Check if the .dftindex store exists."},
     {"get_max_bytes", (PyCFunction)Indexer_get_max_bytes, METH_NOARGS,
      "Get the maximum uncompressed bytes in the indexed file."},
     {"get_num_lines", (PyCFunction)Indexer_get_num_lines, METH_NOARGS,
@@ -380,17 +403,25 @@ static PyMethodDef Indexer_methods[] = {
      "    offset (int): Uncompressed byte offset.\n"},
     {"get_checkpoints", (PyCFunction)Indexer_get_checkpoints, METH_NOARGS,
      "Get all checkpoints for this file as a list."},
+    {"close", (PyCFunction)Indexer_close, METH_NOARGS,
+     "Release this Python wrapper's native indexer handle.\n"
+     "\n"
+     "The shared RocksDB instance for the same .dftindex path remains managed\n"
+     "by the native RocksDBManager cache."},
     {"__enter__", (PyCFunction)Indexer_enter, METH_NOARGS,
      "Enter the runtime context for the with statement."},
     {"__exit__", (PyCFunction)Indexer_exit, METH_VARARGS,
-     "Exit the runtime context for the with statement."},
+     "Release this Python wrapper on context exit.\n"
+     "\n"
+     "This does not force-close the shared RocksDB instance for the same\n"
+     ".dftindex path."},
     {NULL} /* Sentinel */
 };
 
 static PyGetSetDef Indexer_getsetters[] = {
     {"gz_path", (getter)Indexer_gz_path, NULL, "Path to the gzip file", NULL},
-    {"idx_path", (getter)Indexer_idx_path, NULL, "Path to the index file",
-     NULL},
+    {"index_path", (getter)Indexer_index_path, NULL,
+     "Path to the .dftindex store", NULL},
     {"checkpoint_size", (getter)Indexer_checkpoint_size, NULL,
      "Checkpoint size in bytes", NULL},
     {"has_bloom", (getter)Indexer_has_bloom, NULL,
@@ -420,7 +451,7 @@ PyTypeObject IndexerType = {
     0,                                                /* tp_setattro */
     0,                                                /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,         /* tp_flags */
-    "Indexer(gz_path: str, idx_path: str | None = None,\n"
+    "Indexer(gz_path: str, index_path: str | None = None,\n"
     "       checkpoint_size: int = 1048576,\n"
     "       force_rebuild: bool = False, build_bloom: bool = False,\n"
     "       build_manifest: bool = False,\n"
@@ -428,20 +459,20 @@ PyTypeObject IndexerType = {
     "       runtime: Runtime | None = None)\n"
     "--\n"
     "\n"
-    "Indexer for creating and managing gzip file indices.\n"
+    "Indexer for creating and managing gzip trace index stores.\n"
     "\n"
     "Args:\n"
     "    gz_path (str): Path to the gzip trace file.\n"
-    "    idx_path (str or None): Path to the index file. If None,\n"
-    "        uses gz_path + \".idx\".\n"
+    "    index_path (str or None): Path to the .dftindex store. If None,\n"
+    "        uses the root-local \".dftindex\" next to gz_path.\n"
     "    checkpoint_size (int): Checkpoint size in bytes for index\n"
     "        building (default 1 MB).\n"
     "    force_rebuild (bool): If True, rebuild the index even if it\n"
     "        exists.\n"
     "    build_bloom (bool): If True, build bloom filter data in the\n"
-    "        index.\n"
+    "        store.\n"
     "    build_manifest (bool): If True, build manifest data in the\n"
-    "        index.\n"
+    "        store.\n"
     "    index_threshold (int): Skip indexing for files smaller than\n"
     "        this (default 1 MB).\n"
     "    runtime (Runtime or None): Runtime instance for thread pool\n"
diff --git a/src/dftracer/utils/python/indexer.h b/src/dftracer/utils/python/indexer.h
index 536ca97e..d31d0ccf 100644
--- a/src/dftracer/utils/python/indexer.h
+++ b/src/dftracer/utils/python/indexer.h
@@ -9,7 +9,7 @@
 typedef struct {
     PyObject_HEAD dft_indexer_handle_t handle;
     PyObject *gz_path;
-    PyObject *idx_path;
+    PyObject *index_path;
     std::uint64_t checkpoint_size;
     int build_bloom;
     int build_manifest;
diff --git a/src/dftracer/utils/python/trace_reader.cpp b/src/dftracer/utils/python/trace_reader.cpp
index 5d0dcb75..f50c0e33 100644
--- a/src/dftracer/utils/python/trace_reader.cpp
+++ b/src/dftracer/utils/python/trace_reader.cpp
@@ -895,7 +895,8 @@ static PyObject *TraceReader_iter_lines(TraceReaderObject *self, PyObject *args,
 
     Runtime *rt = get_runtime(self);
     try {
-        rt->submit(produce_lines(state, cfg, rc), "iter_lines");
+        auto handle = rt->submit(produce_lines(state, cfg, rc), "iter_lines");
+        state->task_future = handle.future;
     } catch (const std::exception &e) {
         PyErr_SetString(PyExc_RuntimeError, e.what());
         return NULL;
@@ -954,7 +955,8 @@ static PyObject *TraceReader_iter_raw(TraceReaderObject *self, PyObject *args,
 
     Runtime *rt = get_runtime(self);
     try {
-        rt->submit(produce_raw(state, cfg, rc), "iter_raw");
+        auto handle = rt->submit(produce_raw(state, cfg, rc), "iter_raw");
+        state->task_future = handle.future;
     } catch (const std::exception &e) {
         PyErr_SetString(PyExc_RuntimeError, e.what());
         return NULL;
@@ -1026,7 +1028,9 @@ static PyObject *TraceReader_iter_lines_json(TraceReaderObject *self,
 
     Runtime *rt = get_runtime(self);
     try {
-        rt->submit(produce_lines(state, cfg, rc), "iter_lines_json");
+        auto handle =
+            rt->submit(produce_lines(state, cfg, rc), "iter_lines_json");
+        state->task_future = handle.future;
     } catch (const std::exception &e) {
         PyErr_SetString(PyExc_RuntimeError, e.what());
         return NULL;
@@ -1100,10 +1104,12 @@ static PyObject *TraceReader_iter_arrow(TraceReaderObject *self, PyObject *args,
 
     Runtime *rt = get_runtime(self);
     try {
-        rt->submit(produce_arrow_batches(state, cfg, rc,
-                                         static_cast<std::size_t>(batch_size),
-                                         flatten_objects != 0, normalize != 0),
-                   "iter_arrow");
+        auto handle =
+            rt->submit(produce_arrow_batches(
+                           state, cfg, rc, static_cast<std::size_t>(batch_size),
+                           flatten_objects != 0, normalize != 0),
+                       "iter_arrow");
+        state->task_future = handle.future;
     } catch (const std::exception &e) {
         PyErr_SetString(PyExc_RuntimeError, e.what());
         return NULL;
@@ -1294,7 +1300,10 @@ static PyMethodDef TraceReader_methods[] = {
     {"__enter__", (PyCFunction)TraceReader_enter, METH_NOARGS,
      "Enter the runtime context for the with statement."},
     {"__exit__", (PyCFunction)TraceReader_exit, METH_VARARGS,
-     "Exit the runtime context for the with statement."},
+     "Exit the runtime context for the with statement.\n"
+     "\n"
+     "TraceReader does not own the shared RocksDB instance for an index path;\n"
+     "any shared DB lifetime remains manager-owned on the native side."},
     {NULL}};
 
 static PyGetSetDef TraceReader_getsetters[] = {
@@ -1336,13 +1345,13 @@ PyTypeObject TraceReaderType = {
     "--\n"
     "\n"
     "Smart trace file reader that auto-selects sequential or indexed\n"
-    "reading based on whether an ``.idx`` sidecar exists.\n"
+    "reading based on whether a ``.dftindex`` store exists.\n"
     "\n"
     "Args:\n"
     "    file_path (str): Path to the trace file (.pfw.gz or plain "
     "text).\n"
-    "    index_dir (str): Directory to search for ``.idx`` sidecar "
-    "files.\n"
+    "    index_dir (str): Directory to search for ``.dftindex`` "
+    "stores.\n"
     "        Empty string (default) searches next to the trace file.\n"
     "    checkpoint_size (int): Checkpoint interval in bytes for index\n"
     "        building (default 32 MB).\n"
diff --git a/src/dftracer/utils/python/trace_reader_iterator.cpp b/src/dftracer/utils/python/trace_reader_iterator.cpp
index f3a669db..87bf54a5 100644
--- a/src/dftracer/utils/python/trace_reader_iterator.cpp
+++ b/src/dftracer/utils/python/trace_reader_iterator.cpp
@@ -138,17 +138,39 @@ PyTypeObject ArrowBatchCapsuleType = {
 static void TraceReaderIterator_dealloc(TraceReaderIteratorObject *self) {
+    // Cancels the producer behind `state`, waits for it to finish with the
+    // GIL released, then drops this iterator's reference to the state.
+    //
+    // `cancelled` is published while owning `mtx`. Storing it without the
+    // mutex lets a waiter that has already evaluated its predicate miss the
+    // notification; the wait on `done` below would then never return.
+    // NOTE(review): assumes producers wait on cv_producer under `mtx` and
+    // notify cv_consumer after setting `done` -- confirm in produce_*().
+    const auto cancel_and_join = [](auto &state) {
+        auto task_future = state->task_future;
+        // Same effect as Py_BEGIN/END_ALLOW_THREADS, spelled out so the
+        // GIL-free region is explicit.
+        PyThreadState *saved_thread = PyEval_SaveThread();
+        {
+            std::unique_lock<std::mutex> lock(state->mtx);
+            state->cancelled.store(true, std::memory_order_release);
+            state->cv_producer.notify_all();
+            state->cv_consumer.notify_all();  // wake blocked __next__
+            state->cv_consumer.wait(lock, [&state] {
+                return state->done.load(std::memory_order_acquire);
+            });
+        }
+        if (task_future.valid()) {
+            task_future.wait();
+        }
+        PyEval_RestoreThread(saved_thread);
+        state.reset();
+    };
 #ifdef DFTRACER_UTILS_ENABLE_ARROW
     if (self->arrow_state) {
-        self->arrow_state->cancelled.store(true, std::memory_order_release);
-        self->arrow_state->cv_producer.notify_all();
-        self->arrow_state->cv_consumer.notify_all();  // wake blocked __next__
-        self->arrow_state.reset();
+        cancel_and_join(self->arrow_state);
     }
 #endif
     if (self->state) {
-        self->state->cancelled.store(true, std::memory_order_release);
-        self->state->cv_producer.notify_all();
-        self->state->cv_consumer.notify_all();  // wake blocked __next__
-        self->state.reset();
+        cancel_and_join(self->state);
     }
     Py_TYPE(self)->tp_free((PyObject *)self);
 }
diff --git a/src/dftracer/utils/python/trace_reader_iterator.h b/src/dftracer/utils/python/trace_reader_iterator.h
index 8c985721..11941fd8 100644
--- a/src/dftracer/utils/python/trace_reader_iterator.h
+++ b/src/dftracer/utils/python/trace_reader_iterator.h
@@ -2,6 +2,7 @@
 #define DFTRACER_UTILS_PYTHON_TRACE_READER_ITERATOR_H
 
 #include <Python.h>
+#include <dftracer/utils/core/task_handle.h>
 
 #include <atomic>
 #include <condition_variable>
@@ -40,6 +41,7 @@ struct IteratorState {
     std::atomic<bool> cancelled{false};
     std::atomic<bool> done{false};
     std::size_t max_queue_size = 64;
+    std::shared_future<void> task_future;
 };
 
 #ifdef DFTRACER_UTILS_ENABLE_ARROW
@@ -54,6 +56,7 @@ struct ArrowIteratorState {
     std::atomic<bool> cancelled{false};
     std::atomic<bool> done{false};
     std::size_t max_queue_size = 8;
+    std::shared_future<void> task_future;
 };
 #endif
 
diff --git a/src/dftracer/utils/python/utilities/aggregator.cpp b/src/dftracer/utils/python/utilities/aggregator.cpp
index 02154697..6f7799d4 100644
--- a/src/dftracer/utils/python/utilities/aggregator.cpp
+++ b/src/dftracer/utils/python/utilities/aggregator.cpp
@@ -302,7 +302,7 @@ static PyMethodDef Aggregator_methods[] = {
      "    group_keys (list[str] or None): Extra grouping dims (default None).\n"
      "    categories (list[str] or None): Category filter (default None).\n"
      "    names (list[str] or None): Name filter (default None).\n"
-     "    index_dir (str): Index sidecar directory (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "    checkpoint_size (int): Checkpoint size (default 33554432).\n"
      "    force_rebuild (bool): Force index rebuild (default False).\n"
      "    chunk_size_mb (int): Target chunk size in MB (default 64).\n"
@@ -334,7 +334,7 @@ static PyMethodDef Aggregator_methods[] = {
      "    group_keys (list[str] or None): Extra grouping dims (default None).\n"
      "    categories (list[str] or None): Category filter (default None).\n"
      "    names (list[str] or None): Name filter (default None).\n"
-     "    index_dir (str): Index sidecar directory (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "    checkpoint_size (int): Checkpoint size (default 33554432).\n"
      "    force_rebuild (bool): Force index rebuild (default False).\n"
      "    chunk_size_mb (int): Target chunk size in MB (default 64).\n"
diff --git a/src/dftracer/utils/python/utilities/comparator.cpp b/src/dftracer/utils/python/utilities/comparator.cpp
index b9b842b4..c377fac7 100644
--- a/src/dftracer/utils/python/utilities/comparator.cpp
+++ b/src/dftracer/utils/python/utilities/comparator.cpp
@@ -195,7 +195,7 @@ CoroTask<EventAggregatorUtilityOutput> run_aggregation(
                                     -> CoroTask<void> {
                         [[maybe_unused]] auto producer_guard = ch.guard();
 
-                        std::string idx_path =
+                        std::string index_path =
                             composites::dft::internal::determine_index_path(
                                 file_path, index_dir);
 
@@ -204,7 +204,7 @@ CoroTask<EventAggregatorUtilityOutput> run_aggregation(
                                 from_file(file_path)
                                     .with_checkpoint_size(checkpoint_size)
                                     .with_force_rebuild(force_rebuild)
-                                    .with_index(idx_path);
+                                    .with_index(index_path);
                         auto metadata =
                             co_await composites::dft::MetadataCollectorUtility{}
                                 .process(meta_input);
@@ -392,6 +392,14 @@ static int run_comparison_pipeline(ComparatorObject *self,
 
             // Build indexes upfront
             {
+                if (config.force_rebuild && !baseline_files.empty()) {
+                    const std::string shared_index_path =
+                        composites::dft::internal::determine_index_path(
+                            baseline_files.front(), config.index_dir);
+                    if (fs::exists(shared_index_path)) {
+                        fs::remove_all(shared_index_path);
+                    }
+                }
                 std::unordered_set<std::string> seen;
                 std::vector<std::string> all_files;
                 for (const auto &f : baseline_files) {
@@ -406,7 +414,7 @@ static int run_comparison_pipeline(ComparatorObject *self,
                     idx_configs.push_back(
                         indexer::IndexBuildConfig::for_file(file_path)
                             .with_checkpoint_size(config.checkpoint_size)
-                            .with_force_rebuild(config.force_rebuild)
+                            .with_force_rebuild(false)
                             .with_index_dir(config.index_dir));
                 }
                 std::vector<CoroTask<indexer::IndexBuildResult>> idx_tasks;
@@ -646,7 +654,7 @@ static const char *COMPARE_DOC =
     "(default 5000).\n"
     "    threshold (float): Hide changes below this pct.\n"
     "    executor_threads (int): Parallel threads (0=auto).\n"
-    "    index_dir (str): Index sidecar directory.\n"
+    "    index_dir (str): Directory for .dftindex stores.\n"
     "    force_rebuild (bool): Force index rebuild.\n"
     "    config (str): JSON config file path.\n"
     "\n"
@@ -673,7 +681,7 @@ static const char *COMPARE_JSON_DOC =
     "(default 5000).\n"
     "    threshold (float): Hide changes below this pct.\n"
     "    executor_threads (int): Parallel threads (0=auto).\n"
-    "    index_dir (str): Index sidecar directory.\n"
+    "    index_dir (str): Directory for .dftindex stores.\n"
     "    force_rebuild (bool): Force index rebuild.\n"
     "    config (str): JSON config file path.\n"
     "\n"
@@ -700,7 +708,7 @@ static const char *COMPARE_TABLE_DOC =
     "(default 5000).\n"
     "    threshold (float): Hide changes below this pct.\n"
     "    executor_threads (int): Parallel threads (0=auto).\n"
-    "    index_dir (str): Index sidecar directory.\n"
+    "    index_dir (str): Directory for .dftindex stores.\n"
     "    force_rebuild (bool): Force index rebuild.\n"
     "    config (str): JSON config file path.\n"
     "\n"
diff --git a/src/dftracer/utils/python/utilities/metadata_collector.cpp b/src/dftracer/utils/python/utilities/metadata_collector.cpp
index 529728a3..7776c399 100644
--- a/src/dftracer/utils/python/utilities/metadata_collector.cpp
+++ b/src/dftracer/utils/python/utilities/metadata_collector.cpp
@@ -4,6 +4,7 @@
 #include <dftracer/utils/core/runtime.h>
 #include <dftracer/utils/python/runtime.h>
 #include <dftracer/utils/python/utilities/metadata_collector.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/metadata_collector_utility.h>
 
 #include <string>
@@ -74,6 +75,7 @@ static PyObject *MetadataCollector_collect(MetadataCollectorObject *self,
         return NULL;
 
     std::string file_path_str(file_path);
+    std::string index_dir_str(index_dir);
     std::string error_msg;
     MetadataCollectorUtilityOutput output;
 
@@ -82,7 +84,8 @@ static PyObject *MetadataCollector_collect(MetadataCollectorObject *self,
 
         MetadataCollectorUtilityInput input;
         input.file_path = file_path_str;
-        input.idx_path = file_path_str + ".idx";
+        input.index_path = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(file_path_str, index_dir_str);
 
         auto *out_p = &output;
         auto input_copy = input;
@@ -160,7 +163,7 @@ static PyObject *MetadataCollector_collect(MetadataCollectorObject *self,
     } while (0)
 
     SET_STR("file_path", output.file_path.c_str());
-    SET_STR("idx_path", output.idx_path.c_str());
+    SET_STR("index_path", output.index_path.c_str());
     SET_DBL("size_mb", output.size_mb);
     SET_SZT("start_line", output.start_line);
     SET_SZT("end_line", output.end_line);
@@ -199,7 +202,7 @@ static PyMethodDef MetadataCollector_methods[] = {
      "\n"
      "Args:\n"
      "    file_path (str): Path to the trace file.\n"
-     "    index_dir (str): Directory for index sidecars.\n"},
+     "    index_dir (str): Directory for .dftindex stores.\n"},
     {NULL}};
 
 PyTypeObject MetadataCollectorType = {
@@ -231,7 +234,7 @@ PyTypeObject MetadataCollectorType = {
     "\n"
     "process(file_path, index_dir='') -> dict\n"
     "    file_path (str): Path to the trace file.\n"
-    "    index_dir (str): Directory for index sidecar files.\n",
+    "    index_dir (str): Directory for .dftindex stores.\n",
     0,                                /* tp_traverse */
     0,                                /* tp_clear */
     0,                                /* tp_richcompare */
diff --git a/src/dftracer/utils/python/utilities/reconstruction_planner.cpp b/src/dftracer/utils/python/utilities/reconstruction_planner.cpp
index 47c6e30a..38100877 100644
--- a/src/dftracer/utils/python/utilities/reconstruction_planner.cpp
+++ b/src/dftracer/utils/python/utilities/reconstruction_planner.cpp
@@ -210,7 +210,7 @@ static PyMethodDef ReconstructionPlanner_methods[] = {
      "\n"
      "Args:\n"
      "    reorganized_files (list[str]): Paths to reorganized files.\n"
-     "    index_dir (str): Directory for index sidecars (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "\n"
      "Returns:\n"
      "    dict: Reconstruction plan.\n"},
@@ -250,7 +250,7 @@ PyTypeObject ReconstructionPlannerType = {
     "\n"
     "process(reorganized_files, index_dir='') -> dict\n"
     "    reorganized_files (list[str]): Paths to reorganized trace files.\n"
-    "    index_dir (str): Directory containing provenance index sidecars.\n",
+    "    index_dir (str): Directory containing `.dftindex` stores.\n",
     /* tp_doc */
     0,                                    /* tp_traverse */
     0,                                    /* tp_clear */
diff --git a/src/dftracer/utils/python/utilities/reorganization_planner.cpp b/src/dftracer/utils/python/utilities/reorganization_planner.cpp
index 848f0bd5..929bfe79 100644
--- a/src/dftracer/utils/python/utilities/reorganization_planner.cpp
+++ b/src/dftracer/utils/python/utilities/reorganization_planner.cpp
@@ -178,8 +178,8 @@ static PyObject *ReorganizationPlanner_plan(ReorganizationPlannerObject *self,
         }
         PyDict_SetItemString(entry, "file_path",
                              PyUnicode_FromString(sf.file_path.c_str()));
-        PyDict_SetItemString(entry, "idx_path",
-                             PyUnicode_FromString(sf.idx_path.c_str()));
+        PyDict_SetItemString(entry, "index_path",
+                             PyUnicode_FromString(sf.index_path.c_str()));
         PyDict_SetItemString(entry, "num_checkpoints",
                              PyLong_FromSize_t(sf.num_checkpoints));
         PyDict_SetItemString(entry, "uncompressed_size",
@@ -254,7 +254,7 @@ static PyMethodDef ReorganizationPlanner_methods[] = {
      "    source_files (list[str]): Paths to source trace files.\n"
      "    groups (list[dict] or None): Predicate group definitions\n"
      "        (default None).\n"
-     "    index_dir (str): Directory for index sidecars (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "\n"
      "Returns:\n"
      "    dict: Extraction plan.\n"},
diff --git a/src/dftracer/utils/python/utilities/statistics_aggregator.cpp b/src/dftracer/utils/python/utilities/statistics_aggregator.cpp
index 8ea1ab4b..660d6149 100644
--- a/src/dftracer/utils/python/utilities/statistics_aggregator.cpp
+++ b/src/dftracer/utils/python/utilities/statistics_aggregator.cpp
@@ -3,6 +3,7 @@
 #include <dftracer/utils/core/runtime.h>
 #include <dftracer/utils/python/runtime.h>
 #include <dftracer/utils/python/utilities/statistics_aggregator.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/trace_statistics.h>
 
@@ -83,7 +84,8 @@ static PyObject *StatisticsAggregator_compute(StatisticsAggregatorObject *self,
         StatisticsAggregatorInput input;
         input.file_path = file_path_str;
         input.index_dir = index_dir_str;
-        input.idx_path = file_path_str + ".idx";
+        input.index_path = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(file_path_str, index_dir_str);
 
         auto *stats_p = &stats;
         auto input_copy = input;
@@ -199,7 +201,7 @@ static PyMethodDef StatisticsAggregator_methods[] = {
      "\n"
      "Args:\n"
      "    file_path (str): Path to the trace file.\n"
-     "    index_dir (str): Directory for index sidecars (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "\n"
      "Returns:\n"
      "    dict: Aggregated statistics.\n"},
diff --git a/src/dftracer/utils/python/utilities/statistics_query.cpp b/src/dftracer/utils/python/utilities/statistics_query.cpp
index a7e27265..95a6c173 100644
--- a/src/dftracer/utils/python/utilities/statistics_query.cpp
+++ b/src/dftracer/utils/python/utilities/statistics_query.cpp
@@ -3,6 +3,7 @@
 #include <dftracer/utils/core/runtime.h>
 #include <dftracer/utils/python/runtime.h>
 #include <dftracer/utils/python/utilities/statistics_query.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/statistics_query_utility.h>
 
@@ -117,7 +118,8 @@ static PyObject *StatisticsQuery_query(StatisticsQueryObject *self,
         StatisticsAggregatorInput agg_input;
         agg_input.file_path = file_path_str;
         agg_input.index_dir = index_dir_str;
-        agg_input.idx_path = file_path_str + ".idx";
+        agg_input.index_path = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(file_path_str, index_dir_str);
 
         auto *stats_p = &stats;
         auto agg_task = [stats_p, agg_input]() -> CoroTask<void> {
@@ -251,7 +253,7 @@ static PyMethodDef StatisticsQuery_methods[] = {
      "        'time_range', 'duration_stats', 'top_n_names',\n"
      "        'top_n_categories', 'detailed'.\n"
      "    top_n (int): Top results for ranked queries (default 10).\n"
-     "    index_dir (str): Directory for index sidecars (default '').\n"
+     "    index_dir (str): Directory for .dftindex stores (default '').\n"
      "\n"
      "Returns:\n"
      "    dict: Query results.\n"},
diff --git a/src/dftracer/utils/server/trace_api.cpp b/src/dftracer/utils/server/trace_api.cpp
index 9663ed24..1b8f3ca0 100644
--- a/src/dftracer/utils/server/trace_api.cpp
+++ b/src/dftracer/utils/server/trace_api.cpp
@@ -76,7 +76,7 @@ static const std::unordered_set<std::string> HASH_METADATA_NAMES = {"FH", "HH",
 using dftracer::utils::utilities::common::json::JsonDocGuard;
 using dftracer::utils::utilities::common::query::Query;
 
-/// Direct-scan a small file without any sidecar index.
+/// Direct-scan a small file without any `.dftindex` store.
 /// Streams via async_streaming_gz_lines(), parses JSON, applies
 /// predicate filters, collects matching events as raw JSON strings.
 static coro::CoroTask<void> direct_scan_events(
@@ -413,7 +413,8 @@ static coro::AsyncGenerator<StreamChunk> stream_events(
         ViewBuilderInput builder_input;
         builder_input.with_view(ev_view)
             .with_file_path(file_info->path)
-            .with_idx_path(file_info->has_bloom_data ? file_info->idx_path : "")
+            .with_index_path(file_info->has_bloom_data ? file_info->index_path
+                                                       : "")
             .with_uncompressed_size(file_info->uncompressed_size)
             .with_num_checkpoints(file_info->num_checkpoints)
             .with_bloom_cache(bloom_cache)
@@ -428,7 +429,7 @@ static coro::AsyncGenerator<StreamChunk> stream_events(
 
             ViewReaderInput reader_input;
             reader_input.with_file_path(file_info->path)
-                .with_idx_path(file_info->idx_path)
+                .with_index_path(file_info->index_path)
                 .with_byte_range(candidate.start_byte, candidate.end_byte)
                 .with_checkpoint_idx(candidate.checkpoint_idx)
                 .with_view(ev_view);
@@ -529,7 +530,7 @@ static coro::CoroTask<HttpResponse> handle_stats(const HttpRequest& req,
         for (auto* file_info : stat_files) {
             StatisticsAggregatorInput agg_input;
             agg_input.file_path = file_info->path;
-            agg_input.idx_path = file_info->idx_path;
+            agg_input.index_path = file_info->index_path;
             agg_input.index_dir = index.index_dir();
 
             StatisticsAggregatorUtility aggregator;
@@ -569,7 +570,7 @@ static coro::CoroTask<HttpResponse> handle_stats(const HttpRequest& req,
 
                     StatisticsAggregatorInput agg_input;
                     agg_input.file_path = file_info->path;
-                    agg_input.idx_path = file_info->idx_path;
+                    agg_input.index_path = file_info->index_path;
                     agg_input.index_dir = *index_dir_ptr;
 
                     StatisticsAggregatorUtility aggregator;
diff --git a/src/dftracer/utils/server/trace_index.cpp b/src/dftracer/utils/server/trace_index.cpp
index e521e3c7..9ccde5d3 100644
--- a/src/dftracer/utils/server/trace_index.cpp
+++ b/src/dftracer/utils/server/trace_index.cpp
@@ -4,6 +4,7 @@
 #include <dftracer/utils/core/io/io_backend.h>
 #include <dftracer/utils/core/pipeline/pipeline.h>
 #include <dftracer/utils/core/pipeline/pipeline_config.h>
+#include <dftracer/utils/core/rocksdb/async.h>
 #include <dftracer/utils/core/tasks/coro_scope.h>
 #include <dftracer/utils/core/tasks/task.h>
 #include <dftracer/utils/server/trace_index.h>
@@ -50,7 +51,7 @@ coro::CoroTask<void> TraceIndex::initialize() {
     for (const auto& entry : entries) {
         FileInfo info;
         info.path = entry.path.string();
-        info.idx_path = internal::determine_index_path(info.path, index_dir_);
+        info.index_path = internal::determine_index_path(info.path, index_dir_);
 
         std::error_code ec;
         auto fsize = fs::file_size(info.path, ec);
@@ -68,8 +69,8 @@ coro::CoroTask<void> TraceIndex::initialize() {
                 static_cast<double>(info.compressed_size) / (1024.0 * 1024.0);
             small_count++;
         } else {
-            info.has_bloom_data = fs::exists(info.idx_path);
-            info.has_checkpoint_index = fs::exists(info.idx_path);
+            info.has_bloom_data = fs::exists(info.index_path);
+            info.has_checkpoint_index = info.has_bloom_data;  // same store
             if (!info.has_bloom_data) {
                 needs_build.push_back(idx);
             } else {
@@ -83,7 +84,7 @@ coro::CoroTask<void> TraceIndex::initialize() {
     if (small_count > 0) {
         DFTRACER_UTILS_LOG_INFO(
             "TraceIndex: %zu small file(s) (< %zu bytes) will be "
-            "streamed directly (no sidecar indexes)",
+            "streamed directly (no .dftindex database)",
             small_count, INDEX_SIZE_THRESHOLD);
     }
 
@@ -155,12 +156,12 @@ coro::CoroTask<void> TraceIndex::initialize() {
                                             co_await builder.process(config);
 
                                         if (result.success) {
-                                            info->idx_path =
+                                            info->index_path =
                                                 internal::determine_index_path(
                                                     info->path, *index_dir_ptr);
                                             info->has_bloom_data = true;
                                             info->has_checkpoint_index =
-                                                fs::exists(info->idx_path);
+                                                fs::exists(info->index_path);
                                         } else {
                                             DFTRACER_UTILS_LOG_WARN(
                                                 "TraceIndex: failed to "
@@ -209,29 +210,43 @@ coro::CoroTask<void> TraceIndex::initialize() {
 
                                     if (info->has_bloom_data) {
                                         try {
-                                            indexer::IndexDatabase idx_db(
-                                                info->idx_path);
-                                            auto logical = indexer::internal::
-                                                get_logical_path(info->path);
-                                            int fid = idx_db.get_file_info_id(
-                                                logical);
-                                            if (fid >= 0) {
-                                                auto tb =
-                                                    idx_db.query_time_bounds(
-                                                        fid);
-                                                if (tb.valid) {
-                                                    info->min_timestamp_us =
-                                                        tb.min_timestamp_us;
-                                                    info->max_timestamp_us =
-                                                        tb.max_timestamp_us;
-                                                }
+                                            const std::string path = info->path;
+                                            const std::string index_path =
+                                                info->index_path;
+                                            const auto* path_ptr = &path;
+                                            const auto* index_path_ptr =
+                                                &index_path;
+                                            auto bounds = co_await rocksdb::run(
+                                                [path_ptr, index_path_ptr] {
+                                                    indexer::IndexDatabase
+                                                        idx_db(*index_path_ptr);
+                                                    auto logical =
+                                                        indexer::internal::
+                                                            get_logical_path(
+                                                                *path_ptr);
+                                                    int fid =
+                                                        idx_db.get_file_info_id(
+                                                            logical);
+                                                    if (fid < 0) {
+                                                        return indexer::
+                                                            IndexDatabase::
+                                                                TimeBounds{};
+                                                    }
+                                                    return idx_db
+                                                        .query_time_bounds(fid);
+                                                });
+                                            if (bounds.valid) {
+                                                info->min_timestamp_us =
+                                                    bounds.min_timestamp_us;
+                                                info->max_timestamp_us =
+                                                    bounds.max_timestamp_us;
                                             }
                                         } catch (const std::exception& e) {
                                             DFTRACER_UTILS_LOG_WARN(
                                                 "TraceIndex: failed to "
                                                 "read time bounds from "
                                                 "%s: %s",
-                                                info->idx_path.c_str(),
+                                                info->index_path.c_str(),
                                                 e.what());
                                         }
                                     }
@@ -239,7 +254,7 @@ coro::CoroTask<void> TraceIndex::initialize() {
                                     auto meta_input =
                                         MetadataCollectorUtilityInput::
                                             from_file(info->path)
-                                                .with_index(info->idx_path);
+                                                .with_index(info->index_path);
                                     auto metadata =
                                         co_await MetadataCollectorUtility{}
                                             .process(meta_input);
diff --git a/src/dftracer/utils/server/viz_api.cpp b/src/dftracer/utils/server/viz_api.cpp
index 5df0efbb..9917c765 100644
--- a/src/dftracer/utils/server/viz_api.cpp
+++ b/src/dftracer/utils/server/viz_api.cpp
@@ -209,7 +209,7 @@ static void apply_filters(std::string& dsl, std::string_view filters_str) {
     }
 }
 
-/// Direct-scan a small file without any sidecar index.
+/// Direct-scan a small file without any `.dftindex` store.
 /// Streams via async_streaming_gz_lines(), parses JSON, applies
 /// predicate filters, collects matching events as raw JSON strings.
 static coro::CoroTask<void> direct_scan_events(
@@ -447,8 +447,8 @@ static coro::CoroTask<HttpResponse> handle_viz_events(
                 ViewBuilderInput builder_input;
                 builder_input.with_view(view)
                     .with_file_path(file_info->path)
-                    .with_idx_path(
-                        file_info->has_bloom_data ? file_info->idx_path : "")
+                    .with_index_path(
+                        file_info->has_bloom_data ? file_info->index_path : "")
                     .with_uncompressed_size(file_info->uncompressed_size)
                     .with_num_checkpoints(file_info->num_checkpoints)
                     .with_bloom_cache(&index.bloom_cache())
@@ -467,7 +467,7 @@ static coro::CoroTask<HttpResponse> handle_viz_events(
                     }
                     ViewReaderInput reader_input;
                     reader_input.with_file_path(file_info->path)
-                        .with_idx_path(file_info->idx_path)
+                        .with_index_path(file_info->index_path)
                         .with_byte_range(candidate.start_byte,
                                          candidate.end_byte)
                         .with_checkpoint_idx(candidate.checkpoint_idx)
@@ -555,9 +555,9 @@ static coro::CoroTask<HttpResponse> handle_viz_events(
                         ViewBuilderInput builder_input;
                         builder_input.with_view(*view_ptr)
                             .with_file_path(file_info->path)
-                            .with_idx_path(file_info->has_bloom_data
-                                               ? file_info->idx_path
-                                               : "")
+                            .with_index_path(file_info->has_bloom_data
+                                                 ? file_info->index_path
+                                                 : "")
                             .with_uncompressed_size(
                                 file_info->uncompressed_size)
                             .with_num_checkpoints(file_info->num_checkpoints)
@@ -577,7 +577,7 @@ static coro::CoroTask<HttpResponse> handle_viz_events(
 
                             ViewReaderInput reader_input;
                             reader_input.with_file_path(file_info->path)
-                                .with_idx_path(file_info->idx_path)
+                                .with_index_path(file_info->index_path)
                                 .with_byte_range(candidate.start_byte,
                                                  candidate.end_byte)
                                 .with_checkpoint_idx(candidate.checkpoint_idx)
diff --git a/src/dftracer/utils/utilities/call_tree/call_tree_internal.cpp b/src/dftracer/utils/utilities/call_tree/call_tree_internal.cpp
index 98bfbaca..a81d2c23 100644
--- a/src/dftracer/utils/utilities/call_tree/call_tree_internal.cpp
+++ b/src/dftracer/utils/utilities/call_tree/call_tree_internal.cpp
@@ -8,6 +8,7 @@
 #include <dftracer/utils/core/common/format_detector.h>
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/reader/internal/line_processor.h>
 #include <dftracer/utils/utilities/reader/internal/reader_factory.h>
 #include <yyjson.h>
@@ -300,7 +301,7 @@ bool TraceReader::read_with_reader(const std::string& trace_file,
         auto format = dftracer::utils::FormatDetector::detect(trace_file);
 
         // For GZIP files, skip Reader API and use direct zlib decompression
-        // since Reader API requires .idx files
+        // since this path expects a prebuilt `.dftindex` store.
         if (format == dftracer::utils::ArchiveFormat::GZIP) {
             return false;  // Will trigger fallback to read_direct which handles
                            // gzip
@@ -313,13 +314,13 @@ bool TraceReader::read_with_reader(const std::string& trace_file,
             return false;
         }
 
-        // Generate index file path
-        std::string idx_file = trace_file + ".idx";
+        std::string index_path = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(trace_file, "");
 
         // Create reader (this will auto-build index if needed)
         auto reader =
             dftracer::utils::utilities::reader::internal::ReaderFactory::create(
-                trace_file, idx_file);
+                trace_file, index_path);
         if (!reader || !reader->is_valid()) {
             DFTRACER_UTILS_LOG_ERROR("Failed to create reader for %s",
                                      trace_file.c_str());
@@ -881,4 +882,4 @@ void CallTree::print_calls_recursive(const ProcessCallTree& graph,
 }
 
 }  // namespace internal
-}  // namespace dftracer::utils::call_tree
\ No newline at end of file
+}  // namespace dftracer::utils::call_tree
diff --git a/src/dftracer/utils/utilities/call_tree/call_tree_mpi.cpp b/src/dftracer/utils/utilities/call_tree/call_tree_mpi.cpp
index ff7ea5e9..8a6fe6de 100644
--- a/src/dftracer/utils/utilities/call_tree/call_tree_mpi.cpp
+++ b/src/dftracer/utils/utilities/call_tree/call_tree_mpi.cpp
@@ -241,10 +241,11 @@ bool MPIFilteredTraceReader::read(const std::string& trace_file,
     ArchiveFormat format = FormatDetector::detect(trace_file);
 
     if (format == ArchiveFormat::GZIP) {
-        // Try to use indexer
-        std::string idx_file = trace_file + ".idx";
-        if (fs::exists(idx_file)) {
-            return read_with_indexer(trace_file, idx_file, graph);
+        std::string index_path =
+            utilities::composites::dft::internal::determine_index_path(
+                trace_file, "");
+        if (fs::exists(index_path)) {
+            return read_with_indexer(trace_file, index_path, graph);
         }
     }
 
@@ -610,12 +611,14 @@ std::set<std::uint32_t> MPICallTreeBuilder::scan_file_for_pids(
 
     // Check if it's a gzip file with an index
     ArchiveFormat format = FormatDetector::detect(trace_file);
-    std::string idx_file = trace_file + ".idx";
+    std::string index_path =
+        utilities::composites::dft::internal::determine_index_path(trace_file,
+                                                                   "");
 
-    if (format == ArchiveFormat::GZIP && fs::exists(idx_file)) {
+    if (format == ArchiveFormat::GZIP && fs::exists(index_path)) {
         try {
             auto reader = utilities::reader::internal::ReaderFactory::create(
-                trace_file, idx_file);
+                trace_file, index_path);
             if (reader && reader->is_valid()) {
                 // Read first N lines to discover PIDs
                 std::size_t num_lines = reader->get_num_lines();
diff --git a/src/dftracer/utils/utilities/composites/dft/aggregators/aggregator_utility.cpp b/src/dftracer/utils/utilities/composites/dft/aggregators/aggregator_utility.cpp
index c4bec8af..465d4029 100644
--- a/src/dftracer/utils/utilities/composites/dft/aggregators/aggregator_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/aggregators/aggregator_utility.cpp
@@ -257,6 +257,20 @@ coro::AsyncGenerator<AggregationBatch> AggregatorUtility::process(
     EventAggregatorUtility merger;
     std::atomic<int> global_chunk_idx{0};
 
+    // Per-file builders below run with force_rebuild=false so a store shared
+    // by several inputs is not wiped between files. Honor the flag here by
+    // clearing every store the inputs resolve to, not only the first one.
+    if (input.force_rebuild) {
+        for (const auto& file_path : input_files) {
+            const std::string index_path =
+                composites::dft::internal::determine_index_path(
+                    file_path, effective_index_dir);
+            if (fs::exists(index_path)) {
+                fs::remove_all(index_path);
+            }
+        }
+    }
+
     for (const auto& file_path : input_files) {
         bool is_compressed =
             file_path.size() >= 3 &&
@@ -268,7 +282,7 @@ coro::AsyncGenerator<AggregationBatch> AggregatorUtility::process(
                 file_path, effective_index_dir);
             auto idx_input = indexer::IndexBuildConfig::for_file(file_path)
                                  .with_checkpoint_size(input.checkpoint_size)
-                                 .with_force_rebuild(input.force_rebuild)
+                                 .with_force_rebuild(false)
                                  .with_index_dir(effective_index_dir);
             co_await indexer::IndexBuilderUtility{}.process(idx_input);
         }
@@ -277,7 +291,7 @@ coro::AsyncGenerator<AggregationBatch> AggregatorUtility::process(
         auto meta_input =
             composites::dft::MetadataCollectorUtilityInput::from_file(file_path)
                 .with_checkpoint_size(input.checkpoint_size)
-                .with_force_rebuild(input.force_rebuild)
+                .with_force_rebuild(false)
                 .with_index(idx_path);
         auto metadata =
             co_await composites::dft::MetadataCollectorUtility{}.process(
diff --git a/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.cpp b/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.cpp
index c156263c..4618338d 100644
--- a/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_aggregator_utility.cpp
@@ -206,9 +206,10 @@ coro::CoroTask<ChunkAggregationOutput> ChunkAggregatorUtility::process(
 
     TraceReaderConfig reader_cfg;
     reader_cfg.file_path = input.file_path;
-    if (!input.idx_path.empty()) {
+    if (const auto slash = input.index_path.rfind('/');
+        slash != std::string::npos) {
         reader_cfg.index_dir =
-            input.idx_path.substr(0, input.idx_path.rfind('/'));
+            input.index_path.substr(0, slash);
     }
     reader_cfg.checkpoint_size = input.checkpoint_size;
     TraceReader trace_reader(reader_cfg);
diff --git a/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_mapper_utility.cpp b/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_mapper_utility.cpp
index 085e95d1..a516fa0f 100644
--- a/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_mapper_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/aggregators/chunk_mapper_utility.cpp
@@ -28,7 +28,7 @@ coro::CoroTask<FileChunkMapperOutput> FileChunkMapperUtility::process(
         FileChunkMapperOutput chunks;
         ChunkAggregatorInput chunk;
         chunk.with_file_path(meta.file_path)
-            .with_idx_path(meta.idx_path)
+            .with_index_path(meta.index_path)
             .with_byte_range(0, 0)
             .with_line_range(0, 0)
             .with_chunk_index(input.start_chunk_index)
@@ -65,7 +65,7 @@ coro::CoroTask<FileChunkMapperOutput> FileChunkMapperUtility::process(
 
         ChunkAggregatorInput chunk;
         chunk.with_file_path(meta.file_path)
-            .with_idx_path(meta.idx_path)
+            .with_index_path(meta.index_path)
             .with_byte_range(start_byte, end_byte)
             .with_line_range(start_line, end_line)
             .with_chunk_index(input.start_chunk_index + static_cast<int>(i))
diff --git a/src/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.cpp b/src/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.cpp
index f5931984..f3c752c9 100644
--- a/src/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/chunk_extractor_utility.cpp
@@ -60,7 +60,7 @@ ChunkExtractorUtility::extract_and_write(
             auto reader_config =
                 StreamingLineReaderConfig()
                     .with_file(spec.file_path)
-                    .with_index(spec.idx_path)
+                    .with_index(spec.index_path)
                     .with_line_range(spec.start_line, spec.end_line);
             auto line_gen = StreamingLineReader::read_async(reader_config);
 
@@ -84,9 +84,9 @@ ChunkExtractorUtility::extract_and_write(
                 }
             }
         } else {
-            if (!spec.idx_path.empty()) {
+            if (!spec.index_path.empty()) {
                 auto reader = reader::internal::ReaderFactory::create(
-                    spec.file_path, spec.idx_path);
+                    spec.file_path, spec.index_path);
                 auto line_gen = sources::async_indexed_file_bytes(
                     reader, spec.start_byte, spec.end_byte);
 
diff --git a/src/dftracer/utils/utilities/composites/dft/chunk_manifest_mapper_utility.cpp b/src/dftracer/utils/utilities/composites/dft/chunk_manifest_mapper_utility.cpp
index af440ec2..b0261dd5 100644
--- a/src/dftracer/utils/utilities/composites/dft/chunk_manifest_mapper_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/chunk_manifest_mapper_utility.cpp
@@ -78,7 +78,7 @@ ChunkManifestMapperUtility::process(
                 static_cast<double>(line_end - file.start_line + 1) *
                 bytes_per_line);
 
-            internal::DFTracerChunkSpec spec(file.file_path, file.idx_path,
+            internal::DFTracerChunkSpec spec(file.file_path, file.index_path,
                                              size_to_take, start_byte, end_byte,
                                              current_start, line_end);
 
diff --git a/src/dftracer/utils/utilities/composites/dft/event_collector_utility.cpp b/src/dftracer/utils/utilities/composites/dft/event_collector_utility.cpp
index 84743909..5a6ebee1 100644
--- a/src/dftracer/utils/utilities/composites/dft/event_collector_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/event_collector_utility.cpp
@@ -95,10 +95,10 @@ EventCollectorFromMetadataUtility::process(
 #endif
         EventIdCollector collector(events, input.trim_commas);
 
-        if (!file.idx_path.empty()) {
+        if (!file.index_path.empty()) {
             // Indexed/compressed file
             auto reader = reader::internal::ReaderFactory::create(
-                file.file_path, file.idx_path);
+                file.file_path, file.index_path);
             if (!reader) {
                 DFTRACER_UTILS_LOG_ERROR("Failed to create reader for file: %s",
                                          file.file_path.c_str());
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.cpp
index 13e18d43..8f9ce32a 100644
--- a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_indexer_utility.cpp
@@ -153,7 +153,7 @@ coro::CoroTask<ChunkIndexerOutput> ChunkIndexerUtility::process(
         auto reader_input =
             composites::IndexedReadInput::from_file(input.file_path)
                 .with_checkpoint_size(input.checkpoint_size)
-                .with_index(input.idx_path);
+                .with_index(input.index_path);
 
         composites::IndexedFileReaderUtility reader_utility;
         reader = co_await reader_utility.process(reader_input);
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.cpp
index 051e95b6..b30ecbf7 100644
--- a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.cpp
@@ -1,5 +1,5 @@
 #include <dftracer/utils/core/common/logging.h>
-#include <dftracer/utils/core/sqlite/async.h>
+#include <dftracer/utils/core/rocksdb/async.h>
 #include <dftracer/utils/utilities/common/query/ast.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
@@ -48,11 +48,11 @@ struct PrunerContext {
 
     // Hash resolution: human-readable value → hash strings
     std::unordered_map<std::string, std::vector<std::string>> hash_cache;
-    const sqlite::SqliteDatabase* db = nullptr;
+    const IndexDatabase* db = nullptr;
     int fid = -1;
 
     BloomFilterCache* cache;
-    std::string idx_path;
+    std::string index_path;
 
     // Resolve a value for a hash dimension.
     // Returns the hash strings if the dimension is a hash dim and
@@ -67,7 +67,7 @@ struct PrunerContext {
         if (it != hash_cache.end()) return it->second;
 
         if (db) {
-            auto hashes = queries::query_hash_by_resolved(*db, dim, val);
+            auto hashes = db->query_hash_by_resolved(dim, val);
             auto& cached = hash_cache[key];
             cached = std::move(hashes);
             return cached;
@@ -347,7 +347,7 @@ coro::CoroTask<ChunkPrunerOutput> ChunkPrunerUtility::process(
         out.file_may_match = false;
 
         try {
-            IndexDatabase idx_db(input.idx_path);
+            IndexDatabase idx_db(input.index_path);
             int fid =
                 idx_db.get_file_info_id(get_logical_path(input.file_path));
             if (fid < 0) {
@@ -357,14 +357,13 @@ coro::CoroTask<ChunkPrunerOutput> ChunkPrunerUtility::process(
             }
 
             // Load chunk dimension stats
-            auto dim_stats_rows =
-                queries::query_chunk_dimension_stats(idx_db.sql_db(), fid);
+            auto dim_stats_rows = idx_db.query_chunk_dimension_stats(fid);
 
             PrunerContext ctx;
             ctx.file_info_id = fid;
             ctx.cache = input.cache;
-            ctx.idx_path = input.idx_path;
-            ctx.db = &idx_db.sql_db();
+            ctx.index_path = input.index_path;
+            ctx.db = &idx_db;
             ctx.fid = fid;
 
             for (const auto& ds : dim_stats_rows) {
@@ -373,10 +372,9 @@ coro::CoroTask<ChunkPrunerOutput> ChunkPrunerUtility::process(
             }
 
             // Load bloom filters for all dimensions
-            auto indexed_dims =
-                queries::query_index_dimensions(idx_db.sql_db(), fid);
-            auto all_chunk_blooms = queries::query_chunk_bloom_filters_batch(
-                idx_db.sql_db(), fid, indexed_dims);
+            auto indexed_dims = idx_db.query_index_dimensions(fid);
+            auto all_chunk_blooms =
+                idx_db.query_chunk_bloom_filters_batch(fid, indexed_dims);
 
             for (const auto& [dim, chunk_blooms] : all_chunk_blooms) {
                 for (const auto& cb : chunk_blooms) {
@@ -384,8 +382,8 @@ coro::CoroTask<ChunkPrunerOutput> ChunkPrunerUtility::process(
                     BloomFilter bf = BloomFilter::from_blob(
                         cb.bloom_data.data(), cb.bloom_data.size());
                     if (input.cache) {
-                        input.cache->put(input.idx_path, dim, cb.checkpoint_idx,
-                                         bf);
+                        input.cache->put(input.index_path, dim,
+                                         cb.checkpoint_idx, bf);
                     }
                     ctx.bloom_filters[dim][cb.checkpoint_idx] = std::move(bf);
                 }
@@ -428,7 +426,7 @@ coro::CoroTask<ChunkPrunerOutput> ChunkPrunerUtility::process(
         return out;
     };
 
-    co_return co_await sqlite::run(do_query);
+    co_return co_await rocksdb::run(do_query);
 }
 
 }  // namespace dftracer::utils::utilities::composites::dft::indexing
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.cpp
index 9ee51be6..6aa594a9 100644
--- a/src/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.cpp
@@ -5,6 +5,7 @@
 #include <charconv>
 #include <cmath>
 #include <cstring>
+#include <limits>
 #include <stdexcept>
 
 namespace dftracer::utils::utilities::composites::dft::indexing {
@@ -15,11 +16,20 @@ void ChunkStatistics::update_from_event(std::string_view name,
                                         std::uint64_t dur) {
     ++total_events;
 
-    // 20 digits max per uint64 + ':' separator = 41 chars max
-    char pt_buf[52];
+    // A uint64 prints in at most digits10 + 1 (= 20) characters; "pid:tid"
+    // therefore needs 2 * 20 + 1 = 41 bytes (no terminator: string_view).
+    constexpr std::size_t pid_tid_buf_size =
+        (2 * (std::numeric_limits<std::uint64_t>::digits10 + 1)) + 1;
+    char pt_buf[pid_tid_buf_size];
     auto [pp, ec1] = std::to_chars(pt_buf, pt_buf + sizeof(pt_buf), pid);
+    if (ec1 != std::errc{} || pp == pt_buf + sizeof(pt_buf)) {
+        throw std::runtime_error("failed to format pid");
+    }
     *pp++ = ':';
     auto [tp, ec2] = std::to_chars(pp, pt_buf + sizeof(pt_buf), tid);
+    if (ec2 != std::errc{}) {
+        throw std::runtime_error("failed to format tid");
+    }
     std::string_view pt_sv(pt_buf, tp - pt_buf);
 
     category_counts[std::string(cat)]++;
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_bloom_filters.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_bloom_filters.cpp
deleted file mode 100644
index 45c6b7b8..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_bloom_filters.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_chunk_bloom_filters(const SqliteDatabase& db, int file_info_id,
-                                std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "DELETE FROM chunk_bloom_filters "
-                    "WHERE file_info_id = ? AND dimension = ?;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete chunk bloom filters: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_dimension_stats.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_dimension_stats.cpp
deleted file mode 100644
index f804a3ff..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_dimension_stats.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_chunk_dimension_stats(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(
-        db, "DELETE FROM chunk_dimension_stats WHERE file_info_id = ?;");
-    stmt.bind_int(1, file_info_id);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete chunk dimension stats: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_statistics.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_statistics.cpp
deleted file mode 100644
index 801ac157..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_chunk_statistics.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_chunk_statistics(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db, "DELETE FROM chunk_statistics WHERE file_info_id = ?;");
-
-    stmt.bind_int(1, file_info_id);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete chunk statistics: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_event_ranges.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_event_ranges.cpp
deleted file mode 100644
index f7228e0f..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_event_ranges.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_event_ranges(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db,
-                    "DELETE FROM checkpoint_event_ranges "
-                    "WHERE file_info_id = ?;");
-    stmt.bind_int(1, file_info_id);
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete event ranges: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_file_bloom_filter.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_file_bloom_filter.cpp
deleted file mode 100644
index ca480c1d..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_file_bloom_filter.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "DELETE FROM file_bloom_filters "
-                    "WHERE file_info_id = ? AND dimension = ?;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete file bloom filter: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_hash_resolutions.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_hash_resolutions.cpp
deleted file mode 100644
index 5b5a58ad..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_hash_resolutions.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_hash_resolutions(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db, "DELETE FROM hash_resolutions WHERE file_info_id = ?;");
-
-    stmt.bind_int(1, file_info_id);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete hash resolutions: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_metadata_lines.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_metadata_lines.cpp
deleted file mode 100644
index 8972a08f..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/delete_metadata_lines.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void delete_metadata_lines(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db,
-                    "DELETE FROM checkpoint_metadata_lines "
-                    "WHERE file_info_id = ?;");
-    stmt.bind_int(1, file_info_id);
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to delete metadata lines: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_bloom_filter.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_bloom_filter.cpp
deleted file mode 100644
index c389141d..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_bloom_filter.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <span>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_chunk_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               const void* blob_data, int blob_size,
-                               std::uint64_t num_entries) {
-    SqliteStmt stmt(
-        db,
-        "INSERT OR REPLACE INTO chunk_bloom_filters"
-        "(file_info_id, checkpoint_idx, dimension, bloom_data, num_entries) "
-        "VALUES(?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_text(3, dimension);
-    stmt.bind_blob(4, blob_data, blob_size);
-    stmt.bind_int64(5, static_cast<std::int64_t>(num_entries));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert chunk bloom filter: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_chunk_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               std::span<const unsigned char> blob_data,
-                               std::uint64_t num_entries) {
-    insert_chunk_bloom_filter(db, file_info_id, checkpoint_idx, dimension,
-                              blob_data.data(),
-                              static_cast<int>(blob_data.size()), num_entries);
-}
-
-SqliteStmt prepare_insert_chunk_bloom_filter(const SqliteDatabase& db) {
-    return SqliteStmt(
-        db,
-        "INSERT OR REPLACE INTO chunk_bloom_filters"
-        "(file_info_id, checkpoint_idx, dimension, bloom_data, num_entries) "
-        "VALUES(?, ?, ?, ?, ?);");
-}
-
-void insert_chunk_bloom_filter(SqliteStmt& stmt, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               const void* blob_data, int blob_size,
-                               std::uint64_t num_entries) {
-    stmt.reset();
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_text_static(3, dimension);
-    stmt.bind_blob_static(4, blob_data, blob_size);
-    stmt.bind_int64(5, static_cast<std::int64_t>(num_entries));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert chunk bloom filter");
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_dimension_stats.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_dimension_stats.cpp
deleted file mode 100644
index 453612e6..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_dimension_stats.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_chunk_dimension_stats(const SqliteDatabase& db, int file_info_id,
-                                  std::uint64_t checkpoint_idx,
-                                  const ChunkDimensionStats& stats,
-                                  std::size_t value_counts_cap) {
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO chunk_dimension_stats"
-                    "(file_info_id, checkpoint_idx, dimension, distinct_count, "
-                    "value_counts, min_value, max_value, value_type) "
-                    "VALUES(?, ?, ?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_text_static(3, stats.dimension);
-    stmt.bind_int64(4, static_cast<std::int64_t>(stats.distinct_count));
-
-    auto compressed = stats.compress_value_counts(value_counts_cap);
-    if (compressed) {
-        stmt.bind_blob_static(5, compressed->data(),
-                              static_cast<int>(compressed->size()));
-    } else {
-        stmt.bind_null(5);
-    }
-
-    stmt.bind_text_static(6, stats.min_value);
-    stmt.bind_text_static(7, stats.max_value);
-    stmt.bind_text_static(8, stats.value_type);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert chunk dimension stats: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-SqliteStmt prepare_insert_chunk_dimension_stats(const SqliteDatabase& db) {
-    return SqliteStmt(
-        db,
-        "INSERT OR REPLACE INTO chunk_dimension_stats"
-        "(file_info_id, checkpoint_idx, dimension, distinct_count, "
-        "value_counts, min_value, max_value, value_type) "
-        "VALUES(?, ?, ?, ?, ?, ?, ?, ?);");
-}
-
-void insert_chunk_dimension_stats(SqliteStmt& stmt, int file_info_id,
-                                  std::uint64_t checkpoint_idx,
-                                  const ChunkDimensionStats& stats,
-                                  std::size_t value_counts_cap) {
-    stmt.reset();
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_text_static(3, stats.dimension);
-    stmt.bind_int64(4, static_cast<std::int64_t>(stats.distinct_count));
-
-    auto compressed = stats.compress_value_counts(value_counts_cap);
-    if (compressed) {
-        stmt.bind_blob_static(5, compressed->data(),
-                              static_cast<int>(compressed->size()));
-    } else {
-        stmt.bind_null(5);
-    }
-
-    stmt.bind_text_static(6, stats.min_value);
-    stmt.bind_text_static(7, stats.max_value);
-    stmt.bind_text_static(8, stats.value_type);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert chunk dimension stats");
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_statistics.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_statistics.cpp
deleted file mode 100644
index ddca1618..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_chunk_statistics.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_chunk_statistics(const SqliteDatabase& db, int file_info_id,
-                             std::uint64_t checkpoint_idx,
-                             const ChunkStatistics& stats) {
-    SqliteStmt stmt(
-        db,
-        "INSERT OR REPLACE INTO chunk_statistics"
-        "(file_info_id, checkpoint_idx, total_events, "
-        "min_timestamp_us, max_timestamp_us, "
-        "duration_sum_us, duration_min_us, duration_max_us, duration_count, "
-        "duration_m2, duration_sketch, duration_histogram, "
-        "name_duration_sketches, name_duration_histograms, "
-        "name_duration_sums, name_duration_sum_sqs, name_category) "
-        "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_int64(3, static_cast<std::int64_t>(stats.total_events));
-
-    if (stats.min_timestamp_us != std::numeric_limits<std::uint64_t>::max()) {
-        stmt.bind_int64(4, static_cast<std::int64_t>(stats.min_timestamp_us));
-    } else {
-        stmt.bind_null(4);
-    }
-
-    if (stats.max_timestamp_us != 0) {
-        stmt.bind_int64(5, static_cast<std::int64_t>(stats.max_timestamp_us));
-    } else {
-        stmt.bind_null(5);
-    }
-
-    stmt.bind_int64(6, stats.duration_sum_us);
-
-    if (stats.duration_min_us != std::numeric_limits<std::uint64_t>::max()) {
-        stmt.bind_int64(7, static_cast<std::int64_t>(stats.duration_min_us));
-    } else {
-        stmt.bind_null(7);
-    }
-
-    if (stats.duration_max_us != 0) {
-        stmt.bind_int64(8, static_cast<std::int64_t>(stats.duration_max_us));
-    } else {
-        stmt.bind_null(8);
-    }
-
-    stmt.bind_int64(9, static_cast<std::int64_t>(stats.duration_count));
-    stmt.bind_double(10, stats.duration_m2);
-
-    if (!stats.duration_sketch.empty()) {
-        auto blob = stats.duration_sketch.serialize();
-        stmt.bind_blob(11, blob.data(), static_cast<int>(blob.size()));
-    } else {
-        stmt.bind_null(11);
-    }
-
-    stmt.bind_text(12, stats.duration_histogram.to_json());
-
-    if (!stats.name_duration_sketches.empty()) {
-        auto blob = stats.serialize_name_duration_sketches();
-        stmt.bind_blob(13, blob.data(), static_cast<int>(blob.size()));
-    } else {
-        stmt.bind_null(13);
-    }
-
-    stmt.bind_text(14, stats.name_duration_histograms_json());
-    stmt.bind_text(15, stats.name_duration_sums_json());
-    stmt.bind_text(16, stats.name_duration_sum_sqs_json());
-    stmt.bind_text(17, stats.name_category_json());
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert chunk statistics: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_event_range.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_event_range.cpp
deleted file mode 100644
index f44fcf70..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_event_range.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstring>
-#include <span>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-std::vector<unsigned char> pack_line_numbers(
-    const std::vector<std::uint32_t>& lines) {
-    std::vector<unsigned char> blob(lines.size() * sizeof(std::uint32_t));
-    std::memcpy(blob.data(), lines.data(), blob.size());
-    return blob;
-}
-
-std::vector<std::uint32_t> unpack_line_numbers(const unsigned char* data,
-                                               std::size_t size) {
-    std::size_t count = size / sizeof(std::uint32_t);
-    std::vector<std::uint32_t> lines(count);
-    std::memcpy(lines.data(), data, size);
-    return lines;
-}
-
-void insert_event_range(const SqliteDatabase& db, int file_info_id,
-                        std::uint64_t checkpoint_idx, std::string_view cat,
-                        std::string_view name,
-                        const std::vector<std::uint32_t>& line_numbers) {
-    auto blob = pack_line_numbers(line_numbers);
-
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO checkpoint_event_ranges"
-                    "(checkpoint_idx, file_info_id, cat, name, "
-                    "line_numbers, event_count) "
-                    "VALUES(?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int64(1, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_int(2, file_info_id);
-    stmt.bind_text(3, cat);
-    stmt.bind_text(4, name);
-    stmt.bind_blob(5, blob.data(), static_cast<int>(blob.size()));
-    stmt.bind_int64(6, static_cast<std::int64_t>(line_numbers.size()));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert event range: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_event_range(const SqliteDatabase& db, int file_info_id,
-                        std::uint64_t checkpoint_idx, std::string_view cat,
-                        std::string_view name,
-                        std::span<const std::uint32_t> line_numbers) {
-    std::vector<std::uint32_t> vec(line_numbers.begin(), line_numbers.end());
-    insert_event_range(db, file_info_id, checkpoint_idx, cat, name, vec);
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_file_bloom_filter.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_file_bloom_filter.cpp
deleted file mode 100644
index cf7c0ff8..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_file_bloom_filter.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <span>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension, const void* blob_data,
-                              int blob_size, std::uint64_t num_entries) {
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO file_bloom_filters"
-                    "(file_info_id, dimension, bloom_data, num_entries) "
-                    "VALUES(?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-    stmt.bind_blob(3, blob_data, blob_size);
-    stmt.bind_int64(4, static_cast<std::int64_t>(num_entries));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert file bloom filter: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension,
-                              std::span<const unsigned char> blob_data,
-                              std::uint64_t num_entries) {
-    insert_file_bloom_filter(db, file_info_id, dimension, blob_data.data(),
-                             static_cast<int>(blob_data.size()), num_entries);
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_hash_resolution.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_hash_resolution.cpp
deleted file mode 100644
index f96fdb99..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_hash_resolution.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_hash_resolution(const SqliteDatabase& db, int file_info_id,
-                            std::string_view dimension,
-                            std::string_view hash_value,
-                            std::string_view resolved_value) {
-    SqliteStmt stmt(db,
-                    "INSERT OR IGNORE INTO hash_resolutions"
-                    "(file_info_id, dimension, hash_value, resolved_value) "
-                    "VALUES(?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-    stmt.bind_text(3, hash_value);
-    stmt.bind_text(4, resolved_value);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert hash resolution: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-SqliteStmt prepare_insert_hash_resolution(const SqliteDatabase& db) {
-    return SqliteStmt(db,
-                      "INSERT OR IGNORE INTO hash_resolutions"
-                      "(file_info_id, dimension, hash_value, resolved_value) "
-                      "VALUES(?, ?, ?, ?);");
-}
-
-void insert_hash_resolution(SqliteStmt& stmt, int file_info_id,
-                            std::string_view dimension,
-                            std::string_view hash_value,
-                            std::string_view resolved_value) {
-    stmt.reset();
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text_static(2, dimension);
-    stmt.bind_text_static(3, hash_value);
-    stmt.bind_text_static(4, resolved_value);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert hash resolution");
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_index_dimension.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_index_dimension.cpp
deleted file mode 100644
index 48801f10..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_index_dimension.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_index_dimension(const SqliteDatabase& db, int file_info_id,
-                            std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "INSERT OR IGNORE INTO index_dimensions"
-                    "(file_info_id, dimension) VALUES(?, ?);");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert index dimension: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_metadata_lines.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_metadata_lines.cpp
deleted file mode 100644
index a13546d7..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_metadata_lines.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <span>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_metadata_lines(const SqliteDatabase& db, int file_info_id,
-                           std::uint64_t checkpoint_idx,
-                           std::string_view meta_type,
-                           const std::vector<std::uint32_t>& line_numbers) {
-    auto blob = pack_line_numbers(line_numbers);
-
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO checkpoint_metadata_lines"
-                    "(checkpoint_idx, file_info_id, meta_type, "
-                    "line_numbers) "
-                    "VALUES(?, ?, ?, ?);");
-
-    stmt.bind_int64(1, static_cast<std::int64_t>(checkpoint_idx));
-    stmt.bind_int(2, file_info_id);
-    stmt.bind_text(3, meta_type);
-    stmt.bind_blob(4, blob.data(), static_cast<int>(blob.size()));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert metadata lines: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_metadata_lines(const SqliteDatabase& db, int file_info_id,
-                           std::uint64_t checkpoint_idx,
-                           std::string_view meta_type,
-                           std::span<const std::uint32_t> line_numbers) {
-    std::vector<std::uint32_t> vec(line_numbers.begin(), line_numbers.end());
-    insert_metadata_lines(db, file_info_id, checkpoint_idx, meta_type, vec);
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_provenance.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_provenance.cpp
deleted file mode 100644
index f7e6d314..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/insert_provenance.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-using indexer::internal::IndexerError;
-
-void insert_provenance_info(const SqliteDatabase& db, std::string_view key,
-                            std::string_view value) {
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO provenance_info(key, value) "
-                    "VALUES(?, ?);");
-
-    stmt.bind_text(1, key);
-    stmt.bind_text(2, value);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert provenance info: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_provenance_source(const SqliteDatabase& db, int file_info_id,
-                              int source_idx, std::string_view path,
-                              int num_checkpoints,
-                              std::string_view event_hash) {
-    SqliteStmt stmt(db,
-                    "INSERT OR REPLACE INTO provenance_sources"
-                    "(source_idx, file_info_id, path, "
-                    "num_checkpoints, event_hash) "
-                    "VALUES(?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, source_idx);
-    stmt.bind_int(2, file_info_id);
-    stmt.bind_text(3, path);
-    stmt.bind_int(4, num_checkpoints);
-    stmt.bind_text(5, event_hash);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert provenance source: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_provenance_group(const SqliteDatabase& db, std::string_view name,
-                             std::string_view predicate) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO provenance_group(name, predicate) "
-                    "VALUES(?, ?);");
-
-    stmt.bind_text(1, name);
-    stmt.bind_text(2, predicate);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert provenance group: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-void insert_provenance_segment(const SqliteDatabase& db, int source_idx,
-                               int source_checkpoint, int output_line_start,
-                               int output_line_end, int event_count) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO provenance_segments"
-                    "(source_idx, source_checkpoint, "
-                    "output_line_start, output_line_end, "
-                    "event_count) "
-                    "VALUES(?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, source_idx);
-    stmt.bind_int(2, source_checkpoint);
-    stmt.bind_int(3, output_line_start);
-    stmt.bind_int(4, output_line_end);
-    stmt.bind_int(5, event_count);
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert provenance segment: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h b/src/dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h
index b5648fdd..4c3af623 100644
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h
+++ b/src/dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h
@@ -1,49 +1,32 @@
 #ifndef DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_INDEXING_MANIFEST_QUERIES_H
 #define DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_INDEXING_MANIFEST_QUERIES_H
 
-#include <dftracer/utils/core/sqlite/database.h>
-
 #include <cstdint>
-#include <span>
+#include <cstring>
 #include <string>
-#include <string_view>
 #include <vector>
 
 namespace dftracer::utils::utilities::composites::dft::indexing::queries {
 
-using dftracer::utils::sqlite::SqliteDatabase;
-
 // --- Packed line numbers helpers ---
 
-std::vector<unsigned char> pack_line_numbers(
-    const std::vector<std::uint32_t>& lines);
-
-std::vector<std::uint32_t> unpack_line_numbers(const unsigned char* data,
-                                               std::size_t size);
-
-// --- Insert operations ---
-
-void insert_event_range(const SqliteDatabase& db, int file_info_id,
-                        std::uint64_t checkpoint_idx, std::string_view cat,
-                        std::string_view name,
-                        const std::vector<std::uint32_t>& line_numbers);
-
-void insert_event_range(const SqliteDatabase& db, int file_info_id,
-                        std::uint64_t checkpoint_idx, std::string_view cat,
-                        std::string_view name,
-                        std::span<const std::uint32_t> line_numbers);
-
-void insert_metadata_lines(const SqliteDatabase& db, int file_info_id,
-                           std::uint64_t checkpoint_idx,
-                           std::string_view meta_type,
-                           const std::vector<std::uint32_t>& line_numbers);
-
-void insert_metadata_lines(const SqliteDatabase& db, int file_info_id,
-                           std::uint64_t checkpoint_idx,
-                           std::string_view meta_type,
-                           std::span<const std::uint32_t> line_numbers);
-
-// --- Query operations ---
+inline std::vector<unsigned char> pack_line_numbers(
+    const std::vector<std::uint32_t>& lines) {
+    std::vector<unsigned char> blob(lines.size() * sizeof(std::uint32_t));
+    if (!blob.empty()) {  // skip the copy entirely for empty input
+        std::memcpy(blob.data(), lines.data(), blob.size());
+    }
+    return blob;  // values as raw in-memory bytes, no byte-order conversion
+}
+
+inline std::vector<std::uint32_t> unpack_line_numbers(const unsigned char* data,
+                                                      std::size_t size) {
+    std::vector<std::uint32_t> lines(size / sizeof(std::uint32_t));
+    if (!lines.empty()) {  // data is not read when no full value fits
+        std::memcpy(lines.data(), data, lines.size() * sizeof(std::uint32_t));
+    }
+    return lines;  // leftover bytes short of a full value are ignored
+}
 
 struct EventRangeResult {
     std::uint64_t checkpoint_idx;
@@ -53,46 +36,12 @@ struct EventRangeResult {
     std::uint64_t event_count;
 };
 
-std::vector<EventRangeResult> query_event_ranges(const SqliteDatabase& db,
-                                                 int file_info_id);
-
-std::vector<EventRangeResult> query_event_ranges_for_checkpoint(
-    const SqliteDatabase& db, int file_info_id, std::uint64_t checkpoint_idx);
-
 struct MetadataLinesResult {
     std::uint64_t checkpoint_idx;
     std::string meta_type;
     std::vector<std::uint32_t> line_numbers;
 };
 
-std::vector<MetadataLinesResult> query_metadata_lines(const SqliteDatabase& db,
-                                                      int file_info_id);
-
-std::vector<MetadataLinesResult> query_metadata_lines_for_checkpoint(
-    const SqliteDatabase& db, int file_info_id, std::uint64_t checkpoint_idx);
-
-// --- Delete operations ---
-
-void delete_event_ranges(const SqliteDatabase& db, int file_info_id);
-
-void delete_metadata_lines(const SqliteDatabase& db, int file_info_id);
-
-// --- Provenance operations ---
-
-void insert_provenance_info(const SqliteDatabase& db, std::string_view key,
-                            std::string_view value);
-
-void insert_provenance_source(const SqliteDatabase& db, int file_info_id,
-                              int source_idx, std::string_view path,
-                              int num_checkpoints, std::string_view event_hash);
-
-void insert_provenance_group(const SqliteDatabase& db, std::string_view name,
-                             std::string_view predicate);
-
-void insert_provenance_segment(const SqliteDatabase& db, int source_idx,
-                               int source_checkpoint, int output_line_start,
-                               int output_line_end, int event_count);
-
 struct ProvenanceSource {
     int source_idx;
     std::string path;
@@ -100,9 +49,6 @@ struct ProvenanceSource {
     std::string event_hash;
 };
 
-std::vector<ProvenanceSource> query_provenance_sources(const SqliteDatabase& db,
-                                                       int file_info_id);
-
 struct ProvenanceSegment {
     int source_idx;
     int source_checkpoint;
@@ -111,19 +57,6 @@ struct ProvenanceSegment {
     int event_count;
 };
 
-std::vector<ProvenanceSegment> query_provenance_segments(
-    const SqliteDatabase& db, int source_idx);
-
-std::vector<ProvenanceSegment> query_all_provenance_segments(
-    const SqliteDatabase& db);
-
-std::string query_provenance_info(const SqliteDatabase& db,
-                                  std::string_view key);
-
-std::string query_provenance_group_name(const SqliteDatabase& db);
-
-std::string query_provenance_group_predicate(const SqliteDatabase& db);
-
 }  // namespace
    // dftracer::utils::utilities::composites::dft::indexing::queries
 
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/queries.h b/src/dftracer/utils/utilities/composites/dft/indexing/queries/queries.h
index 09dcd6fc..2e392a40 100644
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/queries.h
+++ b/src/dftracer/utils/utilities/composites/dft/indexing/queries/queries.h
@@ -1,168 +1,38 @@
 #ifndef DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_INDEXING_QUERIES_H
 #define DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_INDEXING_QUERIES_H
 
-#include <dftracer/utils/core/sqlite/database.h>
-#include <dftracer/utils/core/sqlite/statement.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h>
 
 #include <cstdint>
 #include <limits>
-#include <optional>
-#include <span>
 #include <string>
-#include <string_view>
-#include <unordered_map>
 #include <vector>
 
 namespace dftracer::utils::utilities::composites::dft::indexing::queries {
 
-using dftracer::utils::sqlite::SqliteDatabase;
-using dftracer::utils::sqlite::SqliteStmt;
-
-// --- Insert operations ---
-
-void insert_chunk_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               const void* blob_data, int blob_size,
-                               std::uint64_t num_entries);
-
-void insert_chunk_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               std::span<const unsigned char> blob_data,
-                               std::uint64_t num_entries);
-
-void insert_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension, const void* blob_data,
-                              int blob_size, std::uint64_t num_entries);
-
-void insert_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension,
-                              std::span<const unsigned char> blob_data,
-                              std::uint64_t num_entries);
-
-void insert_chunk_statistics(const SqliteDatabase& db, int file_info_id,
-                             std::uint64_t checkpoint_idx,
-                             const ChunkStatistics& stats);
-
-void insert_index_dimension(const SqliteDatabase& db, int file_info_id,
-                            std::string_view dimension);
-
-void insert_hash_resolution(const SqliteDatabase& db, int file_info_id,
-                            std::string_view dimension,
-                            std::string_view hash_value,
-                            std::string_view resolved_value);
-
-SqliteStmt prepare_insert_chunk_bloom_filter(const SqliteDatabase& db);
-void insert_chunk_bloom_filter(SqliteStmt& stmt, int file_info_id,
-                               std::uint64_t checkpoint_idx,
-                               std::string_view dimension,
-                               const void* blob_data, int blob_size,
-                               std::uint64_t num_entries);
-
-SqliteStmt prepare_insert_chunk_dimension_stats(const SqliteDatabase& db);
-void insert_chunk_dimension_stats(SqliteStmt& stmt, int file_info_id,
-                                  std::uint64_t checkpoint_idx,
-                                  const ChunkDimensionStats& stats,
-                                  std::size_t value_counts_cap);
-
-SqliteStmt prepare_insert_hash_resolution(const SqliteDatabase& db);
-void insert_hash_resolution(SqliteStmt& stmt, int file_info_id,
-                            std::string_view dimension,
-                            std::string_view hash_value,
-                            std::string_view resolved_value);
-
-// --- Query operations ---
-
 struct ChunkBloomResult {
     std::uint64_t checkpoint_idx;
     std::vector<unsigned char> bloom_data;
     std::uint64_t num_entries;
 };
 
-std::vector<ChunkBloomResult> query_chunk_bloom_filters(
-    const SqliteDatabase& db, int file_info_id, std::string_view dimension);
-
-/// Fetch chunk bloom filters for ALL specified dimensions in one query.
-std::unordered_map<std::string, std::vector<ChunkBloomResult>>
-query_chunk_bloom_filters_batch(const SqliteDatabase& db, int file_info_id,
-                                const std::vector<std::string>& dimensions);
-
 struct FileBloomResult {
     std::vector<unsigned char> bloom_data;
     std::uint64_t num_entries;
 };
 
-std::optional<FileBloomResult> query_file_bloom_filter(
-    const SqliteDatabase& db, int file_info_id, std::string_view dimension);
-
-/// Fetch file-level bloom filters for ALL specified dimensions in one query.
-std::unordered_map<std::string, FileBloomResult> query_file_bloom_filters_batch(
-    const SqliteDatabase& db, int file_info_id,
-    const std::vector<std::string>& dimensions);
-
-std::vector<std::string> query_index_dimensions(const SqliteDatabase& db,
-                                                int file_info_id);
-
-bool has_index_dimension(const SqliteDatabase& db, int file_info_id,
-                         std::string_view dimension);
-
 struct ChunkStatisticsResult {
     std::uint64_t checkpoint_idx;
     ChunkStatistics stats;
 };
 
-std::vector<ChunkStatisticsResult> query_chunk_statistics(
-    const SqliteDatabase& db, int file_info_id);
-
 struct TimeBounds {
     std::uint64_t min_timestamp_us = std::numeric_limits<std::uint64_t>::max();
     std::uint64_t max_timestamp_us = 0;
     bool valid = false;
 };
 
-/// Fast aggregate query: single-row SELECT MIN/MAX on chunk_statistics.
-TimeBounds query_time_bounds(const SqliteDatabase& db, int file_info_id);
-
-std::vector<std::string> query_hash_by_resolved(
-    const SqliteDatabase& db, std::string_view dimension,
-    std::string_view resolved_value);
-
-std::optional<std::string> query_resolved_by_hash(const SqliteDatabase& db,
-                                                  std::string_view dimension,
-                                                  std::string_view hash_value);
-
-// --- Chunk dimension stats ---
-
-void insert_chunk_dimension_stats(const SqliteDatabase& db, int file_info_id,
-                                  std::uint64_t checkpoint_idx,
-                                  const ChunkDimensionStats& stats,
-                                  std::size_t value_counts_cap = 4096);
-
-std::vector<ChunkDimensionStatsResult> query_chunk_dimension_stats(
-    const SqliteDatabase& db, int file_info_id);
-
-std::vector<ChunkDimensionStatsResult>
-query_chunk_dimension_stats_for_dimension(const SqliteDatabase& db,
-                                          int file_info_id,
-                                          std::string_view dimension);
-
-void delete_chunk_dimension_stats(const SqliteDatabase& db, int file_info_id);
-
-// --- Delete operations ---
-
-void delete_chunk_bloom_filters(const SqliteDatabase& db, int file_info_id,
-                                std::string_view dimension);
-
-void delete_file_bloom_filter(const SqliteDatabase& db, int file_info_id,
-                              std::string_view dimension);
-
-void delete_chunk_statistics(const SqliteDatabase& db, int file_info_id);
-
-void delete_hash_resolutions(const SqliteDatabase& db, int file_info_id);
-
 }  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
 
 #endif  // DFTRACER_UTILS_UTILITIES_COMPOSITES_DFT_INDEXING_QUERIES_H
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters.cpp
deleted file mode 100644
index 7c940afb..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstring>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<ChunkBloomResult> query_chunk_bloom_filters(
-    const SqliteDatabase& db, int file_info_id, std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, bloom_data, num_entries "
-                    "FROM chunk_bloom_filters "
-                    "WHERE file_info_id = ? AND dimension = ? "
-                    "ORDER BY checkpoint_idx;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    std::vector<ChunkBloomResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ChunkBloomResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-
-        const void* blob = sqlite3_column_blob(stmt, 1);
-        int blob_size = sqlite3_column_bytes(stmt, 1);
-        if (blob && blob_size > 0) {
-            r.bloom_data.resize(static_cast<std::size_t>(blob_size));
-            std::memcpy(r.bloom_data.data(), blob,
-                        static_cast<std::size_t>(blob_size));
-        }
-
-        r.num_entries =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        results.push_back(std::move(r));
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters_batch.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters_batch.cpp
deleted file mode 100644
index d8ef11ed..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_bloom_filters_batch.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstring>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::unordered_map<std::string, std::vector<ChunkBloomResult>>
-query_chunk_bloom_filters_batch(const SqliteDatabase& db, int file_info_id,
-                                const std::vector<std::string>& dimensions) {
-    std::unordered_map<std::string, std::vector<ChunkBloomResult>> results;
-    if (dimensions.empty()) return results;
-
-    std::string sql =
-        "SELECT dimension, checkpoint_idx, bloom_data, num_entries "
-        "FROM chunk_bloom_filters "
-        "WHERE file_info_id = ? AND dimension IN (";
-    for (std::size_t i = 0; i < dimensions.size(); ++i) {
-        if (i > 0) sql += ',';
-        sql += '?';
-    }
-    sql += ") ORDER BY dimension, checkpoint_idx;";
-
-    SqliteStmt stmt(db, sql.c_str());
-    stmt.bind_int(1, file_info_id);
-    for (std::size_t i = 0; i < dimensions.size(); ++i) {
-        stmt.bind_text(static_cast<int>(i + 2), dimensions[i]);
-    }
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        const char* dim_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-        std::string dim(dim_text ? dim_text : "");
-
-        ChunkBloomResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-
-        const void* blob = sqlite3_column_blob(stmt, 2);
-        int blob_size = sqlite3_column_bytes(stmt, 2);
-        if (blob && blob_size > 0) {
-            r.bloom_data.resize(static_cast<std::size_t>(blob_size));
-            std::memcpy(r.bloom_data.data(), blob,
-                        static_cast<std::size_t>(blob_size));
-        }
-
-        r.num_entries =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        results[dim].push_back(std::move(r));
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_dimension_stats.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_dimension_stats.cpp
deleted file mode 100644
index 410a3835..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_dimension_stats.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<ChunkDimensionStatsResult> query_chunk_dimension_stats(
-    const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, dimension, distinct_count, "
-                    "min_value, max_value, value_type, value_counts "
-                    "FROM chunk_dimension_stats WHERE file_info_id = ? "
-                    "ORDER BY checkpoint_idx, dimension;");
-
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<ChunkDimensionStatsResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ChunkDimensionStatsResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-
-        const char* dim =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-        r.dimension = dim ? dim : "";
-
-        r.distinct_count =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-
-        const char* min_val =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 3));
-        r.min_value = min_val ? min_val : "";
-
-        const char* max_val =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 4));
-        r.max_value = max_val ? max_val : "";
-
-        const char* vtype =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 5));
-        r.value_type = vtype ? vtype : "string";
-
-        // value_counts BLOB (compressed, may be NULL)
-        if (sqlite3_column_type(stmt, 6) != SQLITE_NULL) {
-            auto* blob =
-                static_cast<const uint8_t*>(sqlite3_column_blob(stmt, 6));
-            auto blob_len =
-                static_cast<std::size_t>(sqlite3_column_bytes(stmt, 6));
-            if (blob && blob_len > 0) {
-                r.value_counts = ChunkDimensionStats::decompress_value_counts(
-                    blob, blob_len);
-            }
-        }
-
-        results.push_back(std::move(r));
-    }
-
-    return results;
-}
-
-std::vector<ChunkDimensionStatsResult>
-query_chunk_dimension_stats_for_dimension(const SqliteDatabase& db,
-                                          int file_info_id,
-                                          std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, dimension, distinct_count, "
-                    "min_value, max_value, value_type, value_counts "
-                    "FROM chunk_dimension_stats "
-                    "WHERE file_info_id = ? AND dimension = ? "
-                    "ORDER BY checkpoint_idx;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    std::vector<ChunkDimensionStatsResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ChunkDimensionStatsResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-
-        const char* dim =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-        r.dimension = dim ? dim : "";
-
-        r.distinct_count =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-
-        const char* min_val =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 3));
-        r.min_value = min_val ? min_val : "";
-
-        const char* max_val =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 4));
-        r.max_value = max_val ? max_val : "";
-
-        const char* vtype =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 5));
-        r.value_type = vtype ? vtype : "string";
-
-        if (sqlite3_column_type(stmt, 6) != SQLITE_NULL) {
-            auto* blob =
-                static_cast<const uint8_t*>(sqlite3_column_blob(stmt, 6));
-            auto blob_len =
-                static_cast<std::size_t>(sqlite3_column_bytes(stmt, 6));
-            if (blob && blob_len > 0) {
-                r.value_counts = ChunkDimensionStats::decompress_value_counts(
-                    blob, blob_len);
-            }
-        }
-
-        results.push_back(std::move(r));
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_statistics.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_statistics.cpp
deleted file mode 100644
index 263f6684..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_chunk_statistics.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <limits>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<ChunkStatisticsResult> query_chunk_statistics(
-    const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, total_events, "
-        "min_timestamp_us, max_timestamp_us, "
-        "duration_sum_us, duration_min_us, duration_max_us, duration_count, "
-        "duration_m2, duration_sketch, duration_histogram, "
-        "name_duration_sketches, name_duration_histograms, "
-        "name_duration_sums, name_duration_sum_sqs, name_category "
-        "FROM chunk_statistics WHERE file_info_id = ? "
-        "ORDER BY checkpoint_idx;");
-
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<ChunkStatisticsResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ChunkStatisticsResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-
-        r.stats.total_events =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-
-        // Timestamps (may be NULL)
-        if (sqlite3_column_type(stmt, 2) != SQLITE_NULL) {
-            r.stats.min_timestamp_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        } else {
-            r.stats.min_timestamp_us =
-                std::numeric_limits<std::uint64_t>::max();
-        }
-
-        if (sqlite3_column_type(stmt, 3) != SQLITE_NULL) {
-            r.stats.max_timestamp_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        } else {
-            r.stats.max_timestamp_us = 0;
-        }
-
-        r.stats.duration_sum_us = sqlite3_column_int64(stmt, 4);
-
-        if (sqlite3_column_type(stmt, 5) != SQLITE_NULL) {
-            r.stats.duration_min_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 5));
-        } else {
-            r.stats.duration_min_us = std::numeric_limits<std::uint64_t>::max();
-        }
-
-        if (sqlite3_column_type(stmt, 6) != SQLITE_NULL) {
-            r.stats.duration_max_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 6));
-        } else {
-            r.stats.duration_max_us = 0;
-        }
-
-        r.stats.duration_count =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        r.stats.duration_m2 = sqlite3_column_double(stmt, 8);
-
-        // duration_sketch BLOB (column 9)
-        if (sqlite3_column_type(stmt, 9) != SQLITE_NULL) {
-            auto* blob =
-                static_cast<const uint8_t*>(sqlite3_column_blob(stmt, 9));
-            auto blob_len =
-                static_cast<std::size_t>(sqlite3_column_bytes(stmt, 9));
-            if (blob && blob_len > 0) {
-                using dftracer::utils::utilities::common::statistics::DDSketch;
-                r.stats.duration_sketch = DDSketch::deserialize(blob, blob_len);
-            }
-        }
-
-        // duration_histogram TEXT (column 10)
-        const char* dh_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 10));
-        if (dh_text) {
-            using dftracer::utils::utilities::common::statistics::Log2Histogram;
-            r.stats.duration_histogram = Log2Histogram::from_json(dh_text);
-        }
-
-        // name_duration_sketches BLOB (column 11)
-        if (sqlite3_column_type(stmt, 11) != SQLITE_NULL) {
-            auto* blob =
-                static_cast<const uint8_t*>(sqlite3_column_blob(stmt, 11));
-            auto blob_len =
-                static_cast<std::size_t>(sqlite3_column_bytes(stmt, 11));
-            if (blob && blob_len > 0) {
-                r.stats.name_duration_sketches =
-                    ChunkStatistics::deserialize_name_duration_sketches(
-                        blob, blob_len);
-            }
-        }
-
-        // name_duration_histograms TEXT (column 12)
-        const char* ndh_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 12));
-        if (ndh_text) {
-            r.stats.name_duration_histograms =
-                ChunkStatistics::parse_histogram_map_json(ndh_text);
-        }
-
-        // name_duration_sums TEXT (column 13)
-        const char* nds_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 13));
-        if (nds_text) {
-            r.stats.name_duration_sums =
-                ChunkStatistics::parse_double_map_json(nds_text);
-        }
-
-        // name_duration_sum_sqs TEXT (column 14)
-        const char* ndss_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 14));
-        if (ndss_text) {
-            r.stats.name_duration_sum_sqs =
-                ChunkStatistics::parse_double_map_json(ndss_text);
-        }
-
-        // name_category TEXT (column 15)
-        const char* nc_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 15));
-        if (nc_text) {
-            r.stats.name_category =
-                ChunkStatistics::parse_string_map_json(nc_text);
-        }
-
-        results.push_back(std::move(r));
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_event_ranges.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_event_ranges.cpp
deleted file mode 100644
index b108758e..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_event_ranges.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<EventRangeResult> query_event_ranges(const SqliteDatabase& db,
-                                                 int file_info_id) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, cat, name, line_numbers, "
-                    "event_count "
-                    "FROM checkpoint_event_ranges "
-                    "WHERE file_info_id = ? "
-                    "ORDER BY checkpoint_idx, cat, name;");
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<EventRangeResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        EventRangeResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        r.cat = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-        r.name = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 2));
-
-        const auto* blob_data =
-            static_cast<const unsigned char*>(sqlite3_column_blob(stmt, 3));
-        int blob_size = sqlite3_column_bytes(stmt, 3);
-        r.line_numbers =
-            unpack_line_numbers(blob_data, static_cast<std::size_t>(blob_size));
-
-        r.event_count =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        results.push_back(std::move(r));
-    }
-    return results;
-}
-
-std::vector<EventRangeResult> query_event_ranges_for_checkpoint(
-    const SqliteDatabase& db, int file_info_id, std::uint64_t checkpoint_idx) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, cat, name, line_numbers, "
-                    "event_count "
-                    "FROM checkpoint_event_ranges "
-                    "WHERE file_info_id = ? AND checkpoint_idx = ? "
-                    "ORDER BY cat, name;");
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-
-    std::vector<EventRangeResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        EventRangeResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        r.cat = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-        r.name = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 2));
-
-        const auto* blob_data =
-            static_cast<const unsigned char*>(sqlite3_column_blob(stmt, 3));
-        int blob_size = sqlite3_column_bytes(stmt, 3);
-        r.line_numbers =
-            unpack_line_numbers(blob_data, static_cast<std::size_t>(blob_size));
-
-        r.event_count =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        results.push_back(std::move(r));
-    }
-    return results;
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filter.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filter.cpp
deleted file mode 100644
index bb12e59d..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filter.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstring>
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::optional<FileBloomResult> query_file_bloom_filter(
-    const SqliteDatabase& db, int file_info_id, std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "SELECT bloom_data, num_entries "
-                    "FROM file_bloom_filters "
-                    "WHERE file_info_id = ? AND dimension = ?;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    int rc = sqlite3_step(stmt);
-    if (rc == SQLITE_ROW) {
-        FileBloomResult r;
-        const void* blob = sqlite3_column_blob(stmt, 0);
-        int blob_size = sqlite3_column_bytes(stmt, 0);
-        if (blob && blob_size > 0) {
-            r.bloom_data.resize(static_cast<std::size_t>(blob_size));
-            std::memcpy(r.bloom_data.data(), blob,
-                        static_cast<std::size_t>(blob_size));
-        }
-        r.num_entries =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        return r;
-    }
-
-    return std::nullopt;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filters_batch.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filters_batch.cpp
deleted file mode 100644
index e18c2558..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_file_bloom_filters_batch.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstring>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::unordered_map<std::string, FileBloomResult> query_file_bloom_filters_batch(
-    const SqliteDatabase& db, int file_info_id,
-    const std::vector<std::string>& dimensions) {
-    std::unordered_map<std::string, FileBloomResult> results;
-    if (dimensions.empty()) return results;
-
-    std::string sql =
-        "SELECT dimension, bloom_data, num_entries "
-        "FROM file_bloom_filters "
-        "WHERE file_info_id = ? AND dimension IN (";
-    for (std::size_t i = 0; i < dimensions.size(); ++i) {
-        if (i > 0) sql += ',';
-        sql += '?';
-    }
-    sql += ");";
-
-    SqliteStmt stmt(db, sql.c_str());
-    stmt.bind_int(1, file_info_id);
-    for (std::size_t i = 0; i < dimensions.size(); ++i) {
-        stmt.bind_text(static_cast<int>(i + 2), dimensions[i]);
-    }
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        const char* dim_text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-        std::string dim(dim_text ? dim_text : "");
-
-        FileBloomResult r;
-        const void* blob = sqlite3_column_blob(stmt, 1);
-        int blob_size = sqlite3_column_bytes(stmt, 1);
-        if (blob && blob_size > 0) {
-            r.bloom_data.resize(static_cast<std::size_t>(blob_size));
-            std::memcpy(r.bloom_data.data(), blob,
-                        static_cast<std::size_t>(blob_size));
-        }
-
-        r.num_entries =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        results[dim] = std::move(r);
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_hash_by_resolved.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_hash_by_resolved.cpp
deleted file mode 100644
index d16f8088..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_hash_by_resolved.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<std::string> query_hash_by_resolved(
-    const SqliteDatabase& db, std::string_view dimension,
-    std::string_view resolved_value) {
-    SqliteStmt stmt(db,
-                    "SELECT DISTINCT hash_value FROM hash_resolutions "
-                    "WHERE dimension = ? AND resolved_value = ?;");
-
-    stmt.bind_text(1, dimension);
-    stmt.bind_text(2, resolved_value);
-
-    std::vector<std::string> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        const char* text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-        if (text) {
-            results.emplace_back(text);
-        }
-    }
-
-    return results;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_index_dimensions.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_index_dimensions.cpp
deleted file mode 100644
index afb55d1d..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_index_dimensions.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<std::string> query_index_dimensions(const SqliteDatabase& db,
-                                                int file_info_id) {
-    SqliteStmt stmt(
-        db, "SELECT dimension FROM index_dimensions WHERE file_info_id = ?;");
-
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<std::string> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        const char* text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-        if (text) {
-            results.emplace_back(text);
-        }
-    }
-
-    return results;
-}
-
-bool has_index_dimension(const SqliteDatabase& db, int file_info_id,
-                         std::string_view dimension) {
-    SqliteStmt stmt(db,
-                    "SELECT 1 FROM index_dimensions "
-                    "WHERE file_info_id = ? AND dimension = ? LIMIT 1;");
-
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_text(2, dimension);
-
-    return sqlite3_step(stmt) == SQLITE_ROW;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_metadata_lines.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_metadata_lines.cpp
deleted file mode 100644
index b9b8f325..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_metadata_lines.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<MetadataLinesResult> query_metadata_lines(const SqliteDatabase& db,
-                                                      int file_info_id) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, meta_type, line_numbers "
-                    "FROM checkpoint_metadata_lines "
-                    "WHERE file_info_id = ? "
-                    "ORDER BY checkpoint_idx, meta_type;");
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<MetadataLinesResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        MetadataLinesResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        r.meta_type =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-
-        const auto* blob_data =
-            static_cast<const unsigned char*>(sqlite3_column_blob(stmt, 2));
-        int blob_size = sqlite3_column_bytes(stmt, 2);
-        r.line_numbers =
-            unpack_line_numbers(blob_data, static_cast<std::size_t>(blob_size));
-
-        results.push_back(std::move(r));
-    }
-    return results;
-}
-
-std::vector<MetadataLinesResult> query_metadata_lines_for_checkpoint(
-    const SqliteDatabase& db, int file_info_id, std::uint64_t checkpoint_idx) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_idx, meta_type, line_numbers "
-                    "FROM checkpoint_metadata_lines "
-                    "WHERE file_info_id = ? AND checkpoint_idx = ? "
-                    "ORDER BY meta_type;");
-    stmt.bind_int(1, file_info_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(checkpoint_idx));
-
-    std::vector<MetadataLinesResult> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        MetadataLinesResult r;
-        r.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        r.meta_type =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-
-        const auto* blob_data =
-            static_cast<const unsigned char*>(sqlite3_column_blob(stmt, 2));
-        int blob_size = sqlite3_column_bytes(stmt, 2);
-        r.line_numbers =
-            unpack_line_numbers(blob_data, static_cast<std::size_t>(blob_size));
-
-        results.push_back(std::move(r));
-    }
-    return results;
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_provenance.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_provenance.cpp
deleted file mode 100644
index aab5abfb..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_provenance.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::vector<ProvenanceSource> query_provenance_sources(const SqliteDatabase& db,
-                                                       int file_info_id) {
-    SqliteStmt stmt(db,
-                    "SELECT source_idx, path, num_checkpoints, "
-                    "event_hash "
-                    "FROM provenance_sources "
-                    "WHERE file_info_id = ? "
-                    "ORDER BY source_idx;");
-    stmt.bind_int(1, file_info_id);
-
-    std::vector<ProvenanceSource> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ProvenanceSource s;
-        s.source_idx = sqlite3_column_int(stmt, 0);
-        s.path = reinterpret_cast<const char*>(sqlite3_column_text(stmt, 1));
-        s.num_checkpoints = sqlite3_column_int(stmt, 2);
-        s.event_hash =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 3));
-        results.push_back(std::move(s));
-    }
-    return results;
-}
-
-std::vector<ProvenanceSegment> query_provenance_segments(
-    const SqliteDatabase& db, int source_idx) {
-    SqliteStmt stmt(db,
-                    "SELECT source_idx, source_checkpoint, "
-                    "output_line_start, output_line_end, "
-                    "event_count "
-                    "FROM provenance_segments "
-                    "WHERE source_idx = ? "
-                    "ORDER BY source_checkpoint, "
-                    "output_line_start;");
-    stmt.bind_int(1, source_idx);
-
-    std::vector<ProvenanceSegment> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ProvenanceSegment s;
-        s.source_idx = sqlite3_column_int(stmt, 0);
-        s.source_checkpoint = sqlite3_column_int(stmt, 1);
-        s.output_line_start = sqlite3_column_int(stmt, 2);
-        s.output_line_end = sqlite3_column_int(stmt, 3);
-        s.event_count = sqlite3_column_int(stmt, 4);
-        results.push_back(std::move(s));
-    }
-    return results;
-}
-
-std::vector<ProvenanceSegment> query_all_provenance_segments(
-    const SqliteDatabase& db) {
-    SqliteStmt stmt(db,
-                    "SELECT source_idx, source_checkpoint, "
-                    "output_line_start, output_line_end, "
-                    "event_count "
-                    "FROM provenance_segments "
-                    "ORDER BY output_line_start;");
-
-    std::vector<ProvenanceSegment> results;
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        ProvenanceSegment s;
-        s.source_idx = sqlite3_column_int(stmt, 0);
-        s.source_checkpoint = sqlite3_column_int(stmt, 1);
-        s.output_line_start = sqlite3_column_int(stmt, 2);
-        s.output_line_end = sqlite3_column_int(stmt, 3);
-        s.event_count = sqlite3_column_int(stmt, 4);
-        results.push_back(std::move(s));
-    }
-    return results;
-}
-
-std::string query_provenance_info(const SqliteDatabase& db,
-                                  std::string_view key) {
-    SqliteStmt stmt(db,
-                    "SELECT value FROM provenance_info "
-                    "WHERE key = ?;");
-    stmt.bind_text(1, key);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-    }
-    return "";
-}
-
-std::string query_provenance_group_name(const SqliteDatabase& db) {
-    SqliteStmt stmt(db, "SELECT name FROM provenance_group LIMIT 1;");
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-    }
-    return "";
-}
-
-std::string query_provenance_group_predicate(const SqliteDatabase& db) {
-    SqliteStmt stmt(db,
-                    "SELECT predicate FROM provenance_group "
-                    "LIMIT 1;");
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        if (sqlite3_column_type(stmt, 0) == SQLITE_NULL) {
-            return "";
-        }
-        return reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-    }
-    return "";
-}
-
-}  // namespace
-   // dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_resolved_by_hash.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_resolved_by_hash.cpp
deleted file mode 100644
index 37a918e0..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_resolved_by_hash.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-
-#include <string_view>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-std::optional<std::string> query_resolved_by_hash(const SqliteDatabase& db,
-                                                  std::string_view dimension,
-                                                  std::string_view hash_value) {
-    SqliteStmt stmt(db,
-                    "SELECT resolved_value FROM hash_resolutions "
-                    "WHERE dimension = ? AND hash_value = ? LIMIT 1;");
-
-    stmt.bind_text(1, dimension);
-    stmt.bind_text(2, hash_value);
-
-    int rc = sqlite3_step(stmt);
-    if (rc == SQLITE_ROW) {
-        const char* text =
-            reinterpret_cast<const char*>(sqlite3_column_text(stmt, 0));
-        if (text) {
-            return std::string(text);
-        }
-    }
-
-    return std::nullopt;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_time_bounds.cpp b/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_time_bounds.cpp
deleted file mode 100644
index 3fb128e3..00000000
--- a/src/dftracer/utils/utilities/composites/dft/indexing/queries/query_time_bounds.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
-
-#include <limits>
-
-namespace dftracer::utils::utilities::composites::dft::indexing::queries {
-
-using dftracer::utils::sqlite::SqliteStmt;
-
-TimeBounds query_time_bounds(const SqliteDatabase& db, int file_info_id) {
-    SqliteStmt stmt(db,
-                    "SELECT MIN(min_timestamp_us), MAX(max_timestamp_us) "
-                    "FROM chunk_statistics WHERE file_info_id = ? "
-                    "AND min_timestamp_us IS NOT NULL "
-                    "AND max_timestamp_us IS NOT NULL;");
-
-    stmt.bind_int(1, file_info_id);
-
-    TimeBounds result;
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        if (sqlite3_column_type(stmt, 0) != SQLITE_NULL) {
-            result.min_timestamp_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        }
-        if (sqlite3_column_type(stmt, 1) != SQLITE_NULL) {
-            result.max_timestamp_us =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        }
-        result.valid = (result.min_timestamp_us !=
-                        std::numeric_limits<std::uint64_t>::max());
-    }
-
-    return result;
-}
-
-}  // namespace dftracer::utils::utilities::composites::dft::indexing::queries
diff --git a/src/dftracer/utils/utilities/composites/dft/internal/utils.cpp b/src/dftracer/utils/utilities/composites/dft/internal/utils.cpp
index 6fb8586c..150a61d8 100644
--- a/src/dftracer/utils/utilities/composites/dft/internal/utils.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/internal/utils.cpp
@@ -11,26 +11,14 @@ namespace dftracer::utils::utilities::composites::dft::internal {
 std::string determine_index_path(const std::string& file_path,
                                  const std::string& index_dir) {
     fs::path data_path(file_path);
-    std::string base_name =
-        data_path.filename().string() + constants::indexer::EXTENSION;
-
-    if (!index_dir.empty()) {
-        return (fs::path(index_dir) / base_name).string();
-    }
-
-    return (data_path.parent_path() / base_name).string();
+    fs::path root =
+        index_dir.empty() ? data_path.parent_path() : fs::path(index_dir);
+    return (root / ".dftindex").string();  // one root per dir, not per file
 }
 
 std::string determine_provenance_index_path(const std::string& data_path,
                                             const std::string& index_dir) {
-    fs::path path(data_path);
-    std::string base_name = path.filename().string() + ".pidx";
-
-    if (!index_dir.empty()) {
-        return (fs::path(index_dir) / base_name).string();
-    }
-
-    return (path.parent_path() / base_name).string();
+    return determine_index_path(data_path, index_dir);  // same root as index
 }
 
 bool is_data_transfer_op(std::string_view cat, std::string_view name) {
diff --git a/src/dftracer/utils/utilities/composites/dft/metadata_collector_utility.cpp b/src/dftracer/utils/utilities/composites/dft/metadata_collector_utility.cpp
index c7b1b4d0..8697b8d8 100644
--- a/src/dftracer/utils/utilities/composites/dft/metadata_collector_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/metadata_collector_utility.cpp
@@ -2,10 +2,12 @@
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
 #include <dftracer/utils/core/utils/string.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/metadata_collector_utility.h>
 #include <dftracer/utils/utilities/composites/indexed_file_reader_utility.h>
 #include <dftracer/utils/utilities/fileio/lines/streaming_line_reader.h>
 #include <dftracer/utils/utilities/hash/hasher_utility.h>
+#include <dftracer/utils/utilities/indexer/internal/helpers.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 
 namespace dftracer::utils::utilities::composites::dft {
@@ -28,11 +30,15 @@ MetadataCollectorUtility::process(const MetadataCollectorUtilityInput& input) {
         if (is_compressed) {
             // Compressed file - generate index path if not provided
             MetadataCollectorUtilityInput modified_input = input;
-            if (modified_input.idx_path.empty()) {
-                // Auto-generate index path
-                modified_input.idx_path = file_path + ".idx";
+            if (modified_input.index_path.empty()) {
+                modified_input.index_path =
+                    internal::determine_index_path(file_path, "");
+            } else {
+                modified_input.index_path =
+                    dftracer::utils::utilities::indexer::internal::
+                        normalize_index_root(modified_input.index_path);
             }
-            meta.idx_path = modified_input.idx_path;
+            meta.index_path = modified_input.index_path;
             co_return co_await process_compressed(modified_input);
         } else {
             // Plain text file
@@ -50,7 +56,7 @@ MetadataCollectorUtility::process_compressed(
     const MetadataCollectorUtilityInput& input) {
     MetadataCollectorUtilityOutput meta;
     meta.file_path = input.file_path;
-    meta.idx_path = input.idx_path;
+    meta.index_path = input.index_path;
 
     try {
         // Detect format
@@ -59,7 +65,7 @@ MetadataCollectorUtility::process_compressed(
         meta.compressed_size = fs::file_size(input.file_path);
 
         // Check if index exists
-        meta.has_index = fs::exists(input.idx_path);
+        meta.has_index = fs::exists(input.index_path);
 
         // Create or load indexer
         std::shared_ptr<dftracer::utils::utilities::indexer::internal::Indexer>
@@ -67,27 +73,27 @@ MetadataCollectorUtility::process_compressed(
         if (!meta.has_index || input.force_rebuild) {
             if (input.force_rebuild && meta.has_index) {
-                DFTRACER_UTILS_LOG_DEBUG("Removing existing index: %s",
-                                         input.idx_path.c_str());
-                fs::remove(input.idx_path);
+                // Shared index root: rebuild in place, never remove_all() it.
+                DFTRACER_UTILS_LOG_DEBUG("Rebuilding existing index: %s",
+                                         input.index_path.c_str());
             }
             DFTRACER_UTILS_LOG_DEBUG("Building index for: %s",
                                      input.file_path.c_str());
             indexer = dftracer::utils::utilities::indexer::internal::
-                IndexerFactory::create(input.file_path, input.idx_path,
+                IndexerFactory::create(input.file_path, input.index_path,
                                        input.checkpoint_size, true);
             co_await indexer->build_async();
             meta.has_index = true;
         } else {
             indexer = dftracer::utils::utilities::indexer::internal::
-                IndexerFactory::create(input.file_path, input.idx_path,
+                IndexerFactory::create(input.file_path, input.index_path,
                                        input.checkpoint_size, false);
             if (indexer->need_rebuild()) {
                 DFTRACER_UTILS_LOG_DEBUG("Index needs rebuild: %s",
-                                         input.idx_path.c_str());
+                                         input.index_path.c_str());
                 meta.index_valid = false;
-                fs::remove(input.idx_path);
+                // NOTE(review): assumes forced build replaces stale entries.
                 indexer = dftracer::utils::utilities::indexer::internal::
-                    IndexerFactory::create(input.file_path, input.idx_path,
+                    IndexerFactory::create(input.file_path, input.index_path,
                                            input.checkpoint_size, true);
                 co_await indexer->build_async();
             }
@@ -124,7 +130,7 @@ MetadataCollectorUtility::process_compressed(
                 auto line_gen = StreamingLineReader::read_async(
                     StreamingLineReaderConfig()
                         .with_file(input.file_path)
-                        .with_index(input.idx_path)
+                        .with_index(input.index_path)
                         .with_line_range(1, total_lines));
                 while (auto line_opt = co_await line_gen.next()) {
                     const auto& line = *line_opt;
@@ -173,7 +179,7 @@ MetadataCollectorUtility::process_plain(
     const MetadataCollectorUtilityInput& input) {
     MetadataCollectorUtilityOutput meta;
     meta.file_path = input.file_path;
-    meta.idx_path = "";
+    meta.index_path = "";
 
     try {
         // Plain file metadata
diff --git a/src/dftracer/utils/utilities/composites/dft/reorganize/event_router.cpp b/src/dftracer/utils/utilities/composites/dft/reorganize/event_router.cpp
index 881f54dd..aabcc54c 100644
--- a/src/dftracer/utils/utilities/composites/dft/reorganize/event_router.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/reorganize/event_router.cpp
@@ -156,9 +156,9 @@ coro::CoroTask<SourceResult> process_source(
             result.output_files.push_back(chunk.path);
         }
 
-        trackers[gi].flush_to_db(plan, plan.groups[gi].name,
-                                 plan.groups[gi].query, writers[gi]->chunks(),
-                                 config.output_dir);
+        co_await trackers[gi].flush_to_db(
+            plan, plan.groups[gi].name, plan.groups[gi].query,
+            writers[gi]->chunks(), config.output_dir);
     }
 
     result.success = true;
@@ -187,12 +187,14 @@ coro::CoroTask<EventRouterResult> route_events(
     futures.reserve(tasks_by_source.size());
 
     for (const auto& [src_idx, src_tasks] : tasks_by_source) {
+        auto* config_ptr = &config;
         futures.push_back(
-            scope.spawn([src_idx, &config, tasks = src_tasks, permits](
+            scope.spawn([src_idx, config_ptr, tasks = src_tasks, permits](
                             CoroScope& s) -> coro::CoroTask<SourceResult> {
                 co_await s.receive(permits);
                 try {
-                    auto r = co_await process_source(src_idx, config, tasks);
+                    auto r =
+                        co_await process_source(src_idx, *config_ptr, tasks);
                     permits->try_send(true);
                     co_return r;
                 } catch (...) {
diff --git a/src/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.cpp b/src/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.cpp
index 99dc4a35..31a97cce 100644
--- a/src/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.cpp
@@ -1,6 +1,8 @@
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
+#include <dftracer/utils/core/rocksdb/async.h>
 #include <dftracer/utils/utilities/composites/dft/reorganize/provenance_tracker.h>
+#include <dftracer/utils/utilities/indexer/internal/transaction_scope.h>
 #include <dftracer/utils/utilities/indexer/provenance_database.h>
 
 namespace dftracer::utils::utilities::composites::dft::reorganize {
@@ -13,7 +15,7 @@ void ProvenanceTracker::record(int source_file_idx, int checkpoint_idx,
                                         output_line_end, event_count});
 }
 
-void ProvenanceTracker::flush_to_db(
+coro::CoroTask<void> ProvenanceTracker::flush_to_db(
     const ExtractionPlan& plan, const std::string& group_name,
     const std::string& group_query,
     const std::vector<fileio::ChunkInfo>& chunks,
@@ -21,44 +23,58 @@ void ProvenanceTracker::flush_to_db(
     using indexer::ProvenanceDatabase;
 
     for (const auto& chunk : chunks) {
-        std::string pidx_path = chunk.path + ".pidx";
+        auto provenance_path = std::make_shared<std::string>(
+            indexer::determine_provenance_index_path(chunk.path));
+        const auto* plan_ptr = &plan;
+        const auto* group_name_ptr = &group_name;
+        const auto* group_query_ptr = &group_query;
+        const auto* chunk_ptr = &chunk;
+        const auto* records_ptr = &records_;
 
         try {
-            ProvenanceDatabase pdb(pidx_path);
-            pdb.init_schema();
+            co_await rocksdb::run([plan_ptr, group_name_ptr, group_query_ptr,
+                                   chunk_ptr, records_ptr, provenance_path] {
+                ProvenanceDatabase pdb(*provenance_path);
+                pdb.init_schema();
 
-            std::uint64_t out_hash = 0;
-            if (fs::exists(chunk.path)) {
-                out_hash =
-                    static_cast<std::uint64_t>(fs::file_size(chunk.path));
-            }
-            int fid = pdb.get_or_create_file_info(chunk.path, out_hash);
+                std::uint64_t out_hash = 0;
+                if (fs::exists(chunk_ptr->path)) {
+                    out_hash = static_cast<std::uint64_t>(
+                        fs::file_size(chunk_ptr->path));
+                }
+                int fid =
+                    pdb.get_or_create_file_info(chunk_ptr->path, out_hash);
 
-            pdb.begin_transaction();
+                indexer::internal::TransactionScope txn(pdb);
+                pdb.insert_info(fid, "version", "2.0");
+                pdb.insert_info(fid, "tool", "dftracer_organize");
+                pdb.insert_group(fid, *group_name_ptr, *group_query_ptr);
 
-            pdb.insert_info("version", "2.0");
-            pdb.insert_info("tool", "dftracer_organize");
-            pdb.insert_group(group_name, group_query);
+                for (std::size_t si = 0; si < plan_ptr->source_files.size();
+                     ++si) {
+                    const auto& src = plan_ptr->source_files[si];
+                    pdb.insert_source(fid, static_cast<int>(si), src.file_path,
+                                      static_cast<int>(src.num_checkpoints));
+                }
 
-            for (std::size_t si = 0; si < plan.source_files.size(); ++si) {
-                const auto& src = plan.source_files[si];
-                pdb.insert_source(fid, static_cast<int>(si), src.file_path,
-                                  static_cast<int>(src.num_checkpoints));
-            }
+                for (const auto& rec : *records_ptr) {
+                    if (rec.output_chunk_idx != chunk_ptr->chunk_index)
+                        continue;
+                    pdb.insert_segment(fid, rec.source_file_idx,
+                                       rec.checkpoint_idx,
+                                       rec.output_line_start,
+                                       rec.output_line_end, rec.event_count);
+                }
 
-            for (const auto& rec : records_) {
-                if (rec.output_chunk_idx != chunk.chunk_index) continue;
-                pdb.insert_segment(rec.source_file_idx, rec.checkpoint_idx,
-                                   rec.output_line_start, rec.output_line_end,
-                                   rec.event_count);
-            }
-
-            pdb.commit_transaction();
+                txn.commit();
+            });
         } catch (const std::exception& e) {
             DFTRACER_UTILS_LOG_ERROR("Provenance write failed for %s: %s",
                                      chunk.path.c_str(), e.what());
         }
     }
+
+    co_return;
 }
 
 }  // namespace dftracer::utils::utilities::composites::dft::reorganize
diff --git a/src/dftracer/utils/utilities/composites/dft/reorganize/reconstruction_planner.cpp b/src/dftracer/utils/utilities/composites/dft/reorganize/reconstruction_planner.cpp
index 54a39cf8..d0a4678c 100644
--- a/src/dftracer/utils/utilities/composites/dft/reorganize/reconstruction_planner.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/reorganize/reconstruction_planner.cpp
@@ -19,21 +19,22 @@ coro::CoroTask<ReconstructionPlan> ReconstructionPlannerUtility::process(
     ReconstructionPlan plan;
 
     for (const auto& reorg_file : input.reorganized_files) {
-        std::string pidx_path = internal::determine_provenance_index_path(
+        std::string provenance_path = internal::determine_provenance_index_path(
             reorg_file, input.index_dir);
 
-        if (!fs::exists(pidx_path)) {
+        if (!fs::exists(provenance_path)) {
             continue;
         }
 
-        ProvenanceDatabase pdb(pidx_path);
-        pdb.init_schema();
+        ProvenanceDatabase pdb(
+            provenance_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
 
         int fid = pdb.get_file_info_id(reorg_file);
         if (fid < 0) continue;
 
         // Check if this file has provenance
-        std::string tool = pdb.query_info("tool");
+        std::string tool = pdb.query_info(fid, "tool");
         if (tool.empty()) continue;
 
         // Read sources
@@ -54,7 +55,7 @@ coro::CoroTask<ReconstructionPlan> ReconstructionPlannerUtility::process(
         }
 
         // Read all segments
-        auto segments = pdb.query_all_segments();
+        auto segments = pdb.query_all_segments(fid);
 
         for (const auto& seg : segments) {
             auto src_it = source_map.find(seg.source_idx);
diff --git a/src/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.cpp b/src/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.cpp
index 98d87f5f..b64904be 100644
--- a/src/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/reorganize/reorganization_planner.cpp
@@ -1,5 +1,6 @@
 #include <dftracer/utils/core/common/constants.h>
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/core/utils/string.h>
 #include <dftracer/utils/utilities/common/json/json_value.h>
 #include <dftracer/utils/utilities/common/query/query.h>
 #include <dftracer/utils/utilities/composites/dft/internal/utils.h>
@@ -12,6 +13,7 @@
 #include <yyjson.h>
 
 #include <algorithm>
+#include <cctype>
 #include <map>
 #include <set>
 #include <stdexcept>
@@ -20,7 +22,6 @@ namespace dftracer::utils::utilities::composites::dft::reorganize {
 
 namespace {
 
-using common::json::JsonValue;
 using common::query::Query;
 using dftracer::utils::utilities::indexer::IndexBuildConfig;
 using dftracer::utils::utilities::indexer::IndexBuilderUtility;
@@ -93,7 +94,7 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
     for (std::size_t fi = 0; fi < input.source_files.size(); ++fi) {
         const auto& file_path = input.source_files[fi];
 
-        // Build .idx if needed
+        // Build the shared `.dftindex` store if needed.
         IndexBuilderUtility idx_builder;
         auto idx_input = IndexBuildConfig::for_file(file_path).with_index_dir(
             input.index_dir);
@@ -109,7 +110,7 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
         MetadataCollectorUtility metadata_collector;
         auto meta_input =
             MetadataCollectorUtilityInput::from_file(file_path).with_index(
-                idx_result.idx_path);
+                idx_result.index_path);
         if (input.checkpoint_size > 0) {
             meta_input.with_checkpoint_size(input.checkpoint_size);
         }
@@ -119,8 +120,8 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
                                      file_path);
         }
 
-        // Determine .idx path (manifest data now lives in .idx)
-        std::string idx_path =
+        // Determine the root-local `.dftindex` store path.
+        std::string index_path =
             internal::determine_index_path(file_path, input.index_dir);
 
         // Effective checkpoint count: treat 0 as 1
@@ -129,29 +130,26 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
 
         SourceFileInfo sfi;
         sfi.file_path = file_path;
-        sfi.idx_path = idx_result.idx_path;
-        sfi.idx_path = idx_path;
+        sfi.index_path = index_path;
         sfi.num_checkpoints = eff_ckpts;
         sfi.uncompressed_size = meta.uncompressed_size;
         sfi.checkpoint_size = meta.checkpoint_size;
         plan.source_files.push_back(std::move(sfi));
 
-        // Open .idx and try manifest-based planning. Fall back to
-        // whole-file streaming when manifest tables are absent
-        // (file was below index_threshold).
-        IndexDatabase idx_db(idx_path);
+        // Open the shared index store and try manifest-based planning. Fall
+        // back to whole-file streaming when manifest tables are absent (file
+        // was below index_threshold).
+        IndexDatabase idx_db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
         int file_info_id = idx_db.get_file_info_id(
             indexer::internal::get_logical_path(file_path));
         if (file_info_id < 0) {
-            throw std::runtime_error("File not found in .idx: " + file_path);
+            throw std::runtime_error("File not found in .dftindex: " +
+                                     file_path);
         }
 
-        bool has_manifest = true;
-        try {
-            idx_db.query_event_ranges_for_checkpoint(file_info_id, 0);
-        } catch (const std::exception&) {
-            has_manifest = false;
-        }
+        const bool has_manifest = idx_db.has_manifest_data(file_info_id);
 
         if (has_manifest) {
             // Manifest-based planning: per-checkpoint extraction tasks.
@@ -240,8 +238,22 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
                 const auto& line = *line_opt;
                 if (line.content.empty()) continue;
 
+                const char* begin = line.content.data();
+                const char* end = begin + line.content.size();
+                while (begin < end &&
+                       std::isspace(static_cast<unsigned char>(*begin))) {
+                    ++begin;
+                }
+                while (end > begin &&
+                       std::isspace(static_cast<unsigned char>(*(end - 1)))) {
+                    --end;
+                }
+                if (begin == end || *begin != '{' || *(end - 1) != '}') {
+                    continue;
+                }
+
                 yyjson_doc* doc =
-                    yyjson_read(line.content.data(), line.content.size(),
+                    yyjson_read(begin, static_cast<size_t>(end - begin),
                                 YYJSON_READ_NOFLAG);
                 if (!doc) continue;
 
@@ -251,43 +263,49 @@ coro::CoroTask<ExtractionPlan> ReorganizationPlannerUtility::process(
                     continue;
                 }
 
-                try {
-                    JsonValue json(root);
-                    std::string_view ph = json["ph"].get<std::string_view>();
-                    auto line_num =
-                        static_cast<std::uint32_t>(line.line_number);
-
-                    if (ph == "M") {
-                        meta_line_numbers.push_back(line_num);
-                    } else {
-                        std::string cat_str(
-                            json["cat"].get<std::string_view>());
-                        std::string name_str(
-                            json["name"].get<std::string_view>());
-
-                        bool matched = false;
-                        for (std::size_t gi = 0; gi < parsed_queries.size();
-                             ++gi) {
-                            const auto& q = parsed_queries[gi];
-                            if (!q) continue;
-                            common::query::ValueMap fields = {
-                                {"cat", cat_str}, {"name", name_str}};
-                            if (q->evaluate(fields)) {
-                                group_lines[plan.groups[gi].name].push_back(
-                                    line_num);
-                                matched = true;
-                                break;
-                            }
-                        }
-                        if (!matched) {
-                            group_lines[remainder_name].push_back(line_num);
-                        }
-                        plan.total_events++;
+                auto line_num = static_cast<std::uint32_t>(line.line_number);
+                yyjson_val* ph_val = yyjson_obj_get(root, "ph");
+                const bool is_metadata =
+                    ph_val && yyjson_is_str(ph_val) &&
+                    std::string_view(yyjson_get_str(ph_val),
+                                     yyjson_get_len(ph_val)) == "M";
+
+                if (is_metadata) {
+                    meta_line_numbers.push_back(line_num);
+                    yyjson_doc_free(doc);
+                    continue;
+                }
+
+                std::string cat_str;
+                if (yyjson_val* cat_val = yyjson_obj_get(root, "cat");
+                    cat_val && yyjson_is_str(cat_val)) {
+                    cat_str.assign(yyjson_get_str(cat_val),
+                                   yyjson_get_len(cat_val));
+                }
+
+                std::string name_str;
+                if (yyjson_val* name_val = yyjson_obj_get(root, "name");
+                    name_val && yyjson_is_str(name_val)) {
+                    name_str.assign(yyjson_get_str(name_val),
+                                    yyjson_get_len(name_val));
+                }
+
+                bool matched = false;
+                for (std::size_t gi = 0; gi < parsed_queries.size(); ++gi) {
+                    const auto& q = parsed_queries[gi];
+                    if (!q) continue;
+                    common::query::ValueMap fields = {{"cat", cat_str},
+                                                      {"name", name_str}};
+                    if (q->evaluate(fields)) {
+                        group_lines[plan.groups[gi].name].push_back(line_num);
+                        matched = true;
+                        break;
                     }
-                } catch (const std::exception&) {
-                    // Skip malformed or partial events without
-                    // aborting the entire plan.
                 }
+                if (!matched) {
+                    group_lines[remainder_name].push_back(line_num);
+                }
+                plan.total_events++;
 
                 yyjson_doc_free(doc);
             }
diff --git a/src/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.cpp b/src/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.cpp
index d0398692..8664442d 100644
--- a/src/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/statistics/chunk_detail_scanner_utility.cpp
@@ -106,7 +106,7 @@ coro::CoroTask<ChunkDetailScanOutput> ChunkDetailScannerUtility::process(
     // Create reader (same pattern as chunk_indexer_utility.cpp)
     auto reader_input = composites::IndexedReadInput::from_file(input.file_path)
                             .with_checkpoint_size(input.checkpoint_size)
-                            .with_index(input.idx_path);
+                            .with_index(input.index_path);
 
     composites::IndexedFileReaderUtility reader_utility;
     auto reader = co_await reader_utility.process(reader_input);
diff --git a/src/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.cpp b/src/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.cpp
index 73fe33ef..6f34cdd0 100644
--- a/src/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.cpp
@@ -1,7 +1,6 @@
 #include <dftracer/utils/core/common/filesystem.h>
-#include <dftracer/utils/core/sqlite/async.h>
+#include <dftracer/utils/core/rocksdb/async.h>
 #include <dftracer/utils/utilities/common/json/json_value.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h>
 #include <dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h>
@@ -21,25 +20,25 @@ coro::CoroTask<TraceStatistics> StatisticsAggregatorUtility::process(
     TraceStatistics result;
     result.file_path = input.file_path;
 
-    if (!input.idx_path.empty()) {
-        result.idx_path = input.idx_path;
+    if (!input.index_path.empty()) {
+        result.index_path =
+            indexer::internal::normalize_index_root(input.index_path);
     } else {
-        result.idx_path =
+        result.index_path =
             internal::determine_index_path(input.file_path, input.index_dir);
     }
 
-    if (!fs::exists(result.idx_path)) {
+    if (!fs::exists(result.index_path)) {
         result.success = false;
-        result.error_message = "Index file not found: " + result.idx_path;
+        result.error_message = "Index store not found: " + result.index_path;
         co_return result;
     }
 
     bool needs_streaming_fallback = false;
-
     auto do_query = [&input, &result,
                      &needs_streaming_fallback]() -> TraceStatistics {
         try {
-            IndexDatabase idx_db(result.idx_path);
+            IndexDatabase idx_db(result.index_path);
 
             int fid =
                 idx_db.get_file_info_id(get_logical_path(input.file_path));
@@ -50,10 +49,9 @@ coro::CoroTask<TraceStatistics> StatisticsAggregatorUtility::process(
                 return result;
             }
 
-            std::vector<indexing::queries::ChunkStatisticsResult> chunks;
+            std::vector<IndexDatabase::ChunkStatisticsResult> chunks;
             try {
-                chunks = indexing::queries::query_chunk_statistics(
-                    idx_db.sql_db(), fid);
+                chunks = idx_db.query_chunk_statistics(fid);
             } catch (const std::exception&) {
                 needs_streaming_fallback = true;
                 return result;
@@ -70,8 +68,7 @@ coro::CoroTask<TraceStatistics> StatisticsAggregatorUtility::process(
                 result.merged.merge_from(chunks[i].stats);
             }
 
-            auto dim_stats = indexing::queries::query_chunk_dimension_stats(
-                idx_db.sql_db(), fid);
+            auto dim_stats = idx_db.query_chunk_dimension_stats(fid);
             for (const auto& ds : dim_stats) {
                 if (!ds.value_counts) continue;
                 if (ds.dimension == "cat") {
@@ -94,7 +91,7 @@ coro::CoroTask<TraceStatistics> StatisticsAggregatorUtility::process(
         return result;
     };
 
-    result = co_await sqlite::run(do_query);
+    result = co_await rocksdb::run(do_query);
 
     if (!needs_streaming_fallback) {
         co_return result;
diff --git a/src/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.cpp b/src/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.cpp
index b8c45c0a..584c3a4c 100644
--- a/src/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/statistics/trace_statistics.cpp
@@ -57,7 +57,7 @@ std::string TraceStatistics::to_json() const {
     yyjson_mut_doc_set_root(doc, root);
 
     yyjson_mut_obj_add_str(doc, root, "file_path", file_path.c_str());
-    yyjson_mut_obj_add_str(doc, root, "idx_path", idx_path.c_str());
+    yyjson_mut_obj_add_str(doc, root, "index_path", index_path.c_str());
     yyjson_mut_obj_add_bool(doc, root, "success", success);
 
     if (!success) {
diff --git a/src/dftracer/utils/utilities/composites/dft/views/view_builder_utility.cpp b/src/dftracer/utils/utilities/composites/dft/views/view_builder_utility.cpp
index 96ca0dbc..8955bc3a 100644
--- a/src/dftracer/utils/utilities/composites/dft/views/view_builder_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/views/view_builder_utility.cpp
@@ -26,8 +26,8 @@ ViewBuilderInput& ViewBuilderInput::with_file_path(const std::string& path) {
     return *this;
 }
 
-ViewBuilderInput& ViewBuilderInput::with_idx_path(const std::string& path) {
-    idx_path = path;
+ViewBuilderInput& ViewBuilderInput::with_index_path(const std::string& path) {
+    index_path = path;
     return *this;
 }
 
@@ -62,10 +62,10 @@ coro::CoroTask<ViewBuilderOutput> ViewBuilderUtility::process(
 
     std::vector<std::uint64_t> candidate_checkpoints;
 
-    if (input.view.query && !input.idx_path.empty()) {
-        indexing::ChunkPrunerInput pruner_input{input.idx_path, input.file_path,
-                                                *input.view.query,
-                                                input.bloom_cache};
+    if (input.view.query && !input.index_path.empty()) {
+        indexing::ChunkPrunerInput pruner_input{
+            input.index_path, input.file_path, *input.view.query,
+            input.bloom_cache};
         indexing::ChunkPrunerUtility pruner;
         auto pruner_output = co_await pruner.process(pruner_input);
 
@@ -96,18 +96,16 @@ coro::CoroTask<ViewBuilderOutput> ViewBuilderUtility::process(
 
     // Chunk-level time range skip: query per-chunk time bounds from
     // the bloom index and remove chunks that don't overlap the query.
-    if (input.time_range && !input.idx_path.empty() &&
+    if (input.time_range && !input.index_path.empty() &&
         !candidate_checkpoints.empty()) {
         auto [t_begin, t_end] = *input.time_range;
         if (t_begin > 0 || t_end > 0) {
             try {
-                IndexDatabase idx_db(input.idx_path);
+                IndexDatabase idx_db(input.index_path);
                 int fid =
                     idx_db.get_file_info_id(get_logical_path(input.file_path));
                 if (fid >= 0) {
-                    auto chunk_stats =
-                        indexing::queries::query_chunk_statistics(
-                            idx_db.sql_db(), fid);
+                    auto chunk_stats = idx_db.query_chunk_statistics(fid);
 
                     std::unordered_map<std::uint64_t,
                                        std::pair<std::uint64_t, std::uint64_t>>
diff --git a/src/dftracer/utils/utilities/composites/dft/views/view_reader_utility.cpp b/src/dftracer/utils/utilities/composites/dft/views/view_reader_utility.cpp
index b3c360f0..5759ff4e 100644
--- a/src/dftracer/utils/utilities/composites/dft/views/view_reader_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/dft/views/view_reader_utility.cpp
@@ -23,8 +23,8 @@ ViewReaderInput& ViewReaderInput::with_file_path(const std::string& path) {
     return *this;
 }
 
-ViewReaderInput& ViewReaderInput::with_idx_path(const std::string& path) {
-    idx_path = path;
+ViewReaderInput& ViewReaderInput::with_index_path(const std::string& path) {
+    index_path = path;
     return *this;
 }
 
@@ -117,7 +117,7 @@ coro::AsyncGenerator<ViewReaderBatch> ViewReaderUtility::process(
         emitted_hashes;
 
     auto reader_input = composites::IndexedReadInput::from_file(input.file_path)
-                            .with_index(input.idx_path);
+                            .with_index(input.index_path);
     if (input.checkpoint_size > 0) {
         reader_input.with_checkpoint_size(input.checkpoint_size);
     }
diff --git a/src/dftracer/utils/utilities/composites/file_merger_utility.cpp b/src/dftracer/utils/utilities/composites/file_merger_utility.cpp
index 6417f334..b86c8e82 100644
--- a/src/dftracer/utils/utilities/composites/file_merger_utility.cpp
+++ b/src/dftracer/utils/utilities/composites/file_merger_utility.cpp
@@ -31,7 +31,7 @@ FileMergeValidatorUtility::process(
             (input.file_path.size() >= 3 &&
              input.file_path.substr(input.file_path.size() - 3) == ".gz");
 
-        std::string effective_idx_path = input.index_path;
+        std::string effective_index_path = input.index_path;
 
         if (is_compressed) {
             // Use IndexBuilderUtility for compressed files
@@ -53,7 +53,7 @@ FileMergeValidatorUtility::process(
                 co_return result;
             }
-            // Use the actual idx path produced by the builder
+            // Use the actual index path produced by the builder
-            effective_idx_path = index_result.idx_path;
+            effective_index_path = index_result.index_path;
         }
 
         // Step 2: Create line processor function that validates JSON
@@ -79,7 +79,7 @@ FileMergeValidatorUtility::process(
         fileio::lines::LineReadInput read_input;
         read_input.file_path = input.file_path;
         if (is_compressed) {
-            read_input.idx_path = effective_idx_path;
+            read_input.index_path = effective_index_path;
         }
 
         auto validated_events = co_await processor.process(read_input);
@@ -110,7 +110,7 @@ FileMergeValidatorUtility::process(
 
         if (is_compressed) {
             auto reader = dftracer::utils::utilities::reader::internal::
-                ReaderFactory::create(input.file_path, effective_idx_path);
+                ReaderFactory::create(input.file_path, effective_index_path);
             if (reader) {
                 result.total_lines = reader->get_num_lines();
             }
diff --git a/src/dftracer/utils/utilities/indexer/index_builder_utility.cpp b/src/dftracer/utils/utilities/indexer/index_builder_utility.cpp
index b851efa6..97b15098 100644
--- a/src/dftracer/utils/utilities/indexer/index_builder_utility.cpp
+++ b/src/dftracer/utils/utilities/indexer/index_builder_utility.cpp
@@ -1,6 +1,7 @@
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
+#include <dftracer/utils/core/rocksdb/async.h>
 #include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/fileio/lines/sources/async_streaming_gz_line_generator.h>
 #include <dftracer/utils/utilities/indexer/index_builder_utility.h>
@@ -8,6 +9,7 @@
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
+#include <dftracer/utils/utilities/indexer/internal/transaction_scope.h>
 #include <dftracer/utils/utilities/indexer/visitors/bloom_visitor.h>
 #include <dftracer/utils/utilities/indexer/visitors/manifest_visitor.h>
 
@@ -18,6 +20,7 @@ namespace dftracer::utils::utilities::indexer {
 
 using composites::dft::internal::determine_index_path;
 using internal::IndexerFactory;
+namespace rocks = dftracer::utils::rocksdb;
 
 // ---------------------------------------------------------------------------
 // IndexBuildConfig builder methods
@@ -78,9 +81,9 @@ coro::CoroTask<IndexBuildResult> IndexBuilderUtility::process(
     result.file_path = config.file_path;
 
     try {
-        std::string idx_path =
+        std::string index_path =
             determine_index_path(config.file_path, config.index_dir);
-        result.idx_path = idx_path;
+        result.index_path = index_path;
 
         // Check compressed file size against threshold (0 = always index).
         std::uintmax_t file_sz = 0;
@@ -93,7 +96,7 @@ coro::CoroTask<IndexBuildResult> IndexBuilderUtility::process(
         auto build_start = std::chrono::steady_clock::now();
 
         auto indexer = IndexerFactory::create(
-            config.file_path, idx_path,
+            config.file_path, index_path,
             static_cast<std::uint64_t>(config.checkpoint_size),
             config.force_rebuild);
 
@@ -113,7 +116,9 @@ coro::CoroTask<IndexBuildResult> IndexBuilderUtility::process(
             auto logical = internal::get_logical_path(config.file_path);
             bool bloom_ok = !config.build_bloom || [&] {
                 try {
-                    IndexDatabase db(idx_path);
+                    IndexDatabase db(index_path,
+                                     dftracer::utils::rocksdb::RocksDatabase::
+                                         OpenMode::ReadOnly);
                     int fid = db.get_file_info_id(logical);
                     return fid >= 0 && db.has_bloom_data(fid);
                 } catch (...) {
@@ -122,7 +127,9 @@ coro::CoroTask<IndexBuildResult> IndexBuilderUtility::process(
             }();
             bool manifest_ok = !config.build_manifest || [&] {
                 try {
-                    IndexDatabase db(idx_path);
+                    IndexDatabase db(index_path,
+                                     dftracer::utils::rocksdb::RocksDatabase::
+                                         OpenMode::ReadOnly);
                     int fid = db.get_file_info_id(logical);
                     return fid >= 0 && db.has_manifest_data(fid);
                 } catch (...) {
@@ -202,33 +209,37 @@ coro::CoroTask<IndexBuildResult> IndexBuilderUtility::process(
         result.chunks_processed =
             static_cast<std::size_t>(indexer->get_checkpoints().size());
 
-        // Persist visitor data into the .idx database only when the file meets
-        // the size threshold (or threshold is disabled).
+        // Persist visitor data into the `.dftindex` store only when the file
+        // meets the size threshold (or threshold is disabled).
         if (!below_threshold && (config.build_bloom || config.build_manifest)) {
-            const std::string& built_idx = indexer->get_idx_path();
+            const std::string& built_index_path = indexer->get_index_path();
 
-            IndexDatabase db(built_idx);
-
-            auto logical = internal::get_logical_path(config.file_path);
-            int fid = db.get_file_info_id(logical);
-            if (fid < 0) {
-                result.error_message =
-                    "File not found in index after build: " + logical;
-                co_return result;
-            }
-
-            db.begin_transaction();
             try {
-                if (config.build_bloom && bloom_visitor) {
-                    db.init_bloom_schema();
-                    db.delete_chunk_statistics(fid);
-                    bloom_visitor->finalize(db, fid);
-                }
-                if (config.build_manifest && manifest_visitor) {
-                    db.init_manifest_schema();
-                    manifest_visitor->finalize(db, fid);
-                }
-                db.commit_transaction();
+                IndexDatabase db(built_index_path);
+                auto logical = internal::get_logical_path(config.file_path);
+                const auto hash =
+                    internal::calculate_file_hash(config.file_path);
+                auto* db_ptr = &db;
+                auto* logical_ptr = &logical;
+                auto* config_ptr = &config;
+                auto* bloom_visitor_ptr = &bloom_visitor;
+                auto* manifest_visitor_ptr = &manifest_visitor;
+                co_await rocks::run([db_ptr, logical_ptr, hash, config_ptr,
+                                     bloom_visitor_ptr, manifest_visitor_ptr] {
+                    int fid =
+                        db_ptr->get_or_create_file_info(*logical_ptr, hash);
+                    internal::TransactionScope txn(*db_ptr);
+                    if (config_ptr->build_bloom && *bloom_visitor_ptr) {
+                        db_ptr->init_bloom_schema();
+                        db_ptr->delete_chunk_statistics(fid);
+                        (*bloom_visitor_ptr)->finalize(*db_ptr, fid);
+                    }
+                    if (config_ptr->build_manifest && *manifest_visitor_ptr) {
+                        db_ptr->init_manifest_schema();
+                        (*manifest_visitor_ptr)->finalize(*db_ptr, fid);
+                    }
+                    txn.commit();
+                });
             } catch (const std::exception& e) {
                 result.error_message =
                     std::string("Failed to persist index data: ") + e.what();
diff --git a/src/dftracer/utils/utilities/indexer/index_database.cpp b/src/dftracer/utils/utilities/indexer/index_database.cpp
index c74a91c3..4cc3165c 100644
--- a/src/dftracer/utils/utilities/indexer/index_database.cpp
+++ b/src/dftracer/utils/utilities/indexer/index_database.cpp
@@ -1,310 +1,773 @@
-#include <dftracer/utils/core/sqlite/statement.h>
+#include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/core/common/logging.h>
+#include <dftracer/utils/core/rocksdb/key_codec.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/error.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
+#include <dftracer/utils/utilities/indexer/internal/scan_prefix.h>
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <limits>
+#include <optional>
+#include <stdexcept>
+#include <utility>
 
 namespace dftracer::utils::utilities::indexer {
 
 namespace queries = composites::dft::indexing::queries;
+namespace rocks = dftracer::utils::rocksdb;
 
-using dftracer::utils::sqlite::SqliteStmt;
 using internal::IndexerError;
 
-// ---------------------------------------------------------------------------
-// Schema strings
-// ---------------------------------------------------------------------------
-
-// Matches GzipIndexer schema (gzip/constants.cpp) so IndexDatabase
-// can open .idx files created by the existing indexer.
-static const char* BASE_SCHEMA = R"(
-    PRAGMA journal_mode=WAL;
-    PRAGMA busy_timeout=5000;
-    PRAGMA foreign_keys=ON;
-
-    CREATE TABLE IF NOT EXISTS files (
-        id            INTEGER PRIMARY KEY,
-        logical_name  TEXT UNIQUE NOT NULL,
-        byte_size     INTEGER NOT NULL DEFAULT 0,
-        mtime_unix    INTEGER NOT NULL DEFAULT 0,
-        hash          INTEGER NOT NULL DEFAULT 0
-    );
-
-    CREATE TABLE IF NOT EXISTS checkpoints (
-        id              INTEGER PRIMARY KEY,
-        file_id         INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
-        checkpoint_idx  INTEGER NOT NULL,
-        uc_offset       INTEGER NOT NULL DEFAULT 0,
-        uc_size         INTEGER NOT NULL DEFAULT 0,
-        c_offset        INTEGER NOT NULL DEFAULT 0,
-        c_size          INTEGER NOT NULL DEFAULT 0,
-        bits            INTEGER NOT NULL DEFAULT 0,
-        dict_compressed BLOB,
-        num_lines       INTEGER NOT NULL DEFAULT 0,
-        first_line_num  INTEGER NOT NULL DEFAULT 0,
-        last_line_num   INTEGER NOT NULL DEFAULT 0
-    );
-
-    CREATE INDEX IF NOT EXISTS checkpoints_file_idx
-        ON checkpoints(file_id, checkpoint_idx);
-    CREATE INDEX IF NOT EXISTS checkpoints_file_uc_off_idx
-        ON checkpoints(file_id, uc_offset);
-    CREATE INDEX IF NOT EXISTS checkpoints_line_range_idx
-        ON checkpoints(file_id, first_line_num, last_line_num);
-
-    CREATE TABLE IF NOT EXISTS metadata (
-        file_id         INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
-        checkpoint_size INTEGER NOT NULL DEFAULT 0,
-        total_lines     INTEGER NOT NULL DEFAULT 0,
-        total_uc_size   INTEGER NOT NULL DEFAULT 0,
-        PRIMARY KEY(file_id)
-    );
-)";
-
-static const char* BLOOM_SCHEMA = R"(
-    CREATE TABLE IF NOT EXISTS chunk_bloom_filters (
-        id INTEGER PRIMARY KEY,
-        file_info_id INTEGER NOT NULL,
-        checkpoint_idx INTEGER NOT NULL,
-        dimension TEXT NOT NULL,
-        bloom_data BLOB NOT NULL,
-        num_entries INTEGER NOT NULL,
-        UNIQUE(file_info_id, checkpoint_idx, dimension)
-    );
-
-    CREATE TABLE IF NOT EXISTS file_bloom_filters (
-        id INTEGER PRIMARY KEY,
-        file_info_id INTEGER NOT NULL,
-        dimension TEXT NOT NULL,
-        bloom_data BLOB NOT NULL,
-        num_entries INTEGER NOT NULL,
-        UNIQUE(file_info_id, dimension)
-    );
-
-    CREATE TABLE IF NOT EXISTS chunk_statistics (
-        id INTEGER PRIMARY KEY,
-        file_info_id INTEGER NOT NULL,
-        checkpoint_idx INTEGER NOT NULL,
-        total_events INTEGER NOT NULL DEFAULT 0,
-        min_timestamp_us INTEGER,
-        max_timestamp_us INTEGER,
-        duration_sum_us INTEGER NOT NULL DEFAULT 0,
-        duration_min_us INTEGER,
-        duration_max_us INTEGER,
-        duration_count INTEGER NOT NULL DEFAULT 0,
-        duration_m2 REAL NOT NULL DEFAULT 0,
-        duration_sketch BLOB,
-        duration_histogram TEXT NOT NULL DEFAULT '[]',
-        name_duration_sketches BLOB,
-        name_duration_histograms TEXT NOT NULL DEFAULT '{}',
-        name_duration_sums TEXT NOT NULL DEFAULT '{}',
-        name_duration_sum_sqs TEXT NOT NULL DEFAULT '{}',
-        name_category TEXT NOT NULL DEFAULT '{}',
-        UNIQUE(file_info_id, checkpoint_idx)
-    );
-
-    CREATE TABLE IF NOT EXISTS index_dimensions (
-        id INTEGER PRIMARY KEY,
-        file_info_id INTEGER NOT NULL,
-        dimension TEXT NOT NULL,
-        UNIQUE(file_info_id, dimension)
-    );
-
-    CREATE TABLE IF NOT EXISTS hash_resolutions (
-        id INTEGER PRIMARY KEY,
-        file_info_id INTEGER NOT NULL,
-        dimension TEXT NOT NULL,
-        hash_value TEXT NOT NULL,
-        resolved_value TEXT NOT NULL,
-        UNIQUE(file_info_id, dimension, hash_value)
-    );
-
-    CREATE TABLE IF NOT EXISTS chunk_dimension_stats (
-        id              INTEGER PRIMARY KEY,
-        file_info_id    INTEGER NOT NULL,
-        checkpoint_idx  INTEGER NOT NULL,
-        dimension       TEXT NOT NULL,
-        distinct_count  INTEGER NOT NULL DEFAULT 0,
-        value_counts    BLOB,
-        min_value       TEXT,
-        max_value       TEXT,
-        value_type      TEXT NOT NULL DEFAULT 'string',
-        UNIQUE(file_info_id, checkpoint_idx, dimension)
-    );
-
-    CREATE INDEX IF NOT EXISTS chunk_bloom_file_dim_idx
-        ON chunk_bloom_filters(file_info_id, dimension);
-    CREATE INDEX IF NOT EXISTS chunk_stats_file_idx
-        ON chunk_statistics(file_info_id, checkpoint_idx);
-    CREATE INDEX IF NOT EXISTS chunk_dim_stats_file_dim_idx
-        ON chunk_dimension_stats(file_info_id, dimension);
-    CREATE INDEX IF NOT EXISTS hash_res_dim_val_idx
-        ON hash_resolutions(dimension, resolved_value);
-)";
-
-static const char* MANIFEST_SCHEMA = R"(
-    CREATE TABLE IF NOT EXISTS checkpoint_event_ranges (
-        checkpoint_idx  INTEGER NOT NULL,
-        file_info_id    INTEGER NOT NULL,
-        cat             TEXT NOT NULL,
-        name            TEXT NOT NULL,
-        line_numbers    BLOB NOT NULL,
-        event_count     INTEGER NOT NULL DEFAULT 0,
-        PRIMARY KEY (file_info_id, checkpoint_idx, cat, name)
-    );
-
-    CREATE TABLE IF NOT EXISTS checkpoint_metadata_lines (
-        checkpoint_idx  INTEGER NOT NULL,
-        file_info_id    INTEGER NOT NULL,
-        meta_type       TEXT NOT NULL,
-        line_numbers    BLOB NOT NULL,
-        PRIMARY KEY (file_info_id, checkpoint_idx, meta_type)
-    );
-
-    CREATE INDEX IF NOT EXISTS idx_event_ranges_checkpoint
-        ON checkpoint_event_ranges(file_info_id, checkpoint_idx);
-    CREATE INDEX IF NOT EXISTS idx_metadata_checkpoint
-        ON checkpoint_metadata_lines(file_info_id, checkpoint_idx);
-)";
-
-// ---------------------------------------------------------------------------
-// Constructor / destructor
-// ---------------------------------------------------------------------------
-
-IndexDatabase::IndexDatabase(const std::string& idx_path) : db_(idx_path) {}
-
-// ---------------------------------------------------------------------------
-// Schema initialisation
-// ---------------------------------------------------------------------------
-
-static void exec_schema(sqlite3* db, const char* sql, const char* label) {
-    char* err_msg = nullptr;
-    int rc = sqlite3_exec(db, sql, nullptr, nullptr, &err_msg);
-    if (rc != SQLITE_OK) {
-        std::string error = err_msg ? std::string(err_msg) : "unknown error";
-        if (err_msg) sqlite3_free(err_msg);
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           std::string(label) + ": " + error);
+namespace {
+
+// Schema version that init_base_schema() stores (encoded with
+// KeyCodec::encode_be32) under schema_version_key() when no version has been
+// recorded yet.
+constexpr std::uint32_t kSchemaVersion = 1;
+
+// Raises an IndexerError of type DATABASE_ERROR whose text is
+// "<message>: <status.ToString()>". Never returns.
+[[noreturn]] void throw_db_error(std::string_view message,
+                                 const ::rocksdb::Status& status) {
+    std::string what(message);
+    what += ": ";
+    what += status.ToString();
+    throw IndexerError(IndexerError::Type::DATABASE_ERROR, std::move(what));
+}
+
+// Appends a single raw byte to `out`.
+void append_u8(std::string& out, std::uint8_t value) {
+    out += static_cast<char>(value);
+}
+
+// Appends a signed 64-bit value: it is cast to unsigned and written with
+// KeyCodec::append_be64.
+void append_i64(std::string& out, std::int64_t value) {
+    const auto as_unsigned = static_cast<std::uint64_t>(value);
+    rocks::KeyCodec::append_be64(out, as_unsigned);
+}
+
+// Appends an unsigned 64-bit value via KeyCodec::append_be64.
+void append_u64(std::string& out, std::uint64_t value) {
+    using rocks::KeyCodec;
+    KeyCodec::append_be64(out, value);
+}
+
+// Appends a double by copying its object representation into a 64-bit
+// integer and writing that integer with append_u64.
+void append_double(std::string& out, double value) {
+    static_assert(sizeof(double) == sizeof(std::uint64_t));
+    std::uint64_t raw = 0;
+    std::memcpy(&raw, &value, sizeof(raw));
+    append_u64(out, raw);
+}
+
+// Appends `value` as a length-prefixed field: a 32-bit length written with
+// KeyCodec::append_be32, followed by the raw bytes.
+//
+// Throws std::runtime_error when the size does not fit in the 32-bit prefix.
+// Narrowing it silently would store a length that disagrees with the bytes
+// that follow, leaving a payload that Cursor::str() cannot decode.
+void append_string(std::string& out, std::string_view value) {
+    if (value.size() > std::numeric_limits<std::uint32_t>::max()) {
+        throw std::runtime_error(
+            "RocksDB string field exceeds 32-bit length prefix");
+    }
+    rocks::KeyCodec::append_be32(out, static_cast<std::uint32_t>(value.size()));
+    out.append(value.data(), value.size());
+}
+
+// Appends `blob` as a length-prefixed field: a 32-bit length written with
+// KeyCodec::append_be32, followed by the raw bytes.
+//
+// Throws std::runtime_error when the size does not fit in the 32-bit prefix.
+// Narrowing it silently would store a length that disagrees with the bytes
+// that follow, leaving a payload that Cursor::blob() cannot decode.
+void append_blob(std::string& out, std::span<const unsigned char> blob) {
+    if (blob.size() > std::numeric_limits<std::uint32_t>::max()) {
+        throw std::runtime_error(
+            "RocksDB blob field exceeds 32-bit length prefix");
+    }
+    rocks::KeyCodec::append_be32(out, static_cast<std::uint32_t>(blob.size()));
+    out.append(reinterpret_cast<const char*>(blob.data()), blob.size());
+}
+
+// Sequential reader over an encoded RocksDB value.
+//
+// Holds a non-owning view, so the underlying bytes must outlive the cursor.
+// Each accessor consumes its field starting at the current offset and throws
+// std::runtime_error("Corrupt RocksDB payload") when fewer bytes remain than
+// the field needs.
+class Cursor {
+   public:
+    explicit Cursor(std::string_view data) : data_(data) {}
+
+    // One raw byte.
+    std::uint8_t u8() { return static_cast<std::uint8_t>(take(1)[0]); }
+
+    // Four bytes decoded with KeyCodec::decode_be32.
+    std::uint32_t u32() { return rocks::KeyCodec::decode_be32(take(4)); }
+
+    // Eight bytes decoded with KeyCodec::decode_be64.
+    std::uint64_t u64() { return rocks::KeyCodec::decode_be64(take(8)); }
+
+    // The same eight bytes as u64(), cast to signed.
+    std::int64_t i64() { return static_cast<std::int64_t>(u64()); }
+
+    // Eight bytes read as u64() and copied bit-for-bit into a double.
+    double f64() {
+        std::uint64_t bits = u64();
+        double value = 0.0;
+        std::memcpy(&value, &bits, sizeof(value));
+        return value;
+    }
+
+    // A u32 length followed by that many bytes, returned as a string.
+    std::string str() {
+        auto len = static_cast<std::size_t>(u32());
+        auto bytes = take(len);
+        return std::string(bytes.data(), bytes.size());
+    }
+
+    // A u32 length followed by that many bytes, returned as a byte vector.
+    std::vector<unsigned char> blob() {
+        auto len = static_cast<std::size_t>(u32());
+        auto bytes = take(len);
+        return std::vector<unsigned char>(bytes.begin(), bytes.end());
+    }
+
+   private:
+    // Returns the next `len` bytes and advances past them.
+    std::string_view take(std::size_t len) {
+        // Compare against the bytes still unread instead of computing
+        // offset_ + len: a length read from corrupt input could wrap that sum
+        // where size_t is 32 bits wide and slip past the check. offset_ never
+        // exceeds data_.size(), so the subtraction cannot underflow.
+        if (len > data_.size() - offset_) {
+            throw std::runtime_error("Corrupt RocksDB payload");
+        }
+        auto chunk = data_.substr(offset_, len);
+        offset_ += len;
+        return chunk;
     }
+
+    std::string_view data_;
+    std::size_t offset_ = 0;
+};
+
+// Builds the registry key for a logical file name: "f|" followed by the name.
+std::string file_lookup_key(std::string_view logical_name) {
+    std::string key("f|");
+    key.append(logical_name);
+    return key;
 }
 
-void IndexDatabase::init_base_schema() {
-    exec_schema(db_.get(), BASE_SCHEMA, "init_base_schema");
+// Builds the reverse-registry key for a file id: "r|" followed by the id
+// written with KeyCodec::append_be32.
+std::string file_reverse_key(int file_id) {
+    std::string key = "r|";
+    const auto id = static_cast<std::uint32_t>(file_id);
+    rocks::KeyCodec::append_be32(key, id);
+    return key;
 }
 
-void IndexDatabase::init_bloom_schema() {
-    exec_schema(db_.get(), BLOOM_SCHEMA, "init_bloom_schema");
+// Key under which the next file id to assign is stored.
+std::string next_file_id_key() {
+    return std::string("_next_file_id");
+}
+// Key under which the schema version is stored.
+std::string schema_version_key() {
+    return std::string("_schema_version");
+}
+
+// Serializes a file registry record: the id as a 32-bit field, two 64-bit
+// zero fields, then the 64-bit file hash.
+// NOTE(review): the two zero fields look like byte-size/mtime placeholders
+// carried over from the former `files` table -- confirm.
+std::string encode_file_record(int file_id, std::uint64_t file_hash) {
+    std::string record;
+    rocks::KeyCodec::append_be32(record, static_cast<std::uint32_t>(file_id));
+    for (int placeholder = 0; placeholder < 2; ++placeholder) {
+        append_u64(record, 0);
+    }
+    append_u64(record, file_hash);
+    return record;
 }
 
-void IndexDatabase::init_manifest_schema() {
-    exec_schema(db_.get(), MANIFEST_SCHEMA, "init_manifest_schema");
+// Extracts the file id held in the first four bytes of a file record.
+// Throws std::runtime_error when the record is shorter than that.
+int decode_file_id(std::string_view record) {
+    constexpr std::size_t kIdBytes = 4;
+    if (record.size() < kIdBytes) {
+        throw std::runtime_error("Corrupt file record");
+    }
+    const auto id = rocks::KeyCodec::decode_be32(record.substr(0, kIdBytes));
+    return static_cast<int>(id);
+}
+
+// Extracts the 64-bit hash stored at byte offset 20 of a file record.
+// Throws std::runtime_error when the record is shorter than 28 bytes.
+std::uint64_t decode_file_hash(std::string_view record) {
+    constexpr std::size_t kHashOffset = 20;
+    constexpr std::size_t kHashBytes = 8;
+    if (record.size() < kHashOffset + kHashBytes) {
+        throw std::runtime_error("Corrupt file record");
+    }
+    const auto hash_bytes = record.substr(kHashOffset, kHashBytes);
+    return rocks::KeyCodec::decode_be64(hash_bytes);
+}
+
+// Returns the file id encoded with KeyCodec::encode_be32; the per-file key
+// builders in this file start their keys with these bytes.
+std::string prefix_for_file(int file_id) {
+    const auto id = static_cast<std::uint32_t>(file_id);
+    return rocks::KeyCodec::encode_be32(id);
+}
+
+// Builds "o|" + file id (KeyCodec::append_be32) + NUL + dimension + NUL +
+// hash_value.
+std::string make_hash_owner_key(int file_id, std::string_view dimension,
+                                std::string_view hash_value) {
+    constexpr char kSeparator = '\0';
+    std::string key = "o|";
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_id));
+    key += kSeparator;
+    key += dimension;
+    key += kSeparator;
+    key += hash_value;
+    return key;
+}
+
+// Builds "h|" + dimension + NUL + hash_value.
+std::string make_hash_forward_key(std::string_view dimension,
+                                  std::string_view hash_value) {
+    std::string key = "h|";
+    key += dimension;
+    key += '\0';
+    key += hash_value;
+    return key;
+}
+
+// Builds "H|" + dimension + NUL + resolved_value + NUL + hash_value.
+std::string make_hash_reverse_key(std::string_view dimension,
+                                  std::string_view resolved_value,
+                                  std::string_view hash_value) {
+    constexpr char kSeparator = '\0';
+    std::string key = "H|";
+    key += dimension;
+    key += kSeparator;
+    key += resolved_value;
+    key += kSeparator;
+    key += hash_value;
+    return key;
+}
+
+// Builds "d|" + file id (KeyCodec::append_be32) + dimension.
+std::string make_dimension_key(int file_id, std::string_view dimension) {
+    std::string key = "d|";
+    const auto id = static_cast<std::uint32_t>(file_id);
+    rocks::KeyCodec::append_be32(key, id);
+    key += dimension;
+    return key;
+}
+
+// Builds file prefix + dimension + NUL + checkpoint index (64-bit).
+std::string chunk_bloom_key(int file_id, std::string_view dimension,
+                            std::uint64_t checkpoint_idx) {
+    auto key = prefix_for_file(file_id);
+    key += dimension;
+    key += '\0';
+    append_u64(key, checkpoint_idx);
+    return key;
+}
+
+// Builds file prefix + dimension.
+std::string file_bloom_key(int file_id, std::string_view dimension) {
+    auto key = prefix_for_file(file_id);
+    key += dimension;
+    return key;
+}
+
+// Builds file prefix + checkpoint index (64-bit).
+std::string chunk_stats_key(int file_id, std::uint64_t checkpoint_idx) {
+    auto key = prefix_for_file(file_id);
+    append_u64(key, checkpoint_idx);
+    return key;
+}
+
+// Builds file prefix + uncompressed offset (64-bit) + checkpoint index
+// (64-bit); decode_checkpoint() reads the last two fields back from the key.
+std::string checkpoint_key(int file_id, std::uint64_t uc_offset,
+                           std::uint64_t checkpoint_idx) {
+    auto key = prefix_for_file(file_id);
+    for (const std::uint64_t field : {uc_offset, checkpoint_idx}) {
+        append_u64(key, field);
+    }
+    return key;
+}
+
+// Builds file prefix + checkpoint index (64-bit) + dimension.
+std::string chunk_dim_stats_key(int file_id, std::uint64_t checkpoint_idx,
+                                std::string_view dimension) {
+    auto key = prefix_for_file(file_id);
+    append_u64(key, checkpoint_idx);
+    key += dimension;
+    return key;
+}
+
+// Builds "E|" + file id (KeyCodec::append_be32) + checkpoint index (64-bit)
+// + cat + NUL + name.
+std::string manifest_event_key(int file_id, std::uint64_t checkpoint_idx,
+                               std::string_view cat, std::string_view name) {
+    std::string key = "E|";
+    const auto id = static_cast<std::uint32_t>(file_id);
+    rocks::KeyCodec::append_be32(key, id);
+    append_u64(key, checkpoint_idx);
+    key += cat;
+    key += '\0';
+    key += name;
+    return key;
+}
+
+// Builds "M|" + file id (KeyCodec::append_be32) + checkpoint index (64-bit)
+// + meta_type.
+std::string manifest_metadata_key(int file_id, std::uint64_t checkpoint_idx,
+                                  std::string_view meta_type) {
+    std::string key = "M|";
+    const auto id = static_cast<std::uint32_t>(file_id);
+    rocks::KeyCodec::append_be32(key, id);
+    append_u64(key, checkpoint_idx);
+    key += meta_type;
+    return key;
+}
+
+// Key of a file's metadata record: the encoded file id and nothing else.
+std::string metadata_key(int file_id) {
+    return prefix_for_file(file_id);
+}
+
+// Key of a tar archive record: the encoded file id and nothing else.
+std::string tar_archive_key(int file_id) {
+    return prefix_for_file(file_id);
+}
+
+// Builds file prefix + uncompressed offset (64-bit) + NUL + file_name;
+// decode_tar_file() parses this layout.
+std::string tar_file_key(int file_id, std::uint64_t uncompressed_offset,
+                         std::string_view file_name) {
+    auto key = prefix_for_file(file_id);
+    append_u64(key, uncompressed_offset);
+    key += '\0';
+    key += file_name;
+    return key;
+}
+
+// Serializes a bloom filter value: the 64-bit entry count followed by the
+// raw filter bytes (no length prefix; the bytes run to the end of the value).
+std::string encode_bloom_value(std::span<const unsigned char> blob,
+                               std::uint64_t num_entries) {
+    std::string encoded;
+    append_u64(encoded, num_entries);
+    const auto* bytes = reinterpret_cast<const char*>(blob.data());
+    encoded.append(bytes, blob.size());
+    return encoded;
+}
+
+// Decodes one chunk bloom entry.
+//
+// The checkpoint index is the 8 bytes after the first NUL found at or beyond
+// `prefix_size` in `key`; `value` holds a 64-bit entry count followed by the
+// filter bytes. Throws std::runtime_error when the key has no such NUL, is
+// too short to hold the index, or the value is shorter than 8 bytes.
+IndexDatabase::ChunkBloomResult decode_chunk_bloom(std::string_view key,
+                                                   std::string_view value,
+                                                   std::size_t prefix_size) {
+    constexpr std::size_t kU64Bytes = 8;
+    IndexDatabase::ChunkBloomResult decoded;
+
+    const auto separator = key.find('\0', prefix_size);
+    const bool key_ok = separator != std::string_view::npos &&
+                        separator + 1 + kU64Bytes <= key.size();
+    if (!key_ok) {
+        throw std::runtime_error("Corrupt chunk bloom key");
+    }
+    decoded.checkpoint_idx =
+        rocks::KeyCodec::decode_be64(key.substr(separator + 1, kU64Bytes));
+
+    if (value.size() < kU64Bytes) {
+        throw std::runtime_error("Corrupt chunk bloom value");
+    }
+    decoded.num_entries =
+        rocks::KeyCodec::decode_be64(value.substr(0, kU64Bytes));
+    const auto filter_bytes = value.substr(kU64Bytes);
+    decoded.bloom_data.assign(filter_bytes.begin(), filter_bytes.end());
+    return decoded;
+}
+
+// Decodes a file-level bloom value: a 64-bit entry count followed by the
+// filter bytes. Throws std::runtime_error when the value is shorter than
+// 8 bytes.
+IndexDatabase::FileBloomResult decode_file_bloom(std::string_view value) {
+    constexpr std::size_t kU64Bytes = 8;
+    if (value.size() < kU64Bytes) {
+        throw std::runtime_error("Corrupt file bloom value");
+    }
+    IndexDatabase::FileBloomResult decoded;
+    decoded.num_entries =
+        rocks::KeyCodec::decode_be64(value.substr(0, kU64Bytes));
+    const auto filter_bytes = value.substr(kU64Bytes);
+    decoded.bloom_data.assign(filter_bytes.begin(), filter_bytes.end());
+    return decoded;
+}
+
+// Serializes per-chunk statistics into a single value.
+//
+// The field order below is the storage layout;
+// decode_chunk_statistics_value() reads the fields back in exactly this
+// order, so the two functions must be changed together.
+std::string encode_chunk_statistics_value(
+    const IndexDatabase::ChunkStatistics& stats) {
+    std::string value;
+    // Fixed-width scalar fields.
+    append_u64(value, stats.total_events);
+    append_u64(value, stats.min_timestamp_us);
+    append_u64(value, stats.max_timestamp_us);
+    append_i64(value, stats.duration_sum_us);
+    append_u64(value, stats.duration_min_us);
+    append_u64(value, stats.duration_max_us);
+    append_u64(value, stats.duration_count);
+    append_double(value, stats.duration_m2);
+
+    // Overall duration sketch as a length-prefixed blob.
+    auto duration_sketch = stats.duration_sketch.serialize();
+    append_blob(value, duration_sketch);
+
+    // Overall duration histogram as a length-prefixed JSON string.
+    auto duration_histogram = stats.duration_histogram.to_json();
+    append_string(value, duration_histogram);
+
+    // Per-name sketches (blob) followed by four per-name JSON strings.
+    auto name_sketches = stats.serialize_name_duration_sketches();
+    append_blob(value, name_sketches);
+    append_string(value, stats.name_duration_histograms_json());
+    append_string(value, stats.name_duration_sums_json());
+    append_string(value, stats.name_duration_sum_sqs_json());
+    append_string(value, stats.name_category_json());
+    return value;
+}
+
+// Rebuilds per-chunk statistics from a value written by
+// encode_chunk_statistics_value().
+//
+// Fields are consumed in the encoder's order. Cursor throws
+// std::runtime_error("Corrupt RocksDB payload") if the value ends early.
+IndexDatabase::ChunkStatistics decode_chunk_statistics_value(
+    std::string_view value) {
+    Cursor cursor(value);
+    IndexDatabase::ChunkStatistics stats;
+    // Fixed-width scalar fields.
+    stats.total_events = cursor.u64();
+    stats.min_timestamp_us = cursor.u64();
+    stats.max_timestamp_us = cursor.u64();
+    stats.duration_sum_us = cursor.i64();
+    stats.duration_min_us = cursor.u64();
+    stats.duration_max_us = cursor.u64();
+    stats.duration_count = cursor.u64();
+    stats.duration_m2 = cursor.f64();
+
+    // An empty blob leaves the default-constructed sketch untouched.
+    auto duration_sketch = cursor.blob();
+    if (!duration_sketch.empty()) {
+        stats.duration_sketch = common::statistics::DDSketch::deserialize(
+            duration_sketch.data(), duration_sketch.size());
+    }
+
+    // An empty string leaves the default-constructed histogram untouched.
+    auto duration_histogram = cursor.str();
+    if (!duration_histogram.empty()) {
+        stats.duration_histogram =
+            common::statistics::Log2Histogram::from_json(duration_histogram);
+    }
+
+    // An empty blob leaves the per-name sketches at their default.
+    auto name_sketches = cursor.blob();
+    if (!name_sketches.empty()) {
+        stats.name_duration_sketches =
+            IndexDatabase::ChunkStatistics::deserialize_name_duration_sketches(
+                name_sketches.data(), name_sketches.size());
+    }
+
+    // The four trailing JSON strings are always parsed, even when empty.
+    stats.name_duration_histograms =
+        IndexDatabase::ChunkStatistics::parse_histogram_map_json(cursor.str());
+    stats.name_duration_sums =
+        IndexDatabase::ChunkStatistics::parse_double_map_json(cursor.str());
+    stats.name_duration_sum_sqs =
+        IndexDatabase::ChunkStatistics::parse_double_map_json(cursor.str());
+    stats.name_category =
+        IndexDatabase::ChunkStatistics::parse_string_map_json(cursor.str());
+    return stats;
+}
+
+// Serializes the value part of a checkpoint. uc_offset and checkpoint_idx are
+// not written here; decode_checkpoint() takes them from the key.
+std::string encode_checkpoint_value(
+    const IndexDatabase::IndexerCheckpoint& checkpoint) {
+    std::string encoded;
+
+    // Sizes and offsets, then the bit count widened to 64 bits.
+    append_u64(encoded, checkpoint.uc_size);
+    append_u64(encoded, checkpoint.c_offset);
+    append_u64(encoded, checkpoint.c_size);
+    append_i64(encoded, checkpoint.bits);
+
+    // Compressed dictionary as a length-prefixed blob.
+    append_blob(encoded, checkpoint.dict_compressed);
+
+    // Line accounting.
+    append_u64(encoded, checkpoint.num_lines);
+    append_u64(encoded, checkpoint.first_line_num);
+    append_u64(encoded, checkpoint.last_line_num);
+    return encoded;
+}
+
+// Rebuilds a checkpoint from its key and value.
+//
+// The key supplies uc_offset (bytes 4..11) and checkpoint_idx (bytes
+// 12..19); the value supplies the fields written by
+// encode_checkpoint_value(), in the same order. Throws std::runtime_error
+// when the key is shorter than 20 bytes or the value ends early.
+IndexDatabase::IndexerCheckpoint decode_checkpoint(std::string_view key,
+                                                   std::string_view value) {
+    constexpr std::size_t kOffsetPos = 4;
+    constexpr std::size_t kIndexPos = 12;
+    constexpr std::size_t kU64Bytes = 8;
+    if (key.size() < kIndexPos + kU64Bytes) {
+        throw std::runtime_error("Corrupt checkpoint key");
+    }
+
+    IndexDatabase::IndexerCheckpoint parsed;
+    parsed.uc_offset =
+        rocks::KeyCodec::decode_be64(key.substr(kOffsetPos, kU64Bytes));
+    parsed.checkpoint_idx =
+        rocks::KeyCodec::decode_be64(key.substr(kIndexPos, kU64Bytes));
+
+    Cursor fields(value);
+    parsed.uc_size = fields.u64();
+    parsed.c_offset = fields.u64();
+    parsed.c_size = fields.u64();
+    parsed.bits = static_cast<int>(fields.i64());
+    parsed.dict_compressed = fields.blob();
+    parsed.num_lines = fields.u64();
+    parsed.first_line_num = fields.u64();
+    parsed.last_line_num = fields.u64();
+    return parsed;
+}
+
+// Serializes per-dimension chunk statistics: distinct count, min/max/type
+// strings, then a presence byte (1 or 0) that is followed by the compressed
+// value-counts blob only when compress_value_counts() produced one.
+std::string encode_chunk_dimension_stats_value(
+    const IndexDatabase::ChunkDimensionStats& stats,
+    std::size_t value_counts_cap) {
+    std::string encoded;
+    append_u64(encoded, stats.distinct_count);
+    append_string(encoded, stats.min_value);
+    append_string(encoded, stats.max_value);
+    append_string(encoded, stats.value_type);
+
+    auto packed_counts = stats.compress_value_counts(value_counts_cap);
+    if (packed_counts.has_value()) {
+        append_u8(encoded, 1);
+        append_blob(encoded, *packed_counts);
+    } else {
+        append_u8(encoded, 0);
+    }
+    return encoded;
+}
+
+// Rebuilds per-dimension chunk statistics from a key/value pair.
+//
+// The key supplies checkpoint_idx (bytes 4..11) and the dimension (everything
+// from byte 12 on); the value follows the layout written by
+// encode_chunk_dimension_stats_value(). Throws std::runtime_error when the
+// key is shorter than 12 bytes or the value ends early.
+IndexDatabase::ChunkDimensionStatsResult decode_chunk_dimension_stats_value(
+    std::string_view key, std::string_view value) {
+    constexpr std::size_t kIndexPos = 4;
+    constexpr std::size_t kDimensionPos = 12;
+
+    IndexDatabase::ChunkDimensionStatsResult decoded;
+    if (key.size() < kDimensionPos) {
+        throw std::runtime_error("Corrupt chunk dimension stats key");
+    }
+    decoded.checkpoint_idx = rocks::KeyCodec::decode_be64(
+        key.substr(kIndexPos, kDimensionPos - kIndexPos));
+    decoded.dimension = std::string(key.substr(kDimensionPos));
+
+    Cursor fields(value);
+    decoded.distinct_count = fields.u64();
+    decoded.min_value = fields.str();
+    decoded.max_value = fields.str();
+    decoded.value_type = fields.str();
+
+    // A non-zero presence byte means a compressed value-counts blob follows.
+    const bool has_counts = fields.u8() != 0;
+    if (has_counts) {
+        auto packed_counts = fields.blob();
+        decoded.value_counts =
+            IndexDatabase::ChunkDimensionStats::decompress_value_counts(
+                packed_counts.data(), packed_counts.size());
+    }
+    return decoded;
+}
+
+// Serializes an event range: the 64-bit number of lines followed by the
+// line numbers packed with queries::pack_line_numbers as a length-prefixed
+// blob.
+std::string encode_event_range_value(std::span<const std::uint32_t> lines) {
+    std::vector<std::uint32_t> line_numbers(lines.begin(), lines.end());
+    auto packed = queries::pack_line_numbers(line_numbers);
+
+    std::string encoded;
+    append_u64(encoded, line_numbers.size());
+    append_blob(encoded, packed);
+    return encoded;
+}
+
+// Reads one length-prefixed blob from `cursor` and unpacks it with
+// queries::unpack_line_numbers.
+std::vector<std::uint32_t> decode_line_numbers(Cursor& cursor) {
+    auto packed = cursor.blob();
+    auto* bytes = packed.data();
+    const auto byte_count = packed.size();
+    return queries::unpack_line_numbers(bytes, byte_count);
+}
+
+// Serializes manifest metadata line numbers: packed with
+// queries::pack_line_numbers and stored as a single length-prefixed blob.
+std::string encode_metadata_value(std::span<const std::uint32_t> lines) {
+    std::vector<std::uint32_t> line_numbers(lines.begin(), lines.end());
+    auto packed = queries::pack_line_numbers(line_numbers);
+
+    std::string encoded;
+    append_blob(encoded, packed);
+    return encoded;
+}
+
+// Serializes a metadata record as three 64-bit fields in this order:
+// checkpoint_size, total_lines, total_uc_size.
+std::string encode_metadata_record(std::uint64_t checkpoint_size,
+                                   std::uint64_t total_lines,
+                                   std::uint64_t total_uc_size) {
+    std::string encoded;
+    for (const std::uint64_t field :
+         {checkpoint_size, total_lines, total_uc_size}) {
+        append_u64(encoded, field);
+    }
+    return encoded;
+}
+
+// Serializes tar archive metadata: the length-prefixed archive name followed
+// by four 64-bit fields (checkpoint_size, total_lines, total_uc_size,
+// total_files).
+std::string encode_tar_archive_value(std::string_view archive_name,
+                                     std::uint64_t checkpoint_size,
+                                     std::uint64_t total_lines,
+                                     std::uint64_t total_uc_size,
+                                     std::uint64_t total_files) {
+    std::string encoded;
+    append_string(encoded, archive_name);
+    for (const std::uint64_t field :
+         {checkpoint_size, total_lines, total_uc_size, total_files}) {
+        append_u64(encoded, field);
+    }
+    return encoded;
+}
+
+// Rebuilds tar archive metadata from a value written by
+// encode_tar_archive_value(); fields are read in the encoder's order.
+IndexDatabase::TarArchiveMetadata decode_tar_archive_value(
+    std::string_view value) {
+    Cursor fields(value);
+    IndexDatabase::TarArchiveMetadata decoded;
+    decoded.archive_name = fields.str();
+    decoded.checkpoint_size = fields.u64();
+    decoded.total_lines = fields.u64();
+    decoded.total_uc_size = fields.u64();
+    decoded.total_files = fields.u64();
+    return decoded;
+}
+
+// Serializes the value part of a tar member record: file_size, file_mtime,
+// the typeflag byte, then data_offset. The offset and name are not written
+// here; decode_tar_file() takes them from the key.
+std::string encode_tar_file_value(const IndexDatabase::TarFileRecord& record) {
+    std::string encoded;
+    append_u64(encoded, record.file_size);
+    append_u64(encoded, record.file_mtime);
+    const auto type_byte = static_cast<std::uint8_t>(record.typeflag);
+    append_u8(encoded, type_byte);
+    append_u64(encoded, record.data_offset);
+    return encoded;
+}
+
+// Rebuilds a tar member record from its key and value.
+//
+// The key supplies uncompressed_offset (bytes 4..11) and the file name
+// (everything after the first NUL found at or beyond byte 12); the value
+// follows the layout written by encode_tar_file_value(). Throws
+// std::runtime_error when the key is shorter than 13 bytes, has no such NUL,
+// or the value ends early.
+IndexDatabase::TarFileRecord decode_tar_file(std::string_view key,
+                                             std::string_view value) {
+    constexpr std::size_t kOffsetPos = 4;
+    constexpr std::size_t kSeparatorPos = 12;
+    constexpr std::size_t kU64Bytes = 8;
+
+    if (key.size() < kSeparatorPos + 1) {
+        throw std::runtime_error("Corrupt tar file key");
+    }
+    const auto separator = key.find('\0', kSeparatorPos);
+    if (separator == std::string_view::npos) {
+        throw std::runtime_error("Corrupt tar file key");
+    }
+
+    Cursor fields(value);
+    IndexDatabase::TarFileRecord decoded;
+    decoded.uncompressed_offset =
+        rocks::KeyCodec::decode_be64(key.substr(kOffsetPos, kU64Bytes));
+    decoded.file_name = std::string(key.substr(separator + 1));
+    decoded.file_size = fields.u64();
+    decoded.file_mtime = fields.u64();
+    decoded.typeflag = static_cast<char>(fields.u8());
+    decoded.data_offset = fields.u64();
+    return decoded;
+}
+
+// Reads the three 64-bit fields written by encode_metadata_record(), in
+// order: checkpoint_size, total_lines, total_uc_size.
+std::array<std::uint64_t, 3> decode_metadata_record(std::string_view value) {
+    Cursor reader(value);
+    std::array<std::uint64_t, 3> fields{};
+    for (auto& field : fields) {
+        field = reader.u64();
+    }
+    return fields;
+}
+
+// Copies the iterator's current value into an owned string.
+std::string iterator_value(::rocksdb::Iterator& it) {
+    const auto bytes = it.value();
+    std::string copy;
+    copy.assign(bytes.data(), bytes.size());
+    return copy;
+}
+
+// Copies the iterator's current key into an owned string.
+std::string iterator_key(::rocksdb::Iterator& it) {
+    const auto bytes = it.key();
+    std::string copy;
+    copy.assign(bytes.data(), bytes.size());
+    return copy;
+}
+
+// Runs `fn` over the entries of `column_family` under `prefix` by delegating
+// to internal::scan_prefix_iterator, which is handed a factory that opens a
+// new iterator on `db` and the error label "Failed to scan RocksDB prefix".
+template <typename Fn>
+void scan_prefix(const rocks::RocksDatabase& db, std::string_view column_family,
+                 std::string_view prefix, Fn&& fn) {
+    auto open_iterator = [&] { return db.new_iterator(column_family); };
+    internal::scan_prefix_iterator("Failed to scan RocksDB prefix", prefix,
+                                   std::move(open_iterator),
+                                   std::forward<Fn>(fn));
+}
+
+}  // namespace
+
+// Opens (or reuses, via RocksDBManager::get_or_open) the database at the
+// normalized index path. In ReadWrite mode the schema version is also
+// initialized; other modes perform no writes here.
+IndexDatabase::IndexDatabase(const std::string& index_path,
+                             rocks::RocksDatabase::OpenMode open_mode)
+    : db_path_(internal::normalize_index_root(index_path)),
+      open_mode_(open_mode),
+      db_(rocks::RocksDBManager::instance().get_or_open(db_path_, open_mode_)) {
+    const bool writable =
+        open_mode_ == rocks::RocksDatabase::OpenMode::ReadWrite;
+    if (!writable) {
+        return;
+    }
+    init_base_schema();
 }
 
-// ---------------------------------------------------------------------------
-// Query helpers
-// ---------------------------------------------------------------------------
+// Stamps the database with kSchemaVersion if no version is stored yet.
+// An existing version entry is left untouched. Throws IndexerError
+// (DATABASE_ERROR) when the read or the write fails.
+void IndexDatabase::init_base_schema() {
+    std::string stored;
+    const auto read_status = db_->get(schema_version_key(), &stored);
+    if (read_status.ok()) {
+        return;
+    }
+    if (!read_status.IsNotFound()) {
+        throw_db_error("Failed to read schema version", read_status);
+    }
+
+    const auto write_status = db_->put(
+        schema_version_key(), rocks::KeyCodec::encode_be32(kSchemaVersion));
+    if (!write_status.ok()) {
+        throw_db_error("Failed to initialize schema version", write_status);
+    }
+}
 
-// Returns true if the named table exists in the database.
-static bool table_exists(sqlite3* db, const char* table_name) {
-    SqliteStmt stmt(db,
-                    "SELECT 1 FROM sqlite_master "
-                    "WHERE type='table' AND name=?;");
-    stmt.bind_text(1, table_name);
-    return sqlite3_step(stmt) == SQLITE_ROW;
+// Bloom schema hook; performs no work in the RocksDB backend.
+void IndexDatabase::init_bloom_schema() {
+    // RocksDB column families are provisioned at DB open; bloom-specific
+    // schema initialization is intentionally a no-op.
+}
+
+// Manifest schema hook; performs no work in the RocksDB backend.
+void IndexDatabase::init_manifest_schema() {
+    // RocksDB column families are provisioned at DB open; manifest-specific
+    // schema initialization is intentionally a no-op.
 }
 
 bool IndexDatabase::has_bloom_data(int file_id) const {
-    if (!table_exists(db_.get(), "chunk_bloom_filters")) return false;
-    SqliteStmt stmt(db_.get(),
-                    "SELECT 1 FROM chunk_bloom_filters "
-                    "WHERE file_info_id=? LIMIT 1;");
-    stmt.bind_int(1, file_id);
-    return sqlite3_step(stmt) == SQLITE_ROW;
+    // True when the "chunk_bloom" column family holds at least one key that
+    // starts with this file's encoded id.
+    bool any_entry = false;
+    const auto file_prefix = prefix_for_file(file_id);
+    auto mark_seen = [&any_entry](::rocksdb::Iterator&) { any_entry = true; };
+    scan_prefix(*db_, "chunk_bloom", file_prefix, mark_seen);
+    return any_entry;
 }
 
 bool IndexDatabase::has_manifest_data(int file_id) const {
-    if (!table_exists(db_.get(), "checkpoint_event_ranges")) return false;
-    SqliteStmt stmt(db_.get(),
-                    "SELECT 1 FROM checkpoint_event_ranges "
-                    "WHERE file_info_id=? LIMIT 1;");
-    stmt.bind_int(1, file_id);
-    return sqlite3_step(stmt) == SQLITE_ROW;
+    // True when the "manifest" column family holds at least one event-range
+    // key ("E|" + encoded file id + ...) for this file.
+    bool any_entry = false;
+    std::string event_prefix = "E|";
+    const auto id = static_cast<std::uint32_t>(file_id);
+    rocks::KeyCodec::append_be32(event_prefix, id);
+    auto mark_seen = [&any_entry](::rocksdb::Iterator&) { any_entry = true; };
+    scan_prefix(*db_, "manifest", event_prefix, mark_seen);
+    return any_entry;
 }
 
 int IndexDatabase::get_or_create_file_info(std::string_view path,
                                            std::uint64_t file_hash) {
-    {
-        SqliteStmt stmt(db_.get(),
-                        "SELECT id, hash FROM files WHERE logical_name=?;");
-        stmt.bind_text(1, path);
-        if (sqlite3_step(stmt) == SQLITE_ROW) {
-            int id = sqlite3_column_int(stmt, 0);
-            auto stored =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-            if (stored == file_hash) return id;
-            SqliteStmt del(db_.get(), "DELETE FROM files WHERE id=?;");
-            del.bind_int(1, id);
-            sqlite3_step(del);
+    const auto logical_name = std::string(path);  // owned copy; also the reverse-entry value
+    const auto lookup = file_lookup_key(logical_name);
+    std::string existing;
+    auto status = db_->get(lookup, &existing);  // plain get(): no batch argument is passed
+    if (status.ok()) {  // the path already has a registry record
+        const auto file_id = decode_file_id(existing);
+        if (decode_file_hash(existing) == file_hash) {
+            return file_id;  // same hash: reuse the id and write nothing
         }
+        delete_file_data(file_id);  // stored hash differs from file_hash
+        auto registry = encode_file_record(file_id, file_hash);  // same id, new hash
+        if (txn_batch_) {  // a batch is open: stage both writes in it
+            status = db_->put(*txn_batch_, "default", lookup, registry);
+            if (!status.ok()) {
+                throw_db_error("Failed to update file registry", status);
+            }
+            status = db_->put(*txn_batch_, "default", file_reverse_key(file_id),
+                              logical_name);
+            if (!status.ok()) {
+                throw_db_error("Failed to update reverse file registry",
+                               status);
+            }
+        } else {  // no open batch: write directly
+            status = db_->put(lookup, registry);
+            if (!status.ok()) {
+                throw_db_error("Failed to update file registry", status);
+            }
+            status = db_->put(file_reverse_key(file_id), logical_name);
+            if (!status.ok()) {
+                throw_db_error("Failed to update reverse file registry",
+                               status);
+            }
+        }
+        return file_id;
+    }
+    if (!status.IsNotFound()) {  // anything but "not found" is a failure
+        throw_db_error("Failed to query file registry", status);
     }
 
-    SqliteStmt stmt(
-        db_.get(),
-        "INSERT INTO files(logical_name, byte_size, mtime_unix, hash)"
-        " VALUES(?, 0, 0, ?);");
-    stmt.bind_text(1, path);
-    stmt.bind_int64(2, static_cast<std::int64_t>(file_hash));
-    if (sqlite3_step(stmt) != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert into files: " +
-                               std::string(sqlite3_errmsg(db_.get())));
+    std::uint32_t next_id = 1;  // used when no counter has been stored yet
+    std::string next_value;
+    status = db_->get(next_file_id_key(), &next_value);  // read with plain get()
+    if (status.ok()) {
+        next_id = rocks::KeyCodec::decode_be32(next_value);
+    } else if (!status.IsNotFound()) {
+        throw_db_error("Failed to read next file id", status);
     }
-    return static_cast<int>(sqlite3_last_insert_rowid(db_.get()));
+    // NOTE(review): with an open batch the id bump below is only staged; confirm ids cannot repeat.
+    const auto file_id = static_cast<int>(next_id);
+    const auto new_registry = encode_file_record(file_id, file_hash);
+    const auto next_registry = rocks::KeyCodec::encode_be32(next_id + 1);
+    // Writes, in order: lookup entry, reverse entry, then the bumped counter.
+    if (txn_batch_) {
+        status = db_->put(*txn_batch_, "default", lookup, new_registry);
+        if (!status.ok()) {
+            throw_db_error("Failed to insert file registry", status);
+        }
+        status = db_->put(*txn_batch_, "default", file_reverse_key(file_id),
+                          logical_name);
+        if (!status.ok()) {
+            throw_db_error("Failed to insert reverse file registry", status);
+        }
+        status =
+            db_->put(*txn_batch_, "default", next_file_id_key(), next_registry);
+        if (!status.ok()) {
+            throw_db_error("Failed to update next file id", status);
+        }
+    } else {
+        status = db_->put(lookup, new_registry);
+        if (!status.ok()) {
+            throw_db_error("Failed to insert file registry", status);
+        }
+        status = db_->put(file_reverse_key(file_id), logical_name);
+        if (!status.ok()) {
+            throw_db_error("Failed to insert reverse file registry", status);
+        }
+        status = db_->put(next_file_id_key(), next_registry);
+        if (!status.ok()) {
+            throw_db_error("Failed to update next file id", status);
+        }
+    }
+
+    return file_id;
 }
 
 int IndexDatabase::get_file_info_id(std::string_view path) const {
-    SqliteStmt stmt(db_.get(), "SELECT id FROM files WHERE logical_name=?;");
-    stmt.bind_text(1, path);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return sqlite3_column_int(stmt, 0);
+    // Point lookup in the file registry: -1 signals "path not registered",
+    // any other non-OK status is raised through throw_db_error.
+    std::string record;
+    const auto key = file_lookup_key(path);
+    auto lookup = db_->get(key, &record);
+    if (lookup.IsNotFound()) return -1;
+    if (!lookup.ok())
+        throw_db_error("Failed to look up file info id", lookup);
+    return decode_file_id(record);
+}
+
+std::optional<std::uint64_t> IndexDatabase::get_file_hash(
+    std::string_view path) const {
+    std::string record;
+    auto lookup = db_->get(file_lookup_key(path), &record);
+    if (lookup.IsNotFound()) {
+        return {};
     }
-    return -1;
+    // An absent key gives an empty optional above; other failures are raised.
+    if (!lookup.ok())
+        throw_db_error("Failed to look up file hash", lookup);
+    return decode_file_hash(record);
+}
+
+int IndexDatabase::find_file(std::string_view file_path) const {
+    return get_file_info_id(internal::get_logical_path(file_path));  // -1 when the logical path is not registered
 }
 
 void IndexDatabase::begin_transaction() {
-    exec_schema(db_.get(), "BEGIN TRANSACTION;", "begin_transaction");
+    using Batch = rocks::RocksDatabase::Batch;  // a batch already held is replaced
+    txn_batch_ = std::make_unique<Batch>(db_->begin_batch());
 }
 
 void IndexDatabase::commit_transaction() {
-    exec_schema(db_.get(), "COMMIT;", "commit_transaction");
+    // Without an open batch there is nothing to commit.
+    if (txn_batch_ == nullptr) return;
+    auto commit_status = db_->commit_batch(*txn_batch_);
+    // The batch is released before the status is examined, so a failed
+    // commit still leaves no batch open.
+    txn_batch_.reset();
+    if (!commit_status.ok())
+        throw_db_error("Failed to commit RocksDB batch", commit_status);
 }
 
-// ---------------------------------------------------------------------------
-// Bloom insert operations
-// ---------------------------------------------------------------------------
+void IndexDatabase::rollback_transaction() noexcept { txn_batch_.reset(); }  // releases the pending batch, if any, without committing it
 
 void IndexDatabase::insert_chunk_bloom_filter(
     int file_id, std::uint64_t checkpoint_idx, std::string_view dimension,
     std::span<const unsigned char> blob_data, std::uint64_t num_entries) {
-    queries::insert_chunk_bloom_filter(
-        db_, file_id, checkpoint_idx, dimension, blob_data.data(),
-        static_cast<int>(blob_data.size()), num_entries);
+    // Staged in the open batch when there is one, written directly otherwise.
+    const auto slot = chunk_bloom_key(file_id, dimension, checkpoint_idx);
+    const auto blob = encode_bloom_value(blob_data, num_entries);
+    auto st = txn_batch_ ? db_->put(*txn_batch_, "chunk_bloom", slot, blob)
+                         : db_->put(slot, blob, "chunk_bloom");
+    if (!st.ok())
+        throw_db_error("Failed to insert chunk bloom filter", st);
 }
 
 void IndexDatabase::insert_chunk_bloom_filter(
     int file_id, std::uint64_t checkpoint_idx, std::string_view dimension,
     const void* blob_data, int blob_size, std::uint64_t num_entries) {
-    queries::insert_chunk_bloom_filter(db_, file_id, checkpoint_idx, dimension,
-                                       blob_data, blob_size, num_entries);
+    const std::span<const unsigned char> blob(
+        static_cast<const unsigned char*>(blob_data),
+        static_cast<std::size_t>(blob_size));  // blob_size is not range-checked
+    insert_chunk_bloom_filter(file_id, checkpoint_idx, dimension, blob,
+                              num_entries);
 }
 
 void IndexDatabase::insert_file_bloom_filter(
     int file_id, std::string_view dimension,
     std::span<const unsigned char> blob_data, std::uint64_t num_entries) {
-    queries::insert_file_bloom_filter(db_, file_id, dimension, blob_data.data(),
-                                      static_cast<int>(blob_data.size()),
-                                      num_entries);
+    // One "file_bloom" entry per (file, dimension); staged in the open batch
+    // when there is one, written directly otherwise.
+    const auto slot = file_bloom_key(file_id, dimension);
+    const auto blob = encode_bloom_value(blob_data, num_entries);
+    auto st = txn_batch_ ? db_->put(*txn_batch_, "file_bloom", slot, blob)
+                         : db_->put(slot, blob, "file_bloom");
+    if (!st.ok()) throw_db_error("Failed to insert file bloom filter", st);
 }
 
 void IndexDatabase::insert_file_bloom_filter(int file_id,
@@ -312,254 +775,734 @@ void IndexDatabase::insert_file_bloom_filter(int file_id,
                                              const void* blob_data,
                                              int blob_size,
                                              std::uint64_t num_entries) {
-    queries::insert_file_bloom_filter(db_, file_id, dimension, blob_data,
-                                      blob_size, num_entries);
+    auto* bytes = static_cast<const unsigned char*>(blob_data);
+    insert_file_bloom_filter(file_id, dimension,
+                             std::span<const unsigned char>(
+                                 bytes, static_cast<std::size_t>(blob_size)),
+                             num_entries);
 }
 
 void IndexDatabase::insert_chunk_statistics(int file_id,
                                             std::uint64_t checkpoint_idx,
                                             const ChunkStatistics& stats) {
-    queries::insert_chunk_statistics(db_, file_id, checkpoint_idx, stats);
+    // Staged in the open batch when there is one, written directly otherwise.
+    const auto stats_key = chunk_stats_key(file_id, checkpoint_idx);
+    const auto encoded = encode_chunk_statistics_value(stats);
+    auto st = txn_batch_
+                  ? db_->put(*txn_batch_, "chunk_stats", stats_key, encoded)
+                  : db_->put(stats_key, encoded, "chunk_stats");
+    if (!st.ok()) throw_db_error("Failed to insert chunk statistics", st);
+}
+
+void IndexDatabase::insert_checkpoint(int file_id,
+                                      const IndexerCheckpoint& checkpoint) {
+    // Key is built from (file id, uc_offset, checkpoint_idx).
+    const auto row_key = checkpoint_key(file_id, checkpoint.uc_offset,
+                                        checkpoint.checkpoint_idx);
+    const auto row_value = encode_checkpoint_value(checkpoint);
+    auto st = txn_batch_
+                  ? db_->put(*txn_batch_, "checkpoints", row_key, row_value)
+                  : db_->put(row_key, row_value, "checkpoints");
+    if (!st.ok()) throw_db_error("Failed to insert checkpoint", st);
 }
 
 void IndexDatabase::insert_index_dimension(int file_id,
                                            std::string_view dimension) {
-    queries::insert_index_dimension(db_, file_id, dimension);
+    const auto dim_key = make_dimension_key(file_id, dimension);
+    // The key alone records the dimension; the stored value is empty.
+    auto st = txn_batch_ ? db_->put(*txn_batch_, "dimensions", dim_key, "")
+                         : db_->put(dim_key, "", "dimensions");
+    if (!st.ok())
+        throw_db_error("Failed to insert index dimension", st);
+}
+
+// Records the three "dimensions" entries of one hash resolution: the per-file
+// owner entry, the hash -> resolved forward entry and the resolved -> hash
+// reverse entry (empty value). Each write goes to the open batch when there
+// is one, directly to the store otherwise; a non-OK status is always raised.
+void IndexDatabase::insert_hash_resolution(int file_id,
+                                           std::string_view dimension,
+                                           std::string_view hash_value,
+                                           std::string_view resolved_value) {
+    const auto owner = make_hash_owner_key(file_id, dimension, hash_value);
+    const auto forward = make_hash_forward_key(dimension, hash_value);
+    const auto reverse =
+        make_hash_reverse_key(dimension, resolved_value, hash_value);
+    // Batched puts used to drop their status; check both paths the same way.
+    const auto put_one = [&](const auto& key, std::string_view value,
+                             const char* failure) {
+        auto status =
+            txn_batch_
+                ? db_->put(*txn_batch_, "dimensions", key, std::string(value))
+                : db_->put(key, value, "dimensions");
+        if (!status.ok()) throw_db_error(failure, status);
+    };
+    put_one(owner, resolved_value, "Failed to insert hash owner");
+    put_one(forward, resolved_value, "Failed to insert hash resolution");
+    put_one(reverse, "", "Failed to insert reverse hash resolution");
 }
 
 void IndexDatabase::insert_chunk_dimension_stats(
     int file_id, std::uint64_t checkpoint_idx, const ChunkDimensionStats& stats,
     std::size_t value_counts_cap) {
-    queries::insert_chunk_dimension_stats(db_, file_id, checkpoint_idx, stats,
-                                          value_counts_cap);
+    // One entry per (file, checkpoint, dimension); value_counts_cap is
+    // forwarded to the encoder unchanged.
+    const auto stats_key =
+        chunk_dim_stats_key(file_id, checkpoint_idx, stats.dimension);
+    const auto encoded =
+        encode_chunk_dimension_stats_value(stats, value_counts_cap);
+    auto st = txn_batch_
+                  ? db_->put(*txn_batch_, "chunk_dim_stats", stats_key, encoded)
+                  : db_->put(stats_key, encoded, "chunk_dim_stats");
+    if (!st.ok()) throw_db_error("Failed to insert chunk dimension stats", st);
 }
 
-void IndexDatabase::insert_hash_resolution(int file_id,
-                                           std::string_view dimension,
-                                           std::string_view hash_value,
-                                           std::string_view resolved_value) {
-    queries::insert_hash_resolution(db_, file_id, dimension, hash_value,
-                                    resolved_value);
+void IndexDatabase::insert_tar_archive_metadata(int file_id,
+                                                std::string_view archive_name,
+                                                std::uint64_t checkpoint_size,
+                                                std::uint64_t total_lines,
+                                                std::uint64_t total_uc_size,
+                                                std::uint64_t total_files) {
+    // A single "archives" entry keyed by file id carries all summary fields.
+    const auto archive_key = tar_archive_key(file_id);
+    const auto encoded = encode_tar_archive_value(
+        archive_name, checkpoint_size, total_lines, total_uc_size, total_files);
+    auto st = txn_batch_
+                  ? db_->put(*txn_batch_, "archives", archive_key, encoded)
+                  : db_->put(archive_key, encoded, "archives");
+    if (!st.ok()) throw_db_error("Failed to insert tar archive metadata", st);
 }
 
-// ---------------------------------------------------------------------------
-// Bloom query operations
-// ---------------------------------------------------------------------------
+void IndexDatabase::insert_tar_file(int file_id, const TarFileRecord& record) {
+    // Keyed by (file id, uncompressed offset, member name).
+    const auto member_key =
+        tar_file_key(file_id, record.uncompressed_offset, record.file_name);
+    const auto encoded = encode_tar_file_value(record);
+    auto st = txn_batch_
+                  ? db_->put(*txn_batch_, "tar_files", member_key, encoded)
+                  : db_->put(member_key, encoded, "tar_files");
+    if (!st.ok()) throw_db_error("Failed to insert tar file metadata", st);
+}
 
 std::vector<IndexDatabase::ChunkBloomResult>
 IndexDatabase::query_chunk_bloom_filters(int file_id,
                                          std::string_view dimension) const {
-    return queries::query_chunk_bloom_filters(db_, file_id, dimension);
+    std::string scan_key = prefix_for_file(file_id);  // file prefix ...
+    scan_key.append(dimension).push_back('\0');       // ... + dimension + NUL
+    const auto dimension_end = scan_key.size() - 1;   // index of that NUL
+    std::vector<ChunkBloomResult> blooms;
+    scan_prefix(*db_, "chunk_bloom", scan_key, [&](::rocksdb::Iterator& it) {
+        blooms.push_back(decode_chunk_bloom(iterator_key(it),
+                                            iterator_value(it), dimension_end));
+    });
+    return blooms;
 }
 
 std::unordered_map<std::string, std::vector<IndexDatabase::ChunkBloomResult>>
 IndexDatabase::query_chunk_bloom_filters_batch(
     int file_id, const std::vector<std::string>& dimensions) const {
-    return queries::query_chunk_bloom_filters_batch(db_, file_id, dimensions);
+    // One prefix scan per requested dimension; a repeated name keeps the
+    // first result because emplace does not overwrite.
+    std::unordered_map<std::string, std::vector<ChunkBloomResult>> by_dim;
+    for (const std::string& name : dimensions)
+        by_dim.emplace(name, query_chunk_bloom_filters(file_id, name));
+    return by_dim;
 }
 
 std::optional<IndexDatabase::FileBloomResult>
 IndexDatabase::query_file_bloom_filter(int file_id,
                                        std::string_view dimension) const {
-    return queries::query_file_bloom_filter(db_, file_id, dimension);
+    // Point lookup in "file_bloom": a missing key yields an empty optional,
+    // any other failure is raised.
+    const auto bloom_key = file_bloom_key(file_id, dimension);
+    std::string encoded;
+    auto lookup = db_->get(bloom_key, &encoded, "file_bloom");
+    if (lookup.IsNotFound()) return std::nullopt;
+    if (!lookup.ok()) {
+        throw_db_error("Failed to query file bloom filter", lookup);
+    }
+    return decode_file_bloom(encoded);
 }
 
 std::unordered_map<std::string, IndexDatabase::FileBloomResult>
 IndexDatabase::query_file_bloom_filters_batch(
     int file_id, const std::vector<std::string>& dimensions) const {
-    return queries::query_file_bloom_filters_batch(db_, file_id, dimensions);
+    // Dimensions without a stored file-level filter are left out of the map.
+    std::unordered_map<std::string, FileBloomResult> found;
+    for (const std::string& name : dimensions) {
+        if (auto bloom = query_file_bloom_filter(file_id, name);
+            bloom.has_value())
+            found.emplace(name, std::move(*bloom));
+    }
+    return found;
 }
 
 std::vector<std::string> IndexDatabase::query_index_dimensions(
     int file_id) const {
-    return queries::query_index_dimensions(db_, file_id);
+    // Scan "d|" + be32(file_id); whatever follows that head is the name.
+    std::string head("d|");
+    rocks::KeyCodec::append_be32(head, static_cast<std::uint32_t>(file_id));
+    std::vector<std::string> names;
+    scan_prefix(*db_, "dimensions", head, [&](::rocksdb::Iterator& it) {
+        names.push_back(iterator_key(it).substr(head.size()));
+    });
+    return names;
 }
 
 bool IndexDatabase::has_index_dimension(int file_id,
                                         std::string_view dimension) const {
-    return queries::has_index_dimension(db_, file_id, dimension);
+    // Any non-OK status (including read errors) is reported as "absent".
+    std::string ignored;
+    const auto dim_key = make_dimension_key(file_id, dimension);
+    return db_->get(dim_key, &ignored, "dimensions").ok();
 }
 
 std::vector<IndexDatabase::ChunkStatisticsResult>
 IndexDatabase::query_chunk_statistics(int file_id) const {
-    return queries::query_chunk_statistics(db_, file_id);
+    // Rows come back ordered by the checkpoint index read from key[4, 12).
+    std::vector<ChunkStatisticsResult> rows;
+    const auto visit = [&rows](::rocksdb::Iterator& it) {
+        const auto raw_key = iterator_key(it);
+        ChunkStatisticsResult row;
+        row.checkpoint_idx = rocks::KeyCodec::decode_be64(
+            std::string_view(raw_key).substr(4, 8));
+        row.stats = decode_chunk_statistics_value(iterator_value(it));
+        rows.push_back(std::move(row));
+    };
+    scan_prefix(*db_, "chunk_stats", prefix_for_file(file_id), visit);
+    std::sort(rows.begin(), rows.end(), [](const auto& a, const auto& b) {
+        return a.checkpoint_idx < b.checkpoint_idx;
+    });
+    return rows;
+}
+
+// Finds the checkpoint with the largest uc_offset that does not exceed
+// target_offset (the later-scanned entry wins on ties). Returns false and
+// leaves `checkpoint` untouched when target_offset is 0, file_id is negative
+// or no entry qualifies.
+bool IndexDatabase::find_checkpoint(int file_id, std::size_t target_offset,
+                                    IndexerCheckpoint& checkpoint) const {
+    if (file_id < 0 || target_offset == 0) return false;
+    bool matched = false;
+    const auto visit = [&](::rocksdb::Iterator& it) {
+        auto next = decode_checkpoint(iterator_key(it), iterator_value(it));
+        if (next.uc_offset > target_offset) return;
+        if (matched && next.uc_offset < checkpoint.uc_offset) return;
+        checkpoint = std::move(next);
+        matched = true;
+    };
+    scan_prefix(*db_, "checkpoints", prefix_for_file(file_id), visit);
+    return matched;
+}
+
+// All checkpoints of file_id, ordered by (uc_offset, checkpoint_idx).
+std::vector<IndexDatabase::IndexerCheckpoint> IndexDatabase::query_checkpoints(
+    int file_id) const {
+    std::vector<IndexerCheckpoint> out;
+    const auto collect = [&out](::rocksdb::Iterator& it) {
+        out.push_back(decode_checkpoint(iterator_key(it), iterator_value(it)));
+    };
+    scan_prefix(*db_, "checkpoints", prefix_for_file(file_id), collect);
+    const auto order = [](const auto& c) {
+        return std::tie(c.uc_offset, c.checkpoint_idx);
+    };
+    std::sort(out.begin(), out.end(),
+              [&](const auto& a, const auto& b) { return order(a) < order(b); });
+    return out;
+}
+
+// Reads the "archives" entry for file_id; an absent key yields nullopt and
+// any other failure is raised through throw_db_error.
+std::optional<IndexDatabase::TarArchiveMetadata>
+IndexDatabase::query_tar_archive_metadata(int file_id) const {
+    std::string encoded;
+    const auto archive_key = tar_archive_key(file_id);
+    auto lookup = db_->get(archive_key, &encoded, "archives");
+    if (lookup.IsNotFound()) return std::nullopt;
+    if (!lookup.ok())
+        throw_db_error("Failed to read tar archive metadata", lookup);
+    return decode_tar_archive_value(encoded);
+}
+
+std::vector<IndexDatabase::TarFileRecord> IndexDatabase::query_tar_files(
+    int file_id) const {
+    std::vector<TarFileRecord> out;  // sorted below by (offset, file_name)
+    const auto collect = [&out](::rocksdb::Iterator& it) {
+        out.push_back(decode_tar_file(iterator_key(it), iterator_value(it)));
+    };
+    scan_prefix(*db_, "tar_files", prefix_for_file(file_id), collect);
+    std::sort(out.begin(), out.end(), [](const auto& a, const auto& b) {
+        return std::tie(a.uncompressed_offset, a.file_name) <
+               std::tie(b.uncompressed_offset, b.file_name);
+    });
+    return out;
+}
+
+// Moves the first query_tar_files() entry named file_name into `record`.
+bool IndexDatabase::find_tar_file(int file_id, std::string_view file_name,
+                                  TarFileRecord& record) const {
+    auto members = query_tar_files(file_id);
+    const auto hit = std::find_if(members.begin(), members.end(),
+        [&](const auto& m) { return m.file_name == file_name; });
+    if (hit == members.end()) return false;
+    record = std::move(*hit);
+    return true;
+}
+
+// Members that start before end_offset and end after start_offset.
+std::vector<IndexDatabase::TarFileRecord>
+IndexDatabase::query_tar_files_in_range(int file_id, std::uint64_t start_offset,
+                                        std::uint64_t end_offset) const {
+    std::vector<TarFileRecord> overlapping;
+    for (auto& member : query_tar_files(file_id)) {
+        const auto stop = member.uncompressed_offset + member.file_size;
+        if (member.uncompressed_offset >= end_offset) continue;
+        if (stop <= start_offset) continue;
+        overlapping.push_back(std::move(member));
+    }
+    return overlapping;
+}
+
+// Checkpoints whose line span overlaps or encloses [start_line, end_line].
+std::vector<IndexDatabase::IndexerCheckpoint>
+IndexDatabase::query_checkpoints_for_line_range(int file_id,
+                                                std::uint64_t start_line,
+                                                std::uint64_t end_line) const {
+    std::vector<IndexerCheckpoint> selected;
+    for (auto& cp : query_checkpoints(file_id)) {
+        const auto first = cp.first_line_num;
+        const auto last = cp.last_line_num;
+        const bool overlaps = first <= end_line && last >= start_line;
+        const bool encloses = first <= start_line && last >= end_line;
+        if (overlaps || encloses) selected.push_back(std::move(cp));
+    }
+    return selected;
 }
 
 IndexDatabase::TimeBounds IndexDatabase::query_time_bounds(int file_id) const {
-    return queries::query_time_bounds(db_, file_id);
+    // Chunks whose minimum equals UINT64_MAX or whose maximum is 0 are
+    // skipped; the remaining chunks widen the bounds and mark them valid.
+    constexpr auto kUnsetMin = std::numeric_limits<std::uint64_t>::max();
+    TimeBounds merged;
+    for (const auto& chunk : query_chunk_statistics(file_id)) {
+        const auto lo = chunk.stats.min_timestamp_us;
+        const auto hi = chunk.stats.max_timestamp_us;
+        if (lo == kUnsetMin || hi == 0) continue;
+        merged.valid = true;
+        merged.min_timestamp_us = std::min(merged.min_timestamp_us, lo);
+        merged.max_timestamp_us = std::max(merged.max_timestamp_us, hi);
+    }
+    return merged;
 }
 
 std::vector<IndexDatabase::ChunkDimensionStatsResult>
 IndexDatabase::query_chunk_dimension_stats(int file_id) const {
-    return queries::query_chunk_dimension_stats(db_, file_id);
+    // Collect every row for the file, then order by (checkpoint, dimension).
+    std::vector<ChunkDimensionStatsResult> rows;
+    const auto collect = [&rows](::rocksdb::Iterator& it) {
+        rows.push_back(decode_chunk_dimension_stats_value(iterator_key(it),
+                                                          iterator_value(it)));
+    };
+    scan_prefix(*db_, "chunk_dim_stats", prefix_for_file(file_id), collect);
+    std::sort(rows.begin(), rows.end(), [](const auto& a, const auto& b) {
+        return std::tie(a.checkpoint_idx, a.dimension) <
+               std::tie(b.checkpoint_idx, b.dimension);
+    });
+    return rows;
 }
 
 std::vector<IndexDatabase::ChunkDimensionStatsResult>
 IndexDatabase::query_chunk_dimension_stats_for_dimension(
     int file_id, std::string_view dimension) const {
-    return queries::query_chunk_dimension_stats_for_dimension(db_, file_id,
-                                                              dimension);
+    // Scans every row of the file, keeps one dimension, sorts by checkpoint.
+    std::vector<ChunkDimensionStatsResult> rows;
+    const auto keep = [&](::rocksdb::Iterator& it) {
+        auto row = decode_chunk_dimension_stats_value(iterator_key(it),
+                                                      iterator_value(it));
+        if (row.dimension != dimension) return;
+        rows.push_back(std::move(row));
+    };
+    scan_prefix(*db_, "chunk_dim_stats", prefix_for_file(file_id), keep);
+    std::sort(rows.begin(), rows.end(), [](const auto& a, const auto& b) {
+        return a.checkpoint_idx < b.checkpoint_idx;
+    });
+    return rows;
 }
 
 std::optional<std::string> IndexDatabase::query_resolved_by_hash(
     std::string_view dimension, std::string_view hash_value) const {
-    return queries::query_resolved_by_hash(db_, dimension, hash_value);
+    // Forward lookup hash -> resolved value; nullopt when no entry exists.
+    const auto forward_key = make_hash_forward_key(dimension, hash_value);
+    std::string resolved;
+    auto lookup = db_->get(forward_key, &resolved, "dimensions");
+    if (lookup.IsNotFound()) {
+        return std::nullopt;
+    }
+    if (!lookup.ok())
+        throw_db_error("Failed to query resolved hash", lookup);
+    return resolved;
 }
 
 std::vector<std::string> IndexDatabase::query_hash_by_resolved(
     std::string_view dimension, std::string_view resolved_value) const {
-    return queries::query_hash_by_resolved(db_, dimension, resolved_value);
+    // Scan with a reverse key built from an empty hash; key tails are hashes.
+    const auto head = make_hash_reverse_key(dimension, resolved_value, "");
+    std::vector<std::string> matches;
+    scan_prefix(*db_, "dimensions", head, [&](::rocksdb::Iterator& it) {
+        matches.push_back(iterator_key(it).substr(head.size()));
+    });
+    return matches;
 }
 
-// ---------------------------------------------------------------------------
-// Bloom delete operations
-// ---------------------------------------------------------------------------
-
 void IndexDatabase::delete_chunk_bloom_filters(int file_id,
                                                std::string_view dimension) {
-    queries::delete_chunk_bloom_filters(db_, file_id, dimension);
+    // Keys are gathered first and removed afterwards, so nothing is deleted
+    // while the prefix scan is still running.
+    std::string scan_key = prefix_for_file(file_id);
+    scan_key.append(dimension).push_back('\0');
+    std::vector<std::string> doomed;
+    scan_prefix(*db_, "chunk_bloom", scan_key, [&](::rocksdb::Iterator& it) {
+        doomed.push_back(iterator_key(it));
+    });
+    for (const std::string& victim : doomed) {
+        auto st = txn_batch_ ? db_->del(*txn_batch_, "chunk_bloom", victim)
+                             : db_->del(victim, "chunk_bloom");
+        if (!st.ok()) throw_db_error("Failed to delete chunk bloom", st);
+    }
 }
 
 void IndexDatabase::delete_file_bloom_filter(int file_id,
                                              std::string_view dimension) {
-    queries::delete_file_bloom_filter(db_, file_id, dimension);
+    // Deleting a key that is not there is not treated as an error.
+    const auto bloom_key = file_bloom_key(file_id, dimension);
+    auto st = txn_batch_ ? db_->del(*txn_batch_, "file_bloom", bloom_key)
+                         : db_->del(bloom_key, "file_bloom");
+    const bool tolerated = st.ok() || st.IsNotFound();
+    if (!tolerated)
+        throw_db_error("Failed to delete file bloom", st);
 }
 
 void IndexDatabase::delete_chunk_statistics(int file_id) {
-    queries::delete_chunk_statistics(db_, file_id);
+    // Collect the file's keys first, then delete them one by one.
+    std::vector<std::string> doomed;
+    const auto file_prefix = prefix_for_file(file_id);
+    scan_prefix(*db_, "chunk_stats", file_prefix, [&](::rocksdb::Iterator& it) {
+        doomed.push_back(iterator_key(it));
+    });
+    for (const std::string& victim : doomed) {
+        auto st = txn_batch_ ? db_->del(*txn_batch_, "chunk_stats", victim)
+                             : db_->del(victim, "chunk_stats");
+        if (!st.ok()) throw_db_error("Failed to delete chunk statistics", st);
+    }
+}
 
 void IndexDatabase::delete_chunk_dimension_stats(int file_id) {
-    queries::delete_chunk_dimension_stats(db_, file_id);
+    std::vector<std::string> doomed;  // keys gathered first, deleted after
+    const auto collect = [&](::rocksdb::Iterator& it) {
+        doomed.push_back(iterator_key(it));
+    };
+    scan_prefix(*db_, "chunk_dim_stats", prefix_for_file(file_id), collect);
+    for (const std::string& victim : doomed) {
+        auto st = txn_batch_ ? db_->del(*txn_batch_, "chunk_dim_stats", victim)
+                             : db_->del(victim, "chunk_dim_stats");
+        if (!st.ok())
+            throw_db_error("Failed to delete chunk dimension stats", st);
+    }
+}
 
 void IndexDatabase::delete_hash_resolutions(int file_id) {
-    queries::delete_hash_resolutions(db_, file_id);
+    std::vector<std::pair<std::string, std::string>> owned;  // (owner key, stored value) pairs
+    std::string prefix("o|");  // scan prefix: "o|" + be32(file_id) + NUL
+    rocks::KeyCodec::append_be32(prefix, static_cast<std::uint32_t>(file_id));
+    prefix.push_back('\0');
+    scan_prefix(*db_, "dimensions", prefix, [&](::rocksdb::Iterator& it) {
+        owned.emplace_back(iterator_key(it), iterator_value(it));
+    });
+    for (const auto& [owner_key, resolved] : owned) {  // deletions happen after the scan
+        if (owner_key.size() <= prefix.size()) {
+            DFTRACER_UTILS_LOG_WARN(
+                "Skipping malformed owner key for file_id=%d", file_id);
+            continue;
+        }
+        const std::string_view payload(owner_key.data() + prefix.size(),
+                                       owner_key.size() - prefix.size());
+        auto split = payload.find('\0');  // payload is parsed as dimension NUL hash_value
+        if (split == std::string_view::npos) {
+            DFTRACER_UTILS_LOG_WARN(
+                "Skipping malformed owner key payload for file_id=%d", file_id);
+            continue;
+        }
+        auto dimension = payload.substr(0, split);
+        auto hash_value = payload.substr(split + 1);
+        auto forward = make_hash_forward_key(dimension, hash_value);
+        auto reverse = make_hash_reverse_key(dimension, resolved, hash_value);
+        const auto del_one = [&](std::string_view key) {
+            auto status = txn_batch_ ? db_->del(*txn_batch_, "dimensions", key)
+                                     : db_->del(key, "dimensions");
+            if (!status.ok() && !status.IsNotFound()) {  // a missing key is tolerated
+                throw_db_error("Failed to delete hash resolution", status);
+            }
+        };
+        del_one(owner_key);
+        del_one(forward);  // NOTE(review): forward key carries no file id - shared with other files? confirm
+        del_one(reverse);
+    }
 }
 
-// ---------------------------------------------------------------------------
-// Manifest insert operations
-// ---------------------------------------------------------------------------
-
 void IndexDatabase::insert_event_range(
     int file_id, std::uint64_t checkpoint_idx, std::string_view cat,
     std::string_view name, std::span<const std::uint32_t> line_numbers) {
-    queries::insert_event_range(db_, file_id, checkpoint_idx, cat, name,
-                                line_numbers);
+    // One "manifest" entry per (file, checkpoint, cat, name).
+    const auto event_key = manifest_event_key(file_id, checkpoint_idx, cat, name);
+    const auto encoded = encode_event_range_value(line_numbers);
+    auto st = txn_batch_ ? db_->put(*txn_batch_, "manifest", event_key, encoded)
+                         : db_->put(event_key, encoded, "manifest");
+    if (!st.ok())
+        throw_db_error("Failed to insert event range", st);
 }
 
 void IndexDatabase::insert_event_range(
     int file_id, std::uint64_t checkpoint_idx, std::string_view cat,
     std::string_view name, const std::vector<std::uint32_t>& line_numbers) {
-    queries::insert_event_range(db_, file_id, checkpoint_idx, cat, name,
-                                line_numbers);
+    const std::span<const std::uint32_t> lines(line_numbers);  // view, no copy
+    insert_event_range(file_id, checkpoint_idx, cat, name, lines);
 }
 
 void IndexDatabase::insert_metadata_lines(
     int file_id, std::uint64_t checkpoint_idx, std::string_view meta_type,
     std::span<const std::uint32_t> line_numbers) {
-    queries::insert_metadata_lines(db_, file_id, checkpoint_idx, meta_type,
-                                   line_numbers);
+    // One "manifest" entry per (file, checkpoint, meta_type).
+    const auto meta_key =
+        manifest_metadata_key(file_id, checkpoint_idx, meta_type);
+    const auto encoded = encode_metadata_value(line_numbers);
+    auto st = txn_batch_ ? db_->put(*txn_batch_, "manifest", meta_key, encoded)
+                         : db_->put(meta_key, encoded, "manifest");
+    if (!st.ok()) throw_db_error("Failed to insert metadata lines", st);
 }
 
 void IndexDatabase::insert_metadata_lines(
     int file_id, std::uint64_t checkpoint_idx, std::string_view meta_type,
     const std::vector<std::uint32_t>& line_numbers) {
-    queries::insert_metadata_lines(db_, file_id, checkpoint_idx, meta_type,
-                                   line_numbers);
+    const std::span<const std::uint32_t> lines(line_numbers);  // view, no copy
+    insert_metadata_lines(file_id, checkpoint_idx, meta_type, lines);
 }
 
-// ---------------------------------------------------------------------------
-// Manifest query operations
-// ---------------------------------------------------------------------------
-
 std::vector<IndexDatabase::EventRangeResult> IndexDatabase::query_event_ranges(
     int file_id) const {
-    return queries::query_event_ranges(db_, file_id);
+    std::vector<EventRangeResult> results;
+    std::string prefix("E|");
+    rocks::KeyCodec::append_be32(prefix, static_cast<std::uint32_t>(file_id));
+    scan_prefix(*db_, "manifest", prefix, [&](::rocksdb::Iterator& it) {
+        auto key = iterator_key(it);
+        auto payload = std::string_view(key).substr(2 + 4 + 8);
+        auto split = payload.find('\0');
+        if (split == std::string_view::npos) {
+            throw std::runtime_error("Corrupt manifest event key");
+        }
+        EventRangeResult result;
+        result.checkpoint_idx =
+            rocks::KeyCodec::decode_be64(std::string_view(key).substr(6, 8));
+        result.cat = std::string(payload.substr(0, split));
+        result.name = std::string(payload.substr(split + 1));
+        auto value = iterator_value(it);
+        Cursor cursor(value);
+        result.event_count = cursor.u64();
+        result.line_numbers = decode_line_numbers(cursor);
+        results.push_back(std::move(result));
+    });
+    std::sort(results.begin(), results.end(),
+              [](const auto& lhs, const auto& rhs) {
+                  return std::tie(lhs.checkpoint_idx, lhs.cat, lhs.name) <
+                         std::tie(rhs.checkpoint_idx, rhs.cat, rhs.name);
+              });
+    return results;
 }
 
 std::vector<IndexDatabase::EventRangeResult>
 IndexDatabase::query_event_ranges_for_checkpoint(
     int file_id, std::uint64_t checkpoint_idx) const {
-    return queries::query_event_ranges_for_checkpoint(db_, file_id,
-                                                      checkpoint_idx);
+    // Fetch every range stored for the file, then discard the entries that
+    // belong to another checkpoint; survivors keep their relative order.
+    auto ranges = query_event_ranges(file_id);
+    std::erase_if(ranges, [checkpoint_idx](const EventRangeResult& range) {
+        return range.checkpoint_idx != checkpoint_idx;
+    });
+    return ranges;
 }
 
 std::vector<IndexDatabase::MetadataLinesResult>
 IndexDatabase::query_metadata_lines(int file_id) const {
-    return queries::query_metadata_lines(db_, file_id);
+    std::vector<MetadataLinesResult> results;
+    std::string prefix("M|");
+    rocks::KeyCodec::append_be32(prefix, static_cast<std::uint32_t>(file_id));
+    scan_prefix(*db_, "manifest", prefix, [&](::rocksdb::Iterator& it) {
+        auto key = iterator_key(it);
+        MetadataLinesResult result;
+        result.checkpoint_idx =
+            rocks::KeyCodec::decode_be64(std::string_view(key).substr(6, 8));
+        result.meta_type = key.substr(14);
+        auto value = iterator_value(it);
+        Cursor cursor(value);
+        result.line_numbers = decode_line_numbers(cursor);
+        results.push_back(std::move(result));
+    });
+    std::sort(results.begin(), results.end(),
+              [](const auto& lhs, const auto& rhs) {
+                  return std::tie(lhs.checkpoint_idx, lhs.meta_type) <
+                         std::tie(rhs.checkpoint_idx, rhs.meta_type);
+              });
+    return results;
 }
 
 std::vector<IndexDatabase::MetadataLinesResult>
 IndexDatabase::query_metadata_lines_for_checkpoint(
     int file_id, std::uint64_t checkpoint_idx) const {
-    return queries::query_metadata_lines_for_checkpoint(db_, file_id,
-                                                        checkpoint_idx);
+    // Start from all metadata rows of the file and drop those recorded for
+    // other checkpoints; the remaining rows keep their relative order.
+    auto rows = query_metadata_lines(file_id);
+    std::erase_if(rows, [checkpoint_idx](const MetadataLinesResult& row) {
+        return row.checkpoint_idx != checkpoint_idx;
+    });
+    return rows;
 }
 
-// ---------------------------------------------------------------------------
-// Manifest delete operations
-// ---------------------------------------------------------------------------
-
 void IndexDatabase::delete_event_ranges(int file_id) {
-    queries::delete_event_ranges(db_, file_id);
+    std::vector<std::string> keys;
+    std::string prefix("E|");
+    rocks::KeyCodec::append_be32(prefix, static_cast<std::uint32_t>(file_id));
+    scan_prefix(*db_, "manifest", prefix, [&](::rocksdb::Iterator& it) {
+        keys.push_back(iterator_key(it));
+    });
+    for (const auto& key : keys) {
+        auto status = txn_batch_ ? db_->del(*txn_batch_, "manifest", key)
+                                 : db_->del(key, "manifest");
+        if (!status.ok()) {
+            throw_db_error("Failed to delete manifest event ranges", status);
+        }
+    }
 }
 
 void IndexDatabase::delete_metadata_lines(int file_id) {
-    queries::delete_metadata_lines(db_, file_id);
+    std::vector<std::string> keys;
+    std::string prefix("M|");
+    rocks::KeyCodec::append_be32(prefix, static_cast<std::uint32_t>(file_id));
+    scan_prefix(*db_, "manifest", prefix, [&](::rocksdb::Iterator& it) {
+        keys.push_back(iterator_key(it));
+    });
+    for (const auto& key : keys) {
+        auto status = txn_batch_ ? db_->del(*txn_batch_, "manifest", key)
+                                 : db_->del(key, "manifest");
+        if (!status.ok()) {
+            throw_db_error("Failed to delete metadata lines", status);
+        }
+    }
 }
 
 std::uint64_t IndexDatabase::get_total_events(int file_id) const {
-    // Exact count from chunk_statistics (populated when bloom was built)
-    try {
-        SqliteStmt stmt(db_,
-                        "SELECT SUM(total_events) FROM chunk_statistics "
-                        "WHERE file_info_id = ?;");
-        stmt.bind_int(1, file_id);
-        if (sqlite3_step(stmt) == SQLITE_ROW &&
-            sqlite3_column_type(stmt, 0) != SQLITE_NULL) {
-            auto val = sqlite3_column_int64(stmt, 0);
-            if (val > 0) return static_cast<std::uint64_t>(val);
-        }
-    } catch (...) {
-        // Table may not exist if bloom was never built
+    std::uint64_t total = 0;
+    for (const auto& row : query_chunk_statistics(file_id)) {
+        total += row.stats.total_events;
     }
-    // Fallback: num_lines (approximate, might include array delimiters)
-    return get_num_lines(file_id);
+    return total > 0 ? total : get_num_lines(file_id);
 }
 
-int IndexDatabase::find_file(std::string_view file_path) const {
-    auto logical = internal::get_logical_path(file_path);
-    return get_file_info_id(logical);
+void IndexDatabase::insert_file_metadata(int file_id,
+                                         std::uint64_t checkpoint_size,
+                                         std::uint64_t total_lines,
+                                         std::uint64_t total_uc_size) {
+    const auto key = metadata_key(file_id);
+    const auto value =
+        encode_metadata_record(checkpoint_size, total_lines, total_uc_size);
+    auto status = txn_batch_ ? db_->put(*txn_batch_, "metadata", key, value)
+                             : db_->put(key, value, "metadata");
+    if (!status.ok()) {
+        throw_db_error("Failed to insert metadata", status);
+    }
+}
+
+std::uint64_t IndexDatabase::get_checkpoint_size(int file_id) const {
+    std::string value;
+    auto status = db_->get(metadata_key(file_id), &value, "metadata");
+    if (status.IsNotFound()) {
+        return 0;
+    }
+    if (!status.ok()) {
+        throw_db_error("Failed to read metadata", status);
+    }
+    return decode_metadata_record(value)[0];
 }
 
 std::uint64_t IndexDatabase::get_num_lines(int file_id) const {
-    SqliteStmt stmt(db_, "SELECT total_lines FROM metadata WHERE file_id = ?;");
-    stmt.bind_int(1, file_id);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
+    std::string value;
+    auto status = db_->get(metadata_key(file_id), &value, "metadata");
+    if (status.IsNotFound()) {
+        return 0;
+    }
+    if (!status.ok()) {
+        throw_db_error("Failed to read metadata", status);
     }
-    return 0;
+    return decode_metadata_record(value)[1];
 }
 
 std::uint64_t IndexDatabase::get_max_bytes(int file_id) const {
-    // Primary: metadata table has the authoritative total uncompressed size
-    SqliteStmt stmt(db_,
-                    "SELECT total_uc_size FROM metadata WHERE file_id = ?;");
-    stmt.bind_int(1, file_id);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        auto val = sqlite3_column_int64(stmt, 0);
-        if (val > 0) return static_cast<std::uint64_t>(val);
-    }
-    // Fallback: sum from checkpoints
-    SqliteStmt stmt2(
-        db_,
-        "SELECT MAX(uc_offset + uc_size) FROM checkpoints WHERE file_id = ?;");
-    stmt2.bind_int(1, file_id);
-    if (sqlite3_step(stmt2) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt2, 0));
-    }
-    return 0;
+    std::string value;
+    auto status = db_->get(metadata_key(file_id), &value, "metadata");
+    if (status.IsNotFound()) {
+        return 0;
+    }
+    if (!status.ok()) {
+        throw_db_error("Failed to read metadata", status);
+    }
+    return decode_metadata_record(value)[2];
+}
+
+void IndexDatabase::delete_file_data(int file_id) {
+    // Removes a registry key from the default column family; absence is OK.
+    const auto drop_registry_key = [&](std::string_view key) {
+        auto outcome =
+            txn_batch_ ? db_->del(*txn_batch_, "default", key) : db_->del(key);
+        if (!outcome.ok() && !outcome.IsNotFound()) {
+            throw_db_error("Failed to delete file registry entry", outcome);
+        }
+    };
+    // The reverse entry gives the logical name that keys the forward entry.
+    const auto reverse_key = file_reverse_key(file_id);
+    std::string logical_name;
+    auto lookup = db_->get(reverse_key, &logical_name);
+    if (!lookup.ok() && !lookup.IsNotFound()) {
+        throw_db_error("Failed to read reverse file registry", lookup);
+    }
+    if (lookup.ok()) {
+        drop_registry_key(file_lookup_key(logical_name));
+        drop_registry_key(reverse_key);
+    }
+    // Collects every key under a prefix in one column family, then deletes.
+    const auto purge = [&](std::string_view cf, std::string_view prefix) {
+        std::vector<std::string> doomed;
+        scan_prefix(*db_, cf, prefix, [&](::rocksdb::Iterator& it) {
+            doomed.push_back(iterator_key(it));
+        });
+        for (const auto& key : doomed) {
+            auto outcome =
+                txn_batch_ ? db_->del(*txn_batch_, cf, key) : db_->del(key, cf);
+            if (!outcome.ok() && !outcome.IsNotFound()) {
+                throw_db_error("Failed to delete file-scoped RocksDB data",
+                               outcome);
+            }
+        }
+    };
+    const auto file_prefix = prefix_for_file(file_id);
+    for (const char* cf :
+         {"checkpoints", "metadata", "archives", "tar_files", "chunk_bloom",
+          "file_bloom", "chunk_stats", "chunk_dim_stats"}) {
+        purge(cf, file_prefix);
+    }
+    purge("dimensions", std::string("d|") + file_prefix);
+    purge("manifest", std::string("E|") + file_prefix);
+    purge("manifest", std::string("M|") + file_prefix);
+    delete_hash_resolutions(file_id);
 }
 
 }  // namespace dftracer::utils::utilities::indexer
diff --git a/src/dftracer/utils/utilities/indexer/internal/checkpoint_size.h b/src/dftracer/utils/utilities/indexer/internal/checkpoint_size.h
index de7991e7..81643312 100644
--- a/src/dftracer/utils/utilities/indexer/internal/checkpoint_size.h
+++ b/src/dftracer/utils/utilities/indexer/internal/checkpoint_size.h
@@ -12,7 +12,7 @@ namespace dftracer::utils::utilities::indexer::internal {
 std::size_t determine_checkpoint_size(
     std::size_t user_checkpoint_size, const std::string& path,
     // Tunables:
-    std::size_t max_chk = (512u << 20), std::size_t max_parts = 100000000,
+    std::size_t max_parts = 100000000, std::size_t max_chk = (512u << 20),
     // default:
     std::size_t window = constants::indexer::ZLIB_WINDOW_SIZE);
 
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.cpp
index 7de88c5f..68f102b5 100644
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.cpp
+++ b/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.cpp
@@ -3,43 +3,58 @@
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
-#include <dftracer/utils/core/sqlite/async.h>
-#include <dftracer/utils/core/sqlite/statement.h>
+#include <dftracer/utils/core/rocksdb/async.h>
+#include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/index_visitor.h>
 #include <dftracer/utils/utilities/indexer/internal/checkpoint_size.h>
 #include <dftracer/utils/utilities/indexer/internal/common/gzip_checkpointer.h>
 #include <dftracer/utils/utilities/indexer/internal/common/gzip_inflater.h>
 #include <dftracer/utils/utilities/indexer/internal/error.h>
 #include <dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
+#include <dftracer/utils/utilities/indexer/internal/transaction_scope.h>
 #include <fcntl.h>
 #include <unistd.h>
 
 #include <cstdio>
+#include <mutex>
 #include <string>
 
 namespace dftracer::utils::utilities::indexer::internal::gzip {
 
-using dftracer::utils::sqlite::SqliteStmt;
-
-// Import the SQL_SCHEMA from constants
-extern const char *const &SQL_SCHEMA;
+using dftracer::utils::utilities::indexer::IndexDatabase;
+namespace rocks = dftracer::utils::rocksdb;
+
+namespace {
+
+void finalize_checkpoints(std::vector<IndexerCheckpoint>& checkpoints,
+                          std::uint64_t total_uc_size,
+                          std::uint64_t total_lines,
+                          std::uint64_t tail_line_count) {
+    // Each size is the gap to the next entry; the tail entry has c_size 0.
+    for (std::size_t idx = 0; idx < checkpoints.size(); ++idx) {
+        auto& current = checkpoints[idx];
+        const bool is_last = (idx + 1 == checkpoints.size());
+        const std::uint64_t uc_end =
+            is_last ? total_uc_size : checkpoints[idx + 1].uc_offset;
+        const std::uint64_t c_end =
+            is_last ? current.c_offset : checkpoints[idx + 1].c_offset;
+        current.uc_size = uc_end - current.uc_offset;
+        current.c_size = c_end - current.c_offset;
+    }
 
-static void init_schema(const SqliteDatabase &db) {
-    DFTRACER_UTILS_LOG_DEBUG("%s", "Initializing GZIP indexer schema");
-    int rc = sqlite3_exec(db.get(), SQL_SCHEMA, NULL, NULL, NULL);
-    if (rc != SQLITE_OK) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to initialize schema: " +
-                               std::string(sqlite3_errmsg(db.get())));
+    // Lines read after the final checkpoint are folded into that entry.
+    if (!checkpoints.empty() && tail_line_count > 0 && total_lines > 0) {
+        checkpoints.back().last_line_num = total_lines;
+        checkpoints.back().num_lines += tail_line_count;
     }
 }
 
 static dftracer::utils::coro::CoroTask<bool> process_chunks(
-    int fd, const SqliteDatabase &db, int file_id, std::uint64_t ckpt_size,
-    std::uint64_t &total_lines, std::uint64_t &total_uc_size,
-    std::uint64_t &tail_line_count, const Indexer::VisitorList &visitors) {
+    int fd, std::uint64_t ckpt_size, std::uint64_t& total_lines,
+    std::uint64_t& total_uc_size, std::uint64_t& tail_line_count,
+    std::vector<IndexerCheckpoint>& checkpoints,
+    const Indexer::VisitorList& visitors) {
     GzipInflater inflater;
     off_t offset = 0;
     if (!(co_await inflater.initialize(fd))) {
@@ -50,9 +65,8 @@ static dftracer::utils::coro::CoroTask<bool> process_chunks(
     std::uint64_t current_uc_offset = 0;
     std::uint64_t next_ckpt_offset = ckpt_size;
     std::uint64_t line_count_in_chunk = 0;
-    std::uint64_t first_line_in_chunk = total_lines + 1;  // 1-based
+    std::uint64_t first_line_in_chunk = total_lines + 1;
 
-    // Partial-line accumulator for visitor dispatch.
     std::string line_buf;
     const bool has_visitors = !visitors.empty();
 
@@ -60,13 +74,13 @@ static dftracer::utils::coro::CoroTask<bool> process_chunks(
         GzipInflaterResult result;
         if (!(co_await inflater.read(fd, offset, result))) {
             if (result.bytes_read == 0) {
-                break;        // EOF
+                break;
             }
-            co_return false;  // Error
+            co_return false;
         }
 
         if (result.bytes_read == 0) {
-            break;  // EOF
+            break;
         }
 
         current_uc_offset += result.bytes_read;
@@ -74,65 +88,59 @@ static dftracer::utils::coro::CoroTask<bool> process_chunks(
         line_count_in_chunk += result.lines_found;
 
         if (has_visitors) {
-            const auto *data = inflater.out_buffer;
+            const auto* data = inflater.out_buffer;
             const std::size_t n = result.bytes_read;
             std::size_t seg_start = 0;
             for (std::size_t i = 0; i < n; ++i) {
                 if (data[i] == '\n') {
                     line_buf.append(
-                        reinterpret_cast<const char *>(data + seg_start),
+                        reinterpret_cast<const char*>(data + seg_start),
                         i - seg_start);
                     std::string_view line_sv(line_buf);
-                    for (auto &v : visitors) {
-                        v.get().on_line(line_sv, checkpoint_idx);
+                    for (auto& visitor : visitors) {
+                        visitor.get().on_line(line_sv, checkpoint_idx);
                     }
                     line_buf.clear();
                     seg_start = i + 1;
                 }
             }
-            // Accumulate any trailing bytes that don't end with '\n'.
             if (seg_start < n) {
-                line_buf.append(
-                    reinterpret_cast<const char *>(data + seg_start),
-                    n - seg_start);
+                line_buf.append(reinterpret_cast<const char*>(data + seg_start),
+                                n - seg_start);
             }
         }
 
-        // Create checkpoint when we cross a boundary and are at a deflate
-        // block boundary (read() now stops at block boundaries via Z_BLOCK).
         if (current_uc_offset >= next_ckpt_offset && result.at_block_boundary) {
-            std::size_t chunk_start_uc = current_uc_offset;
-            std::size_t chunk_start_c = inflater.get_total_input_consumed();
+            const std::size_t chunk_start_uc = current_uc_offset;
+            const std::size_t chunk_start_c =
+                inflater.get_total_input_consumed();
 
             GzipCheckpointer checkpointer(inflater, chunk_start_uc);
             if (checkpointer.create(chunk_start_c)) {
                 std::vector<unsigned char> compressed_dict;
                 if (checkpointer.compress(compressed_dict)) {
-                    InsertCheckpointData checkpoint_data = {
-                        checkpoint_idx++,
-                        chunk_start_uc,
-                        0,  // uc_size - will be updated later
-                        0,  // c_size - will be updated later
-                        chunk_start_c,
-                        checkpointer.bits,
-                        compressed_dict.data(),
-                        compressed_dict.size(),
-                        line_count_in_chunk,
-                        first_line_in_chunk,
-                        total_lines};  // 1-based: last line = total_lines
-                    co_await dftracer::utils::sqlite::run([&] {
-                        insert_checkpoint_record(db, file_id, checkpoint_data);
-                    });
+                    IndexerCheckpoint checkpoint{
+                        .checkpoint_idx = checkpoint_idx++,
+                        .uc_offset = chunk_start_uc,
+                        .uc_size = 0,
+                        .c_offset = chunk_start_c,
+                        .c_size = 0,
+                        .bits = checkpointer.bits,
+                        .dict_compressed = std::move(compressed_dict),
+                        .num_lines = line_count_in_chunk,
+                        .first_line_num = first_line_in_chunk,
+                        .last_line_num = total_lines,
+                    };
+                    checkpoints.push_back(std::move(checkpoint));
 
                     if (has_visitors) {
-                        for (auto &v : visitors) {
-                            v.get().on_checkpoint(checkpoint_idx - 1);
+                        for (auto& visitor : visitors) {
+                            visitor.get().on_checkpoint(checkpoint_idx - 1);
                         }
                     }
 
-                    // Reset chunk counters for next chunk
                     line_count_in_chunk = 0;
-                    first_line_in_chunk = total_lines + 1;  // 1-based
+                    first_line_in_chunk = total_lines + 1;
                     next_ckpt_offset = current_uc_offset + ckpt_size;
                 }
             }
@@ -144,103 +152,74 @@ static dftracer::utils::coro::CoroTask<bool> process_chunks(
     co_return true;
 }
 
-// After all checkpoints are inserted, compute uc_size / c_size for each
-// and extend the last checkpoint's line range to cover the tail data.
-static void finalize_checkpoints(const SqliteDatabase &db, int file_id,
-                                 std::uint64_t total_uc_size,
-                                 std::uint64_t total_lines,
-                                 std::uint64_t tail_line_count) {
-    // 1. Set uc_size = distance to next checkpoint (or total_uc_size for last).
-    {
-        SqliteStmt stmt(
-            db,
-            "UPDATE checkpoints SET "
-            "uc_size = COALESCE("
-            "  (SELECT c2.uc_offset FROM checkpoints c2 "
-            "   WHERE c2.file_id = checkpoints.file_id "
-            "   AND c2.checkpoint_idx = checkpoints.checkpoint_idx + 1), ?"
-            ") - uc_offset, "
-            "c_size = COALESCE("
-            "  (SELECT c2.c_offset FROM checkpoints c2 "
-            "   WHERE c2.file_id = checkpoints.file_id "
-            "   AND c2.checkpoint_idx = checkpoints.checkpoint_idx + 1), "
-            "  c_offset"
-            ") - c_offset "
-            "WHERE file_id = ?");
-        stmt.bind_int64(1, static_cast<int64_t>(total_uc_size));
-        stmt.bind_int(2, file_id);
-        sqlite3_step(stmt.get());
-    }
-
-    // 2. Extend the last checkpoint's line range to cover the tail data
-    //    (lines after the last block boundary that didn't trigger a new
-    //    checkpoint).
-    if (tail_line_count > 0 && total_lines > 0) {
-        SqliteStmt stmt(
-            db,
-            "UPDATE checkpoints SET last_line_num = ?, "
-            "num_lines = num_lines + ? "
-            "WHERE file_id = ? AND checkpoint_idx = "
-            "(SELECT MAX(checkpoint_idx) FROM checkpoints WHERE file_id = ?)");
-        stmt.bind_int64(1, static_cast<int64_t>(total_lines));
-        stmt.bind_int64(2, static_cast<int64_t>(tail_line_count));
-        stmt.bind_int(3, file_id);
-        stmt.bind_int(4, file_id);
-        sqlite3_step(stmt.get());
-    }
-}
-
 static dftracer::utils::coro::CoroTask<bool> build_index(
-    const SqliteDatabase &db, int file_id, const std::string &gz_path,
-    std::uint64_t ckpt_size, const Indexer::VisitorList &visitors) {
+    IndexDatabase& db, int file_id, const std::string& gz_path,
+    std::uint64_t ckpt_size, const Indexer::VisitorList& visitors) {
     int fd = ::open(gz_path.c_str(), O_RDONLY);
     if (fd < 0) {
         co_return false;
     }
 
     if (!visitors.empty()) {
-        std::uint64_t compressed_bytes = file_size_bytes(gz_path);
-        std::size_t estimated = static_cast<std::size_t>(
+        const std::uint64_t compressed_bytes = file_size_bytes(gz_path);
+        const std::size_t estimated = static_cast<std::size_t>(
             compressed_bytes / (ckpt_size > 0 ? ckpt_size : 1));
-        for (auto &v : visitors) {
-            v.get().begin(estimated);
+        for (auto& visitor : visitors) {
+            visitor.get().begin(estimated);
         }
     }
 
     std::uint64_t total_lines = 0;
     std::uint64_t total_uc_size = 0;
     std::uint64_t tail_line_count = 0;
+    std::vector<IndexerCheckpoint> checkpoints;
 
-    bool success =
-        co_await process_chunks(fd, db, file_id, ckpt_size, total_lines,
-                                total_uc_size, tail_line_count, visitors);
+    const bool success =
+        co_await process_chunks(fd, ckpt_size, total_lines, total_uc_size,
+                                tail_line_count, checkpoints, visitors);
     ::close(fd);
 
-    if (success) {
-        co_await dftracer::utils::sqlite::run([&] {
-            finalize_checkpoints(db, file_id, total_uc_size, total_lines,
-                                 tail_line_count);
-            insert_file_metadata_record(db, file_id, ckpt_size, total_lines,
-                                        total_uc_size);
-        });
+    if (!success) {
+        co_return false;
     }
 
-    co_return success;
+    finalize_checkpoints(checkpoints, total_uc_size, total_lines,
+                         tail_line_count);
+
+    auto* db_ptr = &db;
+    auto* checkpoints_ptr = &checkpoints;
+    co_await rocks::run([db_ptr, file_id, ckpt_size, total_lines, total_uc_size,
+                         checkpoints_ptr] {
+        internal::TransactionScope txn(*db_ptr);
+        for (const auto& checkpoint : *checkpoints_ptr) {
+            db_ptr->insert_checkpoint(file_id, checkpoint);
+        }
+        db_ptr->insert_file_metadata(file_id, ckpt_size, total_lines,
+                                     total_uc_size);
+        txn.commit();
+    });
+
+    co_return true;
 }
 
-GzipIndexer::GzipIndexer(const std::string &gz_path_,
-                         const std::string &idx_path_, std::uint64_t ckpt_size_,
+}  // namespace
+
+GzipIndexer::GzipIndexer(const std::string& gz_path_,
+                         const std::string& idx_path_, std::uint64_t ckpt_size_,
                          bool force_rebuild_)
     : gz_path(gz_path_),
       gz_path_logical_path(get_logical_path(gz_path_)),
-      idx_path(idx_path_),
+      index_path(normalize_index_root(idx_path_)),
       ckpt_size(ckpt_size_),
       force_rebuild(force_rebuild_),
       cached_is_valid(false),
       cached_file_id(-1),
       cached_max_bytes(0),
+      cached_max_bytes_ready(false),
       cached_num_lines(0),
-      cached_checkpoint_size(0) {
+      cached_num_lines_ready(false),
+      cached_checkpoint_size(0),
+      cached_checkpoint_size_ready(false) {
     if (gz_path.empty()) {
         throw IndexerError(IndexerError::Type::INVALID_ARGUMENT,
                            "gz_path must not be empty");
@@ -264,78 +243,65 @@ GzipIndexer::~GzipIndexer() {
     close();
 }
 
-GzipIndexer::GzipIndexer(GzipIndexer &&other) noexcept
+GzipIndexer::GzipIndexer(GzipIndexer&& other) noexcept
     : gz_path(std::move(other.gz_path)),
       gz_path_logical_path(std::move(other.gz_path_logical_path)),
-      idx_path(std::move(other.idx_path)),
+      index_path(std::move(other.index_path)),
       ckpt_size(other.ckpt_size),
       force_rebuild(other.force_rebuild),
-      db(std::move(other.db)),
       visitors_(std::move(other.visitors_)),
       cached_is_valid(other.cached_is_valid.load()),
       cached_file_id(other.cached_file_id.load()),
       cached_max_bytes(other.cached_max_bytes.load()),
+      cached_max_bytes_ready(other.cached_max_bytes_ready.load()),
       cached_num_lines(other.cached_num_lines.load()),
+      cached_num_lines_ready(other.cached_num_lines_ready.load()),
       cached_checkpoint_size(other.cached_checkpoint_size.load()),
+      cached_checkpoint_size_ready(other.cached_checkpoint_size_ready.load()),
       cached_checkpoints(std::move(other.cached_checkpoints)) {}
 
-GzipIndexer &GzipIndexer::operator=(GzipIndexer &&other) noexcept {
+GzipIndexer& GzipIndexer::operator=(GzipIndexer&& other) noexcept {
     if (this != &other) {
         gz_path = std::move(other.gz_path);
         gz_path_logical_path = std::move(other.gz_path_logical_path);
-        idx_path = std::move(other.idx_path);
+        index_path = std::move(other.index_path);
         ckpt_size = other.ckpt_size;
         force_rebuild = other.force_rebuild;
-        db = std::move(other.db);
         visitors_ = std::move(other.visitors_);
         cached_is_valid.store(other.cached_is_valid.load());
         cached_file_id.store(other.cached_file_id.load());
         cached_max_bytes.store(other.cached_max_bytes.load());
+        cached_max_bytes_ready.store(other.cached_max_bytes_ready.load());
         cached_num_lines.store(other.cached_num_lines.load());
+        cached_num_lines_ready.store(other.cached_num_lines_ready.load());
         cached_checkpoint_size.store(other.cached_checkpoint_size.load());
+        cached_checkpoint_size_ready.store(
+            other.cached_checkpoint_size_ready.load());
         std::lock_guard<std::mutex> lock(cached_checkpoints_mutex);
         cached_checkpoints = std::move(other.cached_checkpoints);
     }
     return *this;
 }
 
-void GzipIndexer::open() {
-    if (!db.open(idx_path)) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to open database at " + idx_path);
-    }
-}
+void GzipIndexer::open() {}
 
-void GzipIndexer::close() {
-    DFTRACER_UTILS_LOG_DEBUG("Closing GZIP indexer database for %s",
-                             gz_path.c_str());
-    db.close();
-}
+void GzipIndexer::close() {}
 
 dftracer::utils::coro::CoroTask<void> GzipIndexer::build_async() const {
     if (!force_rebuild && !need_rebuild()) {
         co_return;
     }
 
-    co_await dftracer::utils::sqlite::run([&] {
-        init_schema(db);
-
-        int fid = find_file_id(gz_path_logical_path);
-        if (fid != -1) {
-            delete_file_record(db, fid);
-        }
-    });
-
-    std::time_t mtime = get_file_modification_time(gz_path);
-    auto hash = calculate_file_hash(gz_path);
-    std::uint64_t bytes = file_size_bytes(gz_path);
-    std::uint64_t final_ckpt_size =
+    IndexDatabase db(index_path);  // one handle for the whole build
+    const std::time_t mtime = get_file_modification_time(gz_path);
+    const auto hash = calculate_file_hash(gz_path);
+    const std::uint64_t bytes = file_size_bytes(gz_path);
+    const std::uint64_t final_ckpt_size =
         determine_checkpoint_size(ckpt_size, gz_path);
-
-    int file_id = co_await dftracer::utils::sqlite::run([&] {
-        int fid;
-        insert_file_record(db, gz_path_logical_path, bytes, mtime, hash, fid);
-        return fid;
+    const std::string logical = gz_path_logical_path;
+    const auto* logical_ptr = &logical;  // captured by the lambda below
+    const int file_id = co_await rocks::run([db_ptr = &db, logical_ptr, hash] {
+        return db_ptr->get_or_create_file_info(*logical_ptr, hash);
     });
 
     if (!(co_await build_index(db, file_id, gz_path, final_ckpt_size,
@@ -344,100 +310,157 @@ dftracer::utils::coro::CoroTask<void> GzipIndexer::build_async() const {
                            "Failed to build index for " + gz_path);
     }
 
+    (void)mtime;  // NOTE(review): looks unused now; confirm
+    (void)bytes;  // NOTE(review): looks unused now; confirm
+    struct CacheSnapshot {  // metadata read back after the build
+        std::uint64_t num_lines = 0;
+        std::uint64_t max_bytes = 0;
+        std::vector<IndexerCheckpoint> checkpoints;
+    };
+    auto snapshot = co_await rocks::run([db_ptr = &db, file_id] {
+        CacheSnapshot cache;
+        cache.num_lines = db_ptr->get_num_lines(file_id);
+        cache.max_bytes = db_ptr->get_max_bytes(file_id);
+        cache.checkpoints = db_ptr->query_checkpoints(file_id);
+        return cache;
+    });
+
     cached_is_valid = true;
     cached_file_id = file_id;
+    cached_checkpoint_size = final_ckpt_size;  // size used for this build
+    cached_checkpoint_size_ready = true;
+    cached_num_lines = snapshot.num_lines;
+    cached_num_lines_ready = true;
+    cached_max_bytes = snapshot.max_bytes;
+    cached_max_bytes_ready = true;
+    std::lock_guard<std::mutex> lock(cached_checkpoints_mutex);
+    cached_checkpoints = std::move(snapshot.checkpoints);
     co_return;
 }
 
 bool GzipIndexer::is_valid() const { return cached_is_valid; }
 
-bool GzipIndexer::exists() const { return fs::exists(idx_path); }
+bool GzipIndexer::exists() const {  // the index must be a directory
+    return fs::exists(index_path) && fs::is_directory(index_path);
+}
 
 bool GzipIndexer::need_rebuild() const {
-    if (is_valid()) return false;
-    if (!exists()) return true;
-
-    // Only query schema if database exists - matches original behavior
-    if (!query_schema_validity(db)) return true;
-
-    std::uint64_t stored_hash;
-    std::time_t stored_mtime;
-    if (!query_stored_file_info(db, gz_path_logical_path, stored_hash,
-                                stored_mtime)) {
+    if (is_valid()) {
+        return false;  // cached_is_valid already set
+    }
+    if (!exists()) {
         return true;
     }
 
-    // Fast path: if mtime matches, the file hasn't changed.
-    std::time_t current_mtime = get_file_modification_time(gz_path);
-    if (stored_mtime == current_mtime) return false;
+    try {  // any exception below forces a rebuild
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        const auto stored_hash = db.get_file_hash(gz_path_logical_path);
+        const int file_id = db.get_file_info_id(gz_path_logical_path);
+        if (!stored_hash || file_id < 0) {
+            return true;  // no stored record for this file
+        }
 
-    // mtime differs, verify with sampled fingerprint (size + head/tail)
-    // to handle edge cases like file replacement within the same second.
-    std::uint64_t current_hash = calculate_file_hash(gz_path);
-    return stored_hash != current_hash;
+        const auto current_hash = calculate_file_hash(gz_path);
+        const auto current_ckpt_size = db.get_checkpoint_size(file_id);
+        return current_hash != *stored_hash || current_ckpt_size == 0;
+    } catch (...) {
+        return true;
+    }
 }
 
-const std::string &GzipIndexer::get_idx_path() const { return idx_path; }
+const std::string& GzipIndexer::get_index_path() const { return index_path; }
 
-const std::string &GzipIndexer::get_archive_path() const { return gz_path; }
+const std::string& GzipIndexer::get_archive_path() const { return gz_path; }
 
-const std::string &GzipIndexer::get_gz_path() const { return gz_path; }
+const std::string& GzipIndexer::get_gz_path() const { return gz_path; }
 
 std::uint64_t GzipIndexer::get_max_bytes() const {
-    auto val = cached_max_bytes.load(std::memory_order_relaxed);
-    if (val == 0) {
-        val = query_max_bytes(db, gz_path_logical_path);
-        cached_max_bytes.store(val, std::memory_order_relaxed);
+    if (!cached_max_bytes_ready.load(std::memory_order_acquire)) {
+        const int file_id = get_file_id();
+        if (file_id != -1) {  // skip the DB read when no id is known
+            IndexDatabase db(  // read-only, scoped to this call
+                index_path,
+                dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+            auto val = db.get_max_bytes(file_id);  // published via ready flag
+            cached_max_bytes.store(val, std::memory_order_relaxed);
+            cached_max_bytes_ready.store(true, std::memory_order_release);
+        }
     }
-    return val;
+    return cached_max_bytes.load(std::memory_order_relaxed);
 }
 
 std::uint64_t GzipIndexer::get_checkpoint_size() const {
-    auto val = cached_checkpoint_size.load(std::memory_order_relaxed);
-    if (val == 0) {
-        int file_id = get_file_id();
+    if (!cached_checkpoint_size_ready.load(std::memory_order_acquire)) {
+        const int file_id = get_file_id();  // may hit the DB on first use
         if (file_id != -1) {
-            val = query_checkpoint_size(db, file_id);
+            IndexDatabase db(  // read-only, scoped to this call
+                index_path,
+                dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+            auto val = db.get_checkpoint_size(file_id);
             cached_checkpoint_size.store(val, std::memory_order_relaxed);
+            cached_checkpoint_size_ready.store(true, std::memory_order_release);
         }
     }
-    return val;
+    return cached_checkpoint_size.load(std::memory_order_relaxed);
 }
 
 std::uint64_t GzipIndexer::get_num_lines() const {
-    auto val = cached_num_lines.load(std::memory_order_relaxed);
-    if (val == 0) {
-        val = query_num_lines(db, gz_path_logical_path);
-        cached_num_lines.store(val, std::memory_order_relaxed);
+    if (!cached_num_lines_ready.load(std::memory_order_acquire)) {
+        const int file_id = get_file_id();
+        if (file_id != -1) {  // skip the DB read when no id is known
+            IndexDatabase db(  // read-only, scoped to this call
+                index_path,
+                dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+            auto val = db.get_num_lines(file_id);  // published via ready flag
+            cached_num_lines.store(val, std::memory_order_relaxed);
+            cached_num_lines_ready.store(true, std::memory_order_release);
+        }
     }
-    return val;
+    return cached_num_lines.load(std::memory_order_relaxed);
 }
 
 int GzipIndexer::get_file_id() const {
     auto val = cached_file_id.load(std::memory_order_relaxed);
     if (val == -1) {
-        val = query_file_id(db, gz_path_logical_path);
+        IndexDatabase db(  // read-only, scoped to this call
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        val = db.get_file_info_id(gz_path_logical_path);
         cached_file_id.store(val, std::memory_order_relaxed);
     }
     return val;
 }
 
-int GzipIndexer::find_file_id(const std::string &path) const {
-    return query_file_id(db, get_logical_path(path));
+int GzipIndexer::find_file_id(const std::string& path) const {
+    IndexDatabase db(  // uncached: opens the DB every call
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.get_file_info_id(get_logical_path(path));
 }
 
 bool GzipIndexer::find_checkpoint(std::size_t target_offset,
-                                  IndexerCheckpoint &checkpoint) const {
-    int file_id = get_file_id();
-    if (file_id == -1) return false;
-    return query_checkpoint(db, target_offset, file_id, checkpoint);
+                                  IndexerCheckpoint& checkpoint) const {
+    const int file_id = get_file_id();
+    if (file_id == -1) {
+        return false;  // nothing to search without a file id
+    }
+    IndexDatabase db(  // read-only, scoped to this call
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.find_checkpoint(file_id, target_offset, checkpoint);
 }
 
 std::vector<IndexerCheckpoint> GzipIndexer::get_checkpoints() const {
+    std::lock_guard<std::mutex> lock(cached_checkpoints_mutex);
     if (cached_checkpoints.empty()) {
-        int file_id = get_file_id();
+        const int file_id = get_file_id();  // resolved while holding the lock
         if (file_id != -1) {
-            cached_checkpoints = query_checkpoints(db, file_id);
+            IndexDatabase db(  // read-only, scoped to this call
+                index_path,
+                dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+            cached_checkpoints = db.query_checkpoints(file_id);
         }
     }
     return cached_checkpoints;
@@ -445,9 +468,14 @@ std::vector<IndexerCheckpoint> GzipIndexer::get_checkpoints() const {
 
 std::vector<IndexerCheckpoint> GzipIndexer::get_checkpoints_for_line_range(
     std::uint64_t start_line, std::uint64_t end_line) const {
-    int file_id = get_file_id();
-    if (file_id == -1) return {};
-    return query_checkpoints_for_line_range(db, file_id, start_line, end_line);
+    const int file_id = get_file_id();
+    if (file_id == -1) {
+        return {};  // no file id, so no checkpoints
+    }
+    IndexDatabase db(  // uncached: opens the DB every call
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.query_checkpoints_for_line_range(file_id, start_line, end_line);
 }
 
 }  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h b/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h
index 1988b552..94c0f04b 100644
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h
+++ b/src/dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h
@@ -4,7 +4,7 @@
 #include <dftracer/utils/core/common/archive_format.h>
 #include <dftracer/utils/core/common/constants.h>
 #include <dftracer/utils/core/coro/task.h>
-#include <dftracer/utils/core/sqlite/database.h>
+#include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/index_visitor.h>
 #include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer.h>
@@ -18,14 +18,12 @@
 
 namespace dftracer::utils::utilities::indexer::internal::gzip {
 
-using dftracer::utils::sqlite::SqliteDatabase;
-
 class GzipIndexer : public Indexer {
    public:
     static constexpr std::uint64_t DEFAULT_CHECKPOINT_SIZE =
         constants::indexer::DEFAULT_CHECKPOINT_SIZE;
 
-    GzipIndexer(const std::string &gz_path, const std::string &idx_path,
+    GzipIndexer(const std::string &gz_path, const std::string &index_path,
                 std::uint64_t checkpoint_size = DEFAULT_CHECKPOINT_SIZE,
                 bool force = false);
     ~GzipIndexer();
@@ -43,7 +41,7 @@ class GzipIndexer : public Indexer {
     }
 
     // Metadata - BaseIndexer interface implementation
-    const std::string &get_idx_path() const override;
+    const std::string &get_index_path() const override;
     const std::string &get_archive_path() const override;
     const std::string &get_gz_path() const;
     std::uint64_t get_checkpoint_size() const override;
@@ -69,18 +67,20 @@ class GzipIndexer : public Indexer {
    private:
     std::string gz_path;
     std::string gz_path_logical_path;
-    std::string idx_path;
+    std::string index_path;
     std::uint64_t ckpt_size;
     bool force_rebuild;
-    SqliteDatabase db;
     VisitorList visitors_;
 
     // Cached values (atomic for thread-safe lazy initialization)
     mutable std::atomic<bool> cached_is_valid{false};
     mutable std::atomic<int> cached_file_id{-1};
     mutable std::atomic<std::uint64_t> cached_max_bytes{0};
+    mutable std::atomic<bool> cached_max_bytes_ready{false};
     mutable std::atomic<std::uint64_t> cached_num_lines{0};
+    mutable std::atomic<bool> cached_num_lines_ready{false};
     mutable std::atomic<std::uint64_t> cached_checkpoint_size{0};
+    mutable std::atomic<bool> cached_checkpoint_size_ready{false};
     mutable std::vector<IndexerCheckpoint> cached_checkpoints;
     mutable std::mutex cached_checkpoints_mutex;
 
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/delete_file_record.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/delete_file_record.cpp
deleted file mode 100644
index 984ae506..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/delete_file_record.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <dftracer/utils/core/common/logging.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-bool delete_file_record(const SqliteDatabase &db, int file_id) {
-    const char *cleanup_queries[] = {
-        "DELETE FROM checkpoints WHERE file_id = ?;",
-        "DELETE FROM metadata WHERE file_id = ?;"};
-
-    for (const char *query : cleanup_queries) {
-        try {
-            SqliteStmt stmt(db, query);
-            stmt.bind_int(1, file_id);
-            int result = sqlite3_step(stmt);
-            if (result != SQLITE_DONE) {
-                DFTRACER_UTILS_LOG_ERROR(
-                    "Failed to execute cleanup statement '%s' for file_id %d: "
-                    "%d - "
-                    "%s",
-                    query, file_id, result, sqlite3_errmsg(db.get()));
-                return false;
-            }
-        } catch (const IndexerError &e) {
-            DFTRACER_UTILS_LOG_ERROR(
-                "Failed to prepare cleanup statement '%s': %s", query,
-                e.what());
-            return false;
-        }
-    }
-    DFTRACER_UTILS_LOG_DEBUG(
-        "Successfully cleaned up existing data for file_id %d", file_id);
-    return true;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_checkpoint_record.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_checkpoint_record.cpp
deleted file mode 100644
index dfbc6d02..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_checkpoint_record.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-void insert_checkpoint_record(const SqliteDatabase &db, int file_id,
-                              const InsertCheckpointData &data) {
-    SqliteStmt stmt(
-        db,
-        "INSERT INTO checkpoints(file_id, checkpoint_idx, uc_offset, "
-        "uc_size, c_offset, c_size, bits, dict_compressed, num_lines, "
-        "first_line_num, last_line_num) "
-        "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_id);
-    stmt.bind_int64(2, static_cast<std::int64_t>(data.idx));
-    stmt.bind_int64(3, static_cast<std::int64_t>(data.uc_offset));
-    stmt.bind_int64(4, static_cast<std::int64_t>(data.uc_size));
-    stmt.bind_int64(5, static_cast<std::int64_t>(data.c_offset));
-    stmt.bind_int64(6, static_cast<std::int64_t>(data.c_size));
-    stmt.bind_int(7, data.bits);
-    stmt.bind_blob(8, data.compressed_dict,
-                   static_cast<int>(data.compressed_dict_size));
-    stmt.bind_int64(9, static_cast<std::int64_t>(data.num_lines));
-    stmt.bind_int64(10, static_cast<std::int64_t>(data.first_line_num));
-    stmt.bind_int64(11, static_cast<std::int64_t>(data.last_line_num));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert checkpoint: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_metadata_record.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_metadata_record.cpp
deleted file mode 100644
index 7b212d58..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_metadata_record.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include <dftracer/utils/core/common/logging.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-void insert_file_metadata_record(const SqliteDatabase &db, int file_id,
-                                 std::size_t ckpt_size,
-                                 std::uint64_t total_lines,
-                                 std::uint64_t total_uc_size) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO metadata(file_id, checkpoint_size, "
-                    "total_lines, total_uc_size) VALUES(?, ?, ?, ?);");
-
-    stmt.bind_int(1, file_id);
-    stmt.bind_int64(2, static_cast<int64_t>(ckpt_size));
-    stmt.bind_int64(3, static_cast<int64_t>(total_lines));
-    stmt.bind_int64(4, static_cast<int64_t>(total_uc_size));
-
-    int result = sqlite3_step(stmt);
-    if (result != SQLITE_DONE) {
-        throw IndexerError(
-            IndexerError::Type::DATABASE_ERROR,
-            "Insert failed: " + std::string(sqlite3_errmsg(db.get())));
-    }
-    DFTRACER_UTILS_LOG_DEBUG(
-        "Successfully inserted metadata for file_id %d: "
-        "checkpoint_size=%zu, "
-        "total_lines=%llu, total_uc_size=%llu",
-        file_id, ckpt_size, total_lines, total_uc_size);
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_record.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_record.cpp
deleted file mode 100644
index e9890b0a..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/insert_file_record.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-void insert_file_record(const SqliteDatabase &db,
-                        const std::string &gz_path_logical_path,
-                        std::size_t bytes, std::time_t file_mtime,
-                        std::uint64_t file_hash, int &db_file_id) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO files(logical_name, byte_size, "
-                    "mtime_unix, hash) "
-                    "VALUES(?, ?, ?, ?) "
-                    "ON CONFLICT(logical_name) DO UPDATE SET "
-                    "byte_size=excluded.byte_size, "
-                    "mtime_unix=excluded.mtime_unix, "
-                    "hash=excluded.hash "
-                    "RETURNING id;");
-
-    stmt.bind_text(1, gz_path_logical_path);
-    stmt.bind_int64(2, static_cast<int64_t>(bytes));
-    stmt.bind_int64(3, static_cast<int64_t>(file_mtime));
-    stmt.bind_int64(4, static_cast<int64_t>(file_hash));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(
-            IndexerError::Type::DATABASE_ERROR,
-            "Insert failed: " + std::string(sqlite3_errmsg(db.get())));
-    }
-
-    db_file_id = sqlite3_column_int(stmt, 0);
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h
deleted file mode 100644
index 04f721eb..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_GZIP_QUERIES_H
-#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_GZIP_QUERIES_H
-
-#include <dftracer/utils/core/sqlite/database.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-
-#include <cstddef>
-#include <cstdint>
-#include <ctime>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-using dftracer::utils::sqlite::SqliteDatabase;
-using dftracer::utils::sqlite::SqliteStmt;
-
-void insert_file_record(const SqliteDatabase &db,
-                        const std::string &gz_path_logical_path,
-                        std::size_t bytes, std::time_t file_mtime,
-                        std::uint64_t file_hash, int &db_file_id);
-void insert_file_metadata_record(const SqliteDatabase &db, int file_id,
-                                 std::size_t ckpt_size,
-                                 std::uint64_t total_lines,
-                                 std::uint64_t total_uc_size);
-bool query_stored_file_info(const SqliteDatabase &db,
-                            const std::string &gz_path,
-                            std::uint64_t &stored_hash,
-                            std::time_t &stored_mtime);
-
-struct InsertCheckpointData {
-    std::uint64_t idx;
-    std::uint64_t uc_offset;
-    std::uint64_t uc_size;
-    std::uint64_t c_size;
-    std::uint64_t c_offset;
-    int bits;
-    const void *compressed_dict;
-    std::size_t compressed_dict_size;
-    std::uint64_t num_lines;
-    std::uint64_t first_line_num;
-    std::uint64_t last_line_num;
-};
-void insert_checkpoint_record(const SqliteDatabase &db, int file_id,
-                              const InsertCheckpointData &data);
-
-bool query_schema_validity(const SqliteDatabase &db);
-bool delete_file_record(const SqliteDatabase &db, int file_id);
-std::uint64_t query_max_bytes(const SqliteDatabase &db,
-                              const std::string &gz_path_logical_path);
-std::uint64_t query_num_lines(const SqliteDatabase &db,
-                              const std::string &gz_path_logical_path);
-int query_file_id(const SqliteDatabase &db,
-                  const std::string &gz_path_logical_path);
-bool query_checkpoint(const SqliteDatabase &db, std::size_t target_offset,
-                      int file_id, IndexerCheckpoint &checkpoint);
-std::vector<IndexerCheckpoint> query_checkpoints(const SqliteDatabase &db,
-                                                 int file_id);
-std::vector<IndexerCheckpoint> query_checkpoints_for_line_range(
-    const SqliteDatabase &db, int file_id, std::uint64_t start_line,
-    std::uint64_t end_line);
-std::uint64_t query_checkpoint_size(const SqliteDatabase &db, int file_id);
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
-
-#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_GZIP_QUERIES_H
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint.cpp
deleted file mode 100644
index 8c5678f3..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-#include <dftracer/utils/core/common/logging.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-#include <cstring>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-bool query_checkpoint(const SqliteDatabase& db, std::size_t target_offset,
-                      int file_id, IndexerCheckpoint& checkpoint) {
-    DFTRACER_UTILS_LOG_DEBUG(
-        "query_checkpoint called: target_offset=%zu, file_id=%d", target_offset,
-        file_id);
-
-    // For target offset 0, always decompress from beginning of file (no
-    // checkpoint)
-    if (target_offset == 0) {
-        DFTRACER_UTILS_LOG_DEBUG(
-            "%s", "query_checkpoint: target_offset is 0, returning false");
-        return false;
-    }
-
-    if (file_id == -1) {
-        DFTRACER_UTILS_LOG_DEBUG(
-            "%s", "query_checkpoint: file_id is -1, returning false");
-        return false;
-    }
-
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, bits, "
-        "dict_compressed, num_lines "
-        "FROM checkpoints WHERE file_id = ? AND uc_offset <= ? "
-        "ORDER BY uc_offset DESC LIMIT 1");
-    bool found = false;
-
-    stmt.bind_int(1, file_id);
-    stmt.bind_int64(2, static_cast<int64_t>(target_offset));
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-        std::size_t dict_size =
-            static_cast<std::size_t>(sqlite3_column_bytes(stmt, 6));
-        checkpoint.dict_compressed.resize(dict_size);
-        std::memcpy(checkpoint.dict_compressed.data(),
-                    sqlite3_column_blob(stmt, 6), dict_size);
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        found = true;
-
-        DFTRACER_UTILS_LOG_DEBUG(
-            "query_checkpoint: found checkpoint idx=%llu, uc_offset=%llu, "
-            "c_offset=%llu, bits=%d",
-            checkpoint.checkpoint_idx, checkpoint.uc_offset,
-            checkpoint.c_offset, checkpoint.bits);
-    } else {
-        DFTRACER_UTILS_LOG_DEBUG(
-            "query_checkpoint: no checkpoint found for target_offset=%zu, "
-            "file_id=%d",
-            target_offset, file_id);
-    }
-
-    return found;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint_size.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint_size.cpp
deleted file mode 100644
index 4a205c61..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoint_size.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-std::uint64_t query_checkpoint_size(const SqliteDatabase &db, int file_id) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_size FROM metadata WHERE file_id = ?");
-    stmt.bind_int(1, file_id);
-    std::uint64_t ckpt_size = 0;
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        ckpt_size = static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return ckpt_size;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoints.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoints.cpp
deleted file mode 100644
index 1e25a74b..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_checkpoints.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-#include <cstring>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-std::vector<IndexerCheckpoint> query_checkpoints(const SqliteDatabase &db,
-                                                 int file_id) {
-    std::vector<IndexerCheckpoint> checkpoints;
-
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, bits, "
-        "dict_compressed, num_lines, first_line_num, last_line_num "
-        "FROM checkpoints WHERE file_id = ? ORDER BY uc_offset");
-
-    stmt.bind_int(1, file_id);
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        IndexerCheckpoint checkpoint;
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-
-        std::size_t dict_size =
-            static_cast<std::size_t>(sqlite3_column_bytes(stmt, 6));
-        checkpoint.dict_compressed.resize(dict_size);
-        std::memcpy(checkpoint.dict_compressed.data(),
-                    sqlite3_column_blob(stmt, 6), dict_size);
-
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        checkpoint.first_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 8));
-        checkpoint.last_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 9));
-
-        checkpoints.push_back(std::move(checkpoint));
-    }
-
-    return checkpoints;
-}
-
-std::vector<IndexerCheckpoint> query_checkpoints_for_line_range(
-    const SqliteDatabase &db, int file_id, std::uint64_t start_line,
-    std::uint64_t end_line) {
-    std::vector<IndexerCheckpoint> checkpoints;
-
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, bits, "
-        "dict_compressed, num_lines, first_line_num, last_line_num "
-        "FROM checkpoints WHERE file_id = ? AND "
-        "(first_line_num <= ? AND last_line_num >= ?) OR "
-        "(first_line_num <= ? AND last_line_num >= ?) "
-        "ORDER BY uc_offset");
-
-    stmt.bind_int(1, file_id);
-    stmt.bind_int64(2, static_cast<int64_t>(end_line));
-    stmt.bind_int64(3, static_cast<int64_t>(start_line));
-    stmt.bind_int64(4, static_cast<int64_t>(start_line));
-    stmt.bind_int64(5, static_cast<int64_t>(end_line));
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        IndexerCheckpoint checkpoint;
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-
-        size_t dict_size = static_cast<size_t>(sqlite3_column_bytes(stmt, 6));
-        checkpoint.dict_compressed.resize(dict_size);
-        std::memcpy(checkpoint.dict_compressed.data(),
-                    sqlite3_column_blob(stmt, 6), dict_size);
-
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        checkpoint.first_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 8));
-        checkpoint.last_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 9));
-
-        checkpoints.push_back(std::move(checkpoint));
-    }
-
-    return checkpoints;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_file_id.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_file_id.cpp
deleted file mode 100644
index 1f443a9f..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_file_id.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-int query_file_id(const SqliteDatabase &db,
-                  const std::string &gz_path_logical_path) {
-    SqliteStmt stmt(db, "SELECT id FROM files WHERE logical_name = ? LIMIT 1");
-    int file_id = -1;
-
-    stmt.bind_text(1, gz_path_logical_path);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        file_id = sqlite3_column_int(stmt, 0);
-    }
-
-    return file_id;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_max_bytes.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_max_bytes.cpp
deleted file mode 100644
index 2cd0f8c1..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_max_bytes.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <dftracer/utils/core/common/logging.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-std::uint64_t query_max_bytes(const SqliteDatabase &db,
-                              const std::string &gz_path_logical_path) {
-    // Primary: metadata table has the authoritative total uncompressed size
-    SqliteStmt metadata_stmt(
-        db,
-        "SELECT total_uc_size FROM metadata WHERE file_id = "
-        "(SELECT id FROM files WHERE logical_name = ? LIMIT 1)");
-    metadata_stmt.bind_text(1, gz_path_logical_path);
-    if (sqlite3_step(metadata_stmt) == SQLITE_ROW) {
-        std::uint64_t total =
-            static_cast<std::uint64_t>(sqlite3_column_int64(metadata_stmt, 0));
-        if (total > 0) {
-            return total;
-        }
-    }
-
-    // Fallback: derive from checkpoints if metadata is missing
-    SqliteStmt stmt(
-        db,
-        "SELECT MAX(uc_offset + uc_size) FROM checkpoints WHERE file_id = "
-        "(SELECT id FROM files WHERE logical_name = ? LIMIT 1)");
-    std::uint64_t max_bytes = 0;
-    stmt.bind_text(1, gz_path_logical_path);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        max_bytes = static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return max_bytes;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_num_lines.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_num_lines.cpp
deleted file mode 100644
index 614949a6..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_num_lines.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-std::uint64_t query_num_lines(const SqliteDatabase &db,
-                              const std::string &gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT total_lines FROM metadata WHERE file_id = "
-                    "(SELECT id FROM files WHERE logical_name = ? LIMIT 1)");
-    std::uint64_t total_lines = 0;
-
-    stmt.bind_text(1, gz_path_logical_path);
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        total_lines = static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return total_lines;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_schema_validity.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_schema_validity.cpp
deleted file mode 100644
index 43b679f3..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_schema_validity.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-bool query_schema_validity(const SqliteDatabase &db) {
-    SqliteStmt stmt(db,
-                    "SELECT name FROM sqlite_master WHERE type='table' AND "
-                    "name IN ('checkpoints', 'metadata', 'files')");
-    int table_count = 0;
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        table_count++;
-    }
-
-    return table_count >= 3;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_stored_file_info.cpp b/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_stored_file_info.cpp
deleted file mode 100644
index f891f9bb..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/gzip/queries/query_stored_file_info.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/gzip/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::gzip {
-
-bool query_stored_file_info(const SqliteDatabase &db,
-                            const std::string &gz_path,
-                            std::uint64_t &stored_hash, time_t &stored_mtime) {
-    SqliteStmt stmt(db,
-                    "SELECT hash, mtime_unix FROM files WHERE "
-                    "logical_name = ? LIMIT 1");
-
-    stmt.bind_text(1, gz_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        std::uint64_t hash =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        if (hash == 0) {
-            return false;
-        }
-        stored_hash = hash;
-        stored_mtime = static_cast<time_t>(sqlite3_column_int64(stmt, 1));
-        return true;
-    }
-
-    return false;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::gzip
diff --git a/src/dftracer/utils/utilities/indexer/internal/helpers.cpp b/src/dftracer/utils/utilities/indexer/internal/helpers.cpp
index 87789f53..e16da3ee 100644
--- a/src/dftracer/utils/utilities/indexer/internal/helpers.cpp
+++ b/src/dftracer/utils/utilities/indexer/internal/helpers.cpp
@@ -23,6 +23,21 @@ std::string get_logical_path(std::string_view path) {
     return fs_path.filename().string();
 }
 
+// Resolve `path` (an index root, an entry inside one, or a file/legacy index
+// path) to its ".dftindex" directory using lexical path operations only.
+std::string normalize_index_root(std::string_view path) {
+    const fs::path input{std::string(path)};
+    if (input.filename() == ".dftindex") return input.string();
+    const fs::path parent = input.parent_path();
+    if (parent.filename() == ".dftindex") return parent.string();
+    // Any extension (legacy ".idx"/".pidx" included) marks `path` as a file,
+    // so the root sits beside it; otherwise `path` is itself the directory.
+    // NOTE(review): a directory named like "run.v2" is treated as a file
+    // here because the check is name-based - confirm that is intended.
+    if (input.has_extension()) return (parent / ".dftindex").string();
+    return (input / ".dftindex").string();
+}
+
 time_t get_file_modification_time(const std::string &file_path) {
 #if defined(DFTRACER_UTILS_USE_STD_FS)
     // Use std::filesystem when available and working
@@ -118,12 +133,10 @@ std::uint64_t file_size_bytes(const std::string &path) {
     ::close(fd);
     if (pos < 0) return 0;
     return static_cast<std::uint64_t>(pos);
-    if (pos < 0) return 0;
-    return static_cast<std::uint64_t>(pos);
 }
 
-bool index_exists_and_valid(const std::string &idx_path) {
-    return fs::exists(idx_path) && fs::is_regular_file(idx_path);
+bool index_exists_and_valid(const std::string &index_path) {
+    return fs::is_directory(index_path);  // also false if the path is missing
 }
 
 }  // namespace dftracer::utils::utilities::indexer::internal
diff --git a/src/dftracer/utils/utilities/indexer/internal/helpers.h b/src/dftracer/utils/utilities/indexer/internal/helpers.h
index 024072f3..05504215 100644
--- a/src/dftracer/utils/utilities/indexer/internal/helpers.h
+++ b/src/dftracer/utils/utilities/indexer/internal/helpers.h
@@ -9,10 +9,11 @@
 namespace dftracer::utils::utilities::indexer::internal {
 
 std::string get_logical_path(std::string_view path);
+std::string normalize_index_root(std::string_view path);
 time_t get_file_modification_time(const std::string &file_path);
 std::uint64_t calculate_file_hash(const std::string &file_path);
 std::uint64_t file_size_bytes(const std::string &path);
-bool index_exists_and_valid(const std::string &idx_path);
+bool index_exists_and_valid(const std::string &index_path);
 
 }  // namespace dftracer::utils::utilities::indexer::internal
 
diff --git a/src/dftracer/utils/utilities/indexer/internal/indexer_c.cpp b/src/dftracer/utils/utilities/indexer/internal/indexer_c.cpp
index 6b32f2d8..637ab38e 100644
--- a/src/dftracer/utils/utilities/indexer/internal/indexer_c.cpp
+++ b/src/dftracer/utils/utilities/indexer/internal/indexer_c.cpp
@@ -18,10 +18,10 @@ static std::shared_ptr<Indexer> *cast_indexer(dft_indexer_handle_t indexer) {
 }
 
 dft_indexer_handle_t dft_indexer_create(const char *gz_path,
-                                        const char *idx_path,
+                                        const char *index_path,
                                         uint64_t checkpoint_size,
                                         int force_rebuild) {
-    if (!gz_path || !idx_path || checkpoint_size == 0) {
+    if (!gz_path || !index_path || checkpoint_size == 0) {
         DFTRACER_UTILS_LOG_ERROR("%s",
                                  "Invalid parameters for indexer creation");
         return nullptr;
@@ -29,7 +29,7 @@ dft_indexer_handle_t dft_indexer_create(const char *gz_path,
 
     try {
         auto indexer = IndexerFactory::create(
-            gz_path, idx_path, checkpoint_size, force_rebuild != 0);
+            gz_path, index_path, checkpoint_size, force_rebuild != 0);
         if (indexer) {
             return static_cast<dft_indexer_handle_t>(
                 new std::shared_ptr<Indexer>(indexer));
diff --git a/src/dftracer/utils/utilities/indexer/internal/indexer_factory.cpp b/src/dftracer/utils/utilities/indexer/internal/indexer_factory.cpp
index 5ffc9c67..ab558a18 100644
--- a/src/dftracer/utils/utilities/indexer/internal/indexer_factory.cpp
+++ b/src/dftracer/utils/utilities/indexer/internal/indexer_factory.cpp
@@ -1,5 +1,6 @@
 #include <dftracer/utils/core/common/format_detector.h>
 #include <dftracer/utils/core/common/logging.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/internal/gzip/gzip_indexer.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 #include <dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h>
@@ -7,12 +8,13 @@
 namespace dftracer::utils::utilities::indexer::internal {
 
 std::shared_ptr<Indexer> IndexerFactory::create(const std::string &archive_path,
-                                                const std::string &idx_path,
+                                                const std::string &index_path,
                                                 std::uint64_t checkpoint_size,
                                                 bool force) {
     ArchiveFormat format = FormatDetector::detect(archive_path);
-    std::string final_idx_path =
-        idx_path.empty() ? generate_index_path(archive_path, format) : idx_path;
+    std::string final_idx_path = index_path.empty()
+                                     ? generate_index_path(archive_path, format)
+                                     : index_path;
 
     switch (format) {
         case ArchiveFormat::GZIP:
@@ -38,25 +40,23 @@ ArchiveFormat IndexerFactory::detect_format(const std::string &archive_path) {
 
 std::string IndexerFactory::generate_index_path(const std::string &archive_path,
                                                 ArchiveFormat format) {
-    // Auto-detect format if not specified
     if (format == ArchiveFormat::UNKNOWN) {
         format = FormatDetector::detect(archive_path);
     }
 
     switch (format) {
         case ArchiveFormat::GZIP:
-            return archive_path + ".idx";
-
         case ArchiveFormat::TAR_GZ:
-            return archive_path + ".idx.tar";
+            return composites::dft::internal::determine_index_path(archive_path,
+                                                                   "");
 
         case ArchiveFormat::UNKNOWN:
         default:
-            // Fallback to generic .idx extension
             DFTRACER_UTILS_LOG_WARN(
-                "Unknown format for %s, using generic .idx extension",
+                "Unknown format for %s, using root-local .dftindex",
                 archive_path.c_str());
-            return archive_path + ".idx";
+            return composites::dft::internal::determine_index_path(archive_path,
+                                                                   "");
     }
 }
 
diff --git a/src/dftracer/utils/utilities/indexer/internal/sqlite/database.h b/src/dftracer/utils/utilities/indexer/internal/sqlite/database.h
deleted file mode 100644
index 9fff3dfa..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/sqlite/database.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_DATABASE_H
-#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_DATABASE_H
-
-// Forwarding header: SqliteDatabase has moved to core/sqlite/.
-// This header re-exports it into the indexer::internal namespace
-// for backward compatibility.
-#include <dftracer/utils/core/sqlite/database.h>
-
-namespace dftracer::utils::utilities::indexer::internal {
-using dftracer::utils::sqlite::SqliteDatabase;
-}  // namespace dftracer::utils::utilities::indexer::internal
-
-#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_DATABASE_H
diff --git a/src/dftracer/utils/utilities/indexer/internal/sqlite/statement.h b/src/dftracer/utils/utilities/indexer/internal/sqlite/statement.h
deleted file mode 100644
index 6e831833..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/sqlite/statement.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_STATEMENT_H
-#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_STATEMENT_H
-
-// Forwarding header: SqliteStmt has moved to core/sqlite/.
-// This header re-exports it into the indexer::internal namespace
-// for backward compatibility.
-#include <dftracer/utils/core/sqlite/statement.h>
-
-namespace dftracer::utils::utilities::indexer::internal {
-using dftracer::utils::sqlite::SqliteStmt;
-}  // namespace dftracer::utils::utilities::indexer::internal
-
-#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_SQLITE_STATEMENT_H
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_metadata_record.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_metadata_record.cpp
deleted file mode 100644
index 64f8e3d6..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_metadata_record.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-void insert_archive_metadata_record(const SqliteDatabase &db, int archive_id,
-                                    std::size_t ckpt_size,
-                                    std::uint64_t total_lines,
-                                    std::uint64_t total_uc_size) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO metadata(archive_id, checkpoint_size, "
-                    "total_lines, total_uc_size) "
-                    "VALUES(?, ?, ?, ?) "
-                    "ON CONFLICT(archive_id) DO UPDATE SET "
-                    "checkpoint_size=excluded.checkpoint_size, "
-                    "total_lines=excluded.total_lines, "
-                    "total_uc_size=excluded.total_uc_size;");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_int64(2, static_cast<int64_t>(ckpt_size));
-    stmt.bind_int64(3, static_cast<int64_t>(total_lines));
-    stmt.bind_int64(4, static_cast<int64_t>(total_uc_size));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Insert archive metadata failed: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_record.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_record.cpp
deleted file mode 100644
index 0917fcbe..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_archive_record.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-void insert_archive_record(const SqliteDatabase &db, int file_id,
-                           const std::string &archive_name,
-                           std::uint64_t uncompressed_size,
-                           std::uint64_t total_files, int &archive_id) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO tar_archives(file_id, archive_name, "
-                    "uncompressed_size, total_files) "
-                    "VALUES(?, ?, ?, ?) "
-                    "ON CONFLICT(file_id) DO UPDATE SET "
-                    "archive_name=excluded.archive_name, "
-                    "uncompressed_size=excluded.uncompressed_size, "
-                    "total_files=excluded.total_files "
-                    "RETURNING id;");
-
-    stmt.bind_int(1, file_id);
-    stmt.bind_text(2, archive_name);
-    stmt.bind_int64(3, static_cast<int64_t>(uncompressed_size));
-    stmt.bind_int64(4, static_cast<int64_t>(total_files));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(
-            IndexerError::Type::DATABASE_ERROR,
-            "Insert archive failed: " + std::string(sqlite3_errmsg(db.get())));
-    }
-
-    archive_id = sqlite3_column_int(stmt, 0);
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_file_record.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_file_record.cpp
deleted file mode 100644
index 5795f667..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_file_record.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-void insert_file_record(const SqliteDatabase &db,
-                        const std::string &tar_gz_path_logical_path,
-                        std::size_t bytes, std::time_t file_mtime,
-                        std::uint64_t file_hash, int &db_file_id) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO files(logical_name, byte_size, "
-                    "mtime_unix, hash) "
-                    "VALUES(?, ?, ?, ?) "
-                    "ON CONFLICT(logical_name) DO UPDATE SET "
-                    "byte_size=excluded.byte_size, "
-                    "mtime_unix=excluded.mtime_unix, "
-                    "hash=excluded.hash "
-                    "RETURNING id;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-    stmt.bind_int64(2, static_cast<int64_t>(bytes));
-    stmt.bind_int64(3, static_cast<int64_t>(file_mtime));
-    stmt.bind_int64(4, static_cast<int64_t>(file_hash));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(
-            IndexerError::Type::DATABASE_ERROR,
-            "Insert failed: " + std::string(sqlite3_errmsg(db.get())));
-    }
-
-    db_file_id = sqlite3_column_int(stmt, 0);
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_checkpoint_record.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_checkpoint_record.cpp
deleted file mode 100644
index 9185fea0..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_checkpoint_record.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-void insert_tar_checkpoint_record(const SqliteDatabase &db, int archive_id,
-                                  const InsertTarCheckpointData &data) {
-    SqliteStmt stmt(
-        db,
-        "INSERT INTO tar_gzip_checkpoints(archive_id, checkpoint_idx, "
-        "uc_offset, uc_size, c_offset, c_size, bits, dict_compressed, "
-        "num_lines, first_line_num, last_line_num, tar_files_count) "
-        "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_int64(2, static_cast<int64_t>(data.idx));
-    stmt.bind_int64(3, static_cast<int64_t>(data.uc_offset));
-    stmt.bind_int64(4, static_cast<int64_t>(data.uc_size));
-    stmt.bind_int64(5, static_cast<int64_t>(data.c_offset));
-    stmt.bind_int64(6, static_cast<int64_t>(data.c_size));
-    stmt.bind_int(7, data.bits);
-    stmt.bind_blob(8, data.compressed_dict,
-                   static_cast<int>(data.compressed_dict_size));
-    stmt.bind_int64(9, static_cast<int64_t>(data.num_lines));
-    stmt.bind_int64(10, static_cast<int64_t>(data.first_line_num));
-    stmt.bind_int64(11, static_cast<int64_t>(data.last_line_num));
-    stmt.bind_int64(12, static_cast<int64_t>(data.tar_files_count));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Insert TAR checkpoint failed: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_file_record.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_file_record.cpp
deleted file mode 100644
index c5d2f2a5..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/insert_tar_file_record.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-void insert_tar_file_record(const SqliteDatabase &db, int archive_id,
-                            const InsertTarFileData &data) {
-    SqliteStmt stmt(db,
-                    "INSERT INTO tar_files(archive_id, file_name, file_size, "
-                    "file_mtime, typeflag, data_offset, uncompressed_offset) "
-                    "VALUES(?, ?, ?, ?, ?, ?, ?);");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_text(2, data.file_name);
-    stmt.bind_int64(3, static_cast<int64_t>(data.file_size));
-    stmt.bind_int64(4, static_cast<int64_t>(data.file_mtime));
-    stmt.bind_text(5, std::string(1, data.typeflag));
-    stmt.bind_int64(6, static_cast<int64_t>(data.data_offset));
-    stmt.bind_int64(7, static_cast<int64_t>(data.uncompressed_offset));
-
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_ROW && rc != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Insert TAR file record failed: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/queries.h b/src/dftracer/utils/utilities/indexer/internal/tar/queries/queries.h
deleted file mode 100644
index 395729e0..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/queries.h
+++ /dev/null
@@ -1,108 +0,0 @@
-#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TAR_QUERIES_H
-#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TAR_QUERIES_H
-
-#include <dftracer/utils/core/sqlite/database.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
-#include <dftracer/utils/utilities/indexer/internal/error.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h>
-
-#include <cstddef>
-#include <cstdint>
-#include <ctime>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-using dftracer::utils::sqlite::SqliteDatabase;
-using dftracer::utils::sqlite::SqliteStmt;
-
-// File and archive management
-void insert_file_record(const SqliteDatabase &db,
-                        const std::string &tar_gz_path_logical_path,
-                        std::size_t bytes, std::time_t file_mtime,
-                        std::uint64_t file_sha256, int &db_file_id);
-
-void insert_archive_record(const SqliteDatabase &db, int file_id,
-                           const std::string &archive_name,
-                           std::uint64_t uncompressed_size,
-                           std::uint64_t total_files, int &archive_id);
-
-void insert_archive_metadata_record(const SqliteDatabase &db, int archive_id,
-                                    std::size_t ckpt_size,
-                                    std::uint64_t total_lines,
-                                    std::uint64_t total_uc_size);
-
-bool query_stored_file_info(const SqliteDatabase &db,
-                            const std::string &tar_gz_path,
-                            std::uint64_t &stored_hash,
-                            std::time_t &stored_mtime);
-
-// TAR file entries
-struct InsertTarFileData {
-    std::string file_name;
-    std::uint64_t file_size;
-    std::uint64_t file_mtime;
-    char typeflag;
-    std::uint64_t data_offset;
-    std::uint64_t uncompressed_offset;
-};
-
-void insert_tar_file_record(const SqliteDatabase &db, int archive_id,
-                            const InsertTarFileData &data);
-
-std::vector<TarIndexer::TarFileInfo> query_tar_files(const SqliteDatabase &db,
-                                                     int archive_id);
-
-bool query_tar_file(const SqliteDatabase &db, int archive_id,
-                    const std::string &file_name,
-                    TarIndexer::TarFileInfo &file_info);
-
-std::vector<TarIndexer::TarFileInfo> query_tar_files_in_range(
-    const SqliteDatabase &db, int archive_id, std::uint64_t start_offset,
-    std::uint64_t end_offset);
-
-// GZIP checkpoints for TAR archives
-struct InsertTarCheckpointData {
-    std::uint64_t idx;
-    std::uint64_t uc_offset;
-    std::uint64_t uc_size;
-    std::uint64_t c_size;
-    std::uint64_t c_offset;
-    int bits;
-    const void *compressed_dict;
-    std::size_t compressed_dict_size;
-    std::uint64_t num_lines;
-    std::uint64_t first_line_num;
-    std::uint64_t last_line_num;
-    std::uint64_t tar_files_count;
-};
-
-void insert_tar_checkpoint_record(const SqliteDatabase &db, int archive_id,
-                                  const InsertTarCheckpointData &data);
-
-// Database queries
-bool query_schema_validity(const SqliteDatabase &db);
-bool delete_archive_record(const SqliteDatabase &db, int archive_id);
-std::uint64_t query_max_bytes(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path);
-std::uint64_t query_num_lines(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path);
-std::uint64_t query_num_files(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path);
-std::string query_archive_name(const SqliteDatabase &db,
-                               const std::string &tar_gz_path_logical_path);
-int query_archive_id(const SqliteDatabase &db,
-                     const std::string &tar_gz_path_logical_path);
-
-bool query_tar_checkpoint(const SqliteDatabase &db, std::size_t target_offset,
-                          int archive_id, IndexerCheckpoint &checkpoint);
-std::vector<IndexerCheckpoint> query_tar_checkpoints(const SqliteDatabase &db,
-                                                     int archive_id);
-std::vector<IndexerCheckpoint> query_tar_checkpoints_for_line_range(
-    const SqliteDatabase &db, int archive_id, std::uint64_t start_line,
-    std::uint64_t end_line);
-std::uint64_t query_checkpoint_size(const SqliteDatabase &db, int archive_id);
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
-
-#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TAR_QUERIES_H
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_archive_id.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_archive_id.cpp
deleted file mode 100644
index 2ea994ba..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_archive_id.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-int query_archive_id(const SqliteDatabase &db,
-                     const std::string &tar_gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT ta.id "
-                    "FROM tar_archives ta "
-                    "JOIN files f ON ta.file_id = f.id "
-                    "WHERE f.logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-
-    int rc = sqlite3_step(stmt);
-    if (rc == SQLITE_ROW) {
-        return sqlite3_column_int(stmt, 0);
-    } else if (rc == SQLITE_DONE) {
-        return -1;  // Not found
-    } else {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Query archive ID failed: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_metadata.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_metadata.cpp
deleted file mode 100644
index 413f3dda..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_metadata.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-std::uint64_t query_max_bytes(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT m.total_uc_size "
-                    "FROM metadata m "
-                    "JOIN tar_archives ta ON m.archive_id = ta.id "
-                    "JOIN files f ON ta.file_id = f.id "
-                    "WHERE f.logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return 0;
-}
-
-std::uint64_t query_num_lines(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT m.total_lines "
-                    "FROM metadata m "
-                    "JOIN tar_archives ta ON m.archive_id = ta.id "
-                    "JOIN files f ON ta.file_id = f.id "
-                    "WHERE f.logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return 0;
-}
-
-std::uint64_t query_num_files(const SqliteDatabase &db,
-                              const std::string &tar_gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT ta.total_files "
-                    "FROM tar_archives ta "
-                    "JOIN files f ON ta.file_id = f.id "
-                    "WHERE f.logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return 0;
-}
-
-std::string query_archive_name(const SqliteDatabase &db,
-                               const std::string &tar_gz_path_logical_path) {
-    SqliteStmt stmt(db,
-                    "SELECT ta.archive_name "
-                    "FROM tar_archives ta "
-                    "JOIN files f ON ta.file_id = f.id "
-                    "WHERE f.logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path_logical_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        const char *name =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 0));
-        return name ? std::string(name) : "";
-    }
-
-    return "";
-}
-
-std::uint64_t query_checkpoint_size(const SqliteDatabase &db, int archive_id) {
-    SqliteStmt stmt(db,
-                    "SELECT checkpoint_size "
-                    "FROM metadata "
-                    "WHERE archive_id = ?;");
-
-    stmt.bind_int(1, archive_id);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        return static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-    }
-
-    return 0;
-}
-
-bool query_stored_file_info(const SqliteDatabase &db,
-                            const std::string &tar_gz_path,
-                            std::uint64_t &stored_hash,
-                            std::time_t &stored_mtime) {
-    SqliteStmt stmt(db,
-                    "SELECT hash, mtime_unix "
-                    "FROM files "
-                    "WHERE logical_name = ?;");
-
-    stmt.bind_text(1, tar_gz_path);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        std::uint64_t hash =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        if (hash == 0) {
-            return false;  // No valid hash stored
-        }
-        stored_hash = hash;
-        stored_mtime = static_cast<std::time_t>(sqlite3_column_int64(stmt, 1));
-        return true;
-    }
-
-    return false;
-}
-
-bool query_schema_validity(const SqliteDatabase &db) {
-    try {
-        SqliteStmt stmt(db,
-                        "SELECT COUNT(*) FROM sqlite_master WHERE type='table' "
-                        "AND name IN ('files', 'tar_archives', 'tar_files', "
-                        "'tar_gzip_checkpoints', 'metadata');");
-
-        if (sqlite3_step(stmt) == SQLITE_ROW) {
-            int table_count = sqlite3_column_int(stmt, 0);
-            return table_count == 5;  // Should have all 5 tables
-        }
-    } catch (...) {
-        return false;
-    }
-    return false;
-}
-
-bool delete_archive_record(const SqliteDatabase &db, int archive_id) {
-    SqliteStmt stmt(db, "DELETE FROM tar_archives WHERE id = ?;");
-    stmt.bind_int(1, archive_id);
-
-    int rc = sqlite3_step(stmt);
-    return rc == SQLITE_DONE;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_checkpoints.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_checkpoints.cpp
deleted file mode 100644
index 62a47d31..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_checkpoints.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-#include <cstring>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-bool query_tar_checkpoint(const SqliteDatabase &db, std::size_t target_offset,
-                          int archive_id, IndexerCheckpoint &checkpoint) {
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, "
-        "bits, dict_compressed, num_lines, first_line_num, last_line_num "
-        "FROM tar_gzip_checkpoints "
-        "WHERE archive_id = ? AND uc_offset <= ? "
-        "ORDER BY uc_offset DESC "
-        "LIMIT 1;");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_int64(2, static_cast<int64_t>(target_offset));
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-
-        // Copy compressed dictionary
-        const void *dict_data = sqlite3_column_blob(stmt, 6);
-        int dict_size = sqlite3_column_bytes(stmt, 6);
-        if (dict_data && dict_size > 0) {
-            checkpoint.dict_compressed.resize(dict_size);
-            std::memcpy(checkpoint.dict_compressed.data(), dict_data,
-                        dict_size);
-        }
-
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        checkpoint.first_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 8));
-        checkpoint.last_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 9));
-
-        return true;
-    }
-
-    return false;
-}
-
-std::vector<IndexerCheckpoint> query_tar_checkpoints(const SqliteDatabase &db,
-                                                     int archive_id) {
-    std::vector<IndexerCheckpoint> checkpoints;
-
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, "
-        "bits, dict_compressed, num_lines, first_line_num, last_line_num "
-        "FROM tar_gzip_checkpoints "
-        "WHERE archive_id = ? "
-        "ORDER BY checkpoint_idx;");
-
-    stmt.bind_int(1, archive_id);
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        IndexerCheckpoint checkpoint;
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-
-        // Copy compressed dictionary
-        const void *dict_data = sqlite3_column_blob(stmt, 6);
-        int dict_size = sqlite3_column_bytes(stmt, 6);
-        if (dict_data && dict_size > 0) {
-            checkpoint.dict_compressed.resize(dict_size);
-            std::memcpy(checkpoint.dict_compressed.data(), dict_data,
-                        dict_size);
-        }
-
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        checkpoint.first_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 8));
-        checkpoint.last_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 9));
-
-        checkpoints.push_back(checkpoint);
-    }
-
-    return checkpoints;
-}
-
-std::vector<IndexerCheckpoint> query_tar_checkpoints_for_line_range(
-    const SqliteDatabase &db, int archive_id, std::uint64_t start_line,
-    std::uint64_t end_line) {
-    std::vector<IndexerCheckpoint> checkpoints;
-
-    SqliteStmt stmt(
-        db,
-        "SELECT checkpoint_idx, uc_offset, uc_size, c_offset, c_size, "
-        "bits, dict_compressed, num_lines, first_line_num, last_line_num "
-        "FROM tar_gzip_checkpoints "
-        "WHERE archive_id = ? AND "
-        "((first_line_num <= ? AND last_line_num >= ?) OR "
-        " (first_line_num <= ? AND last_line_num >= ?)) "
-        "ORDER BY checkpoint_idx;");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_int64(2, static_cast<int64_t>(start_line));
-    stmt.bind_int64(3, static_cast<int64_t>(start_line));
-    stmt.bind_int64(4, static_cast<int64_t>(end_line));
-    stmt.bind_int64(5, static_cast<int64_t>(end_line));
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        IndexerCheckpoint checkpoint;
-        checkpoint.checkpoint_idx =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 0));
-        checkpoint.uc_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        checkpoint.uc_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-        checkpoint.c_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 3));
-        checkpoint.c_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        checkpoint.bits = sqlite3_column_int(stmt, 5);
-
-        // Copy compressed dictionary
-        const void *dict_data = sqlite3_column_blob(stmt, 6);
-        int dict_size = sqlite3_column_bytes(stmt, 6);
-        if (dict_data && dict_size > 0) {
-            checkpoint.dict_compressed.resize(dict_size);
-            std::memcpy(checkpoint.dict_compressed.data(), dict_data,
-                        dict_size);
-        }
-
-        checkpoint.num_lines =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 7));
-        checkpoint.first_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 8));
-        checkpoint.last_line_num =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 9));
-
-        checkpoints.push_back(checkpoint);
-    }
-
-    return checkpoints;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_files.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_files.cpp
deleted file mode 100644
index 98098908..00000000
--- a/src/dftracer/utils/utilities/indexer/internal/tar/queries/query_tar_files.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
-
-namespace dftracer::utils::utilities::indexer::internal::tar {
-
-std::vector<TarIndexer::TarFileInfo> query_tar_files(const SqliteDatabase &db,
-                                                     int archive_id) {
-    std::vector<TarIndexer::TarFileInfo> files;
-
-    SqliteStmt stmt(db,
-                    "SELECT file_name, file_size, file_mtime, typeflag, "
-                    "data_offset, uncompressed_offset "
-                    "FROM tar_files "
-                    "WHERE archive_id = ? "
-                    "ORDER BY uncompressed_offset;");
-
-    stmt.bind_int(1, archive_id);
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        TarIndexer::TarFileInfo file_info;
-        file_info.file_name =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 0));
-        file_info.file_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        file_info.file_mtime =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-
-        const char *typeflag_str =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 3));
-        file_info.typeflag = typeflag_str ? typeflag_str[0] : '0';
-
-        file_info.data_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        file_info.uncompressed_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 5));
-
-        files.push_back(file_info);
-    }
-
-    return files;
-}
-
-bool query_tar_file(const SqliteDatabase &db, int archive_id,
-                    const std::string &file_name,
-                    TarIndexer::TarFileInfo &file_info) {
-    SqliteStmt stmt(db,
-                    "SELECT file_name, file_size, file_mtime, typeflag, "
-                    "data_offset, uncompressed_offset "
-                    "FROM tar_files "
-                    "WHERE archive_id = ? AND file_name = ?;");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_text(2, file_name);
-
-    if (sqlite3_step(stmt) == SQLITE_ROW) {
-        file_info.file_name =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 0));
-        file_info.file_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        file_info.file_mtime =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-
-        const char *typeflag_str =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 3));
-        file_info.typeflag = typeflag_str ? typeflag_str[0] : '0';
-
-        file_info.data_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        file_info.uncompressed_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 5));
-
-        return true;
-    }
-
-    return false;
-}
-
-std::vector<TarIndexer::TarFileInfo> query_tar_files_in_range(
-    const SqliteDatabase &db, int archive_id, std::uint64_t start_offset,
-    std::uint64_t end_offset) {
-    std::vector<TarIndexer::TarFileInfo> files;
-
-    SqliteStmt stmt(db,
-                    "SELECT file_name, file_size, file_mtime, typeflag, "
-                    "data_offset, uncompressed_offset "
-                    "FROM tar_files "
-                    "WHERE archive_id = ? AND uncompressed_offset >= ? AND "
-                    "uncompressed_offset < ? "
-                    "ORDER BY uncompressed_offset;");
-
-    stmt.bind_int(1, archive_id);
-    stmt.bind_int64(2, static_cast<int64_t>(start_offset));
-    stmt.bind_int64(3, static_cast<int64_t>(end_offset));
-
-    while (sqlite3_step(stmt) == SQLITE_ROW) {
-        TarIndexer::TarFileInfo file_info;
-        file_info.file_name =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 0));
-        file_info.file_size =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-        file_info.file_mtime =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 2));
-
-        const char *typeflag_str =
-            reinterpret_cast<const char *>(sqlite3_column_text(stmt, 3));
-        file_info.typeflag = typeflag_str ? typeflag_str[0] : '0';
-
-        file_info.data_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 4));
-        file_info.uncompressed_offset =
-            static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 5));
-
-        files.push_back(file_info);
-    }
-
-    return files;
-}
-
-}  // namespace dftracer::utils::utilities::indexer::internal::tar
\ No newline at end of file
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.cpp b/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.cpp
index 40b14cc3..157553ed 100644
--- a/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.cpp
+++ b/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.cpp
@@ -1,122 +1,193 @@
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
-#include <dftracer/utils/core/sqlite/async.h>
-#include <dftracer/utils/core/sqlite/statement.h>
+#include <dftracer/utils/core/rocksdb/async.h>
+#include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/common/gzip_inflater.h>
 #include <dftracer/utils/utilities/indexer/internal/error.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h>
 #include <dftracer/utils/utilities/indexer/internal/tar/tar_parser.h>
+#include <dftracer/utils/utilities/indexer/internal/transaction_scope.h>
+#include <fcntl.h>
+#include <unistd.h>
 
-#include <chrono>
-#include <fstream>
-#include <sstream>
+#include <algorithm>
 #include <utility>
 
 namespace dftracer::utils::utilities::indexer::internal::tar {
 
-// Import the SQL_SCHEMA from constants
-extern const char *const &SQL_SCHEMA;
+using dftracer::utils::utilities::indexer::IndexDatabase;
+namespace rocks = dftracer::utils::rocksdb;
 
-// Forward declare helper functions
-static dftracer::utils::coro::CoroTask<bool> build_tar_index(
-    const SqliteDatabase &db, int archive_id, const std::string &tar_gz_path,
-    std::uint64_t ckpt_size);
-static void init_tar_schema(const SqliteDatabase &db);
+namespace {
 
-TarIndexer::TarIndexer(const std::string &tar_gz_file_path,
-                       const std::string &index_path,
+std::string normalize_idx_path(const std::string& path) {
+    // Map `path` onto a path whose last component is ".dftindex".
+    const fs::path given(path);
+    const fs::path parent = given.parent_path();
+    if (given.filename() == ".dftindex") {
+        return given.string();
+    }
+    if (parent.filename() == ".dftindex") {
+        return parent.string();
+    }
+    const fs::path base = given.has_extension() ? parent : given;
+    return (base / ".dftindex").string();
+}
+
+/// Inflates `tar_gz_path`, parses its TAR headers and stores the regular-file
+/// entries plus size/line metadata for `file_id` in one TransactionScope.
+/// Returns false if the file cannot be opened or inflated.
+dftracer::utils::coro::CoroTask<bool> build_tar_index(
+    IndexDatabase& db, int file_id, const std::string& tar_gz_path,
+    std::uint64_t ckpt_size) {
+    const int fd = ::open(tar_gz_path.c_str(), O_RDONLY);
+    if (fd < 0) {
+        co_return false;
+    }
+    // Close the descriptor on every exit path, including thrown exceptions.
+    struct FdGuard {
+        int value;
+        ~FdGuard() { ::close(value); }
+    } fd_guard{fd};
+
+    GzipInflater inflater;
+    off_t offset = 0;
+    if (!(co_await inflater.initialize(fd))) {
+        co_return false;
+    }
+
+    std::uint64_t total_lines = 0;
+    std::uint64_t total_uc_size = 0;
+    std::vector<unsigned char> accumulated_data;
+    accumulated_data.reserve(1024 * 1024);
+
+    while (true) {
+        GzipInflaterResult result;
+        if (!(co_await inflater.read(fd, offset, result))) {
+            if (result.bytes_read == 0) {
+                break;
+            }
+            co_return false;
+        }
+
+        if (result.bytes_read == 0) {
+            break;
+        }
+
+        accumulated_data.insert(accumulated_data.end(), inflater.out_buffer,
+                                inflater.out_buffer + result.bytes_read);
+        total_uc_size += result.bytes_read;
+        total_lines += result.lines_found;
+    }
+
+    TarParser parser;
+    std::vector<TarFileEntry> tar_entries;
+    if (!parser.parse_headers(accumulated_data.data(), accumulated_data.size(),
+                              0, tar_entries)) {
+        DFTRACER_UTILS_LOG_DEBUG("%s", "Failed to parse TAR headers");
+    }
+
+    auto* db_ptr = &db;
+    auto* tar_entries_ptr = &tar_entries;
+    const std::string archive_name = fs::path(tar_gz_path).filename().string();
+    const auto* archive_name_ptr = &archive_name;
+    co_await rocks::run([db_ptr, file_id, ckpt_size, total_lines, total_uc_size,
+                         tar_entries_ptr, archive_name_ptr] {
+        internal::TransactionScope txn(*db_ptr);
+        std::uint64_t regular_files = 0;
+        for (const auto& entry : *tar_entries_ptr) {
+            if (!entry.is_regular_file()) {
+                continue;
+            }
+
+            ++regular_files;
+            db_ptr->insert_tar_file(
+                file_id, IndexDatabase::TarFileRecord{
+                             .file_name = entry.name,
+                             .file_size = entry.size,
+                             .file_mtime = entry.mtime,
+                             .typeflag = entry.typeflag,
+                             .data_offset = entry.data_offset,
+                             .uncompressed_offset = entry.uncompressed_offset,
+                         });
+        }
+
+        db_ptr->insert_file_metadata(file_id, ckpt_size, total_lines,
+                                     total_uc_size);
+        db_ptr->insert_tar_archive_metadata(file_id, *archive_name_ptr,
+                                            ckpt_size, total_lines,
+                                            total_uc_size, regular_files);
+        txn.commit();
+    });
+    co_return true;
+}
+
+}  // namespace
+
+TarIndexer::TarIndexer(const std::string& tar_gz_file_path,
+                       const std::string& index_path_value,
                        std::uint64_t checkpoint_size, bool rebuild_force)
     : tar_gz_path(tar_gz_file_path),
-      idx_path(index_path),
+      tar_gz_path_logical_path(get_logical_path(tar_gz_file_path)),
+      index_path(normalize_idx_path(index_path_value)),  // ends in .dftindex
       ckpt_size(checkpoint_size),
-      force_rebuild(rebuild_force),
-      cached_is_valid(false),
-      cached_archive_id(-1),
-      cached_max_bytes(0),
-      cached_num_lines(0),
-      cached_num_files(0),
-      cached_checkpoint_size(0) {
+      force_rebuild(rebuild_force) {
     open();
 }
 
 TarIndexer::~TarIndexer() {
-    try {
-        DFTRACER_UTILS_LOG_DEBUG("Destroying TarIndexer for %s",
-                                 tar_gz_path.c_str());
-        if (db.is_open()) {
-            close();
-        }
-        DFTRACER_UTILS_LOG_DEBUG("TarIndexer destruction completed for %s",
-                                 tar_gz_path.c_str());
-    } catch (const std::exception &e) {
-        DFTRACER_UTILS_LOG_ERROR("Error during TarIndexer destruction: %s",
-                                 e.what());
-    } catch (...) {
-        DFTRACER_UTILS_LOG_ERROR("%s",
-                                 "Unknown error during TarIndexer destruction");
-    }
+    DFTRACER_UTILS_LOG_DEBUG("Destroying TarIndexer for %s",
+                             tar_gz_path.c_str());
+    close();  // resets every cached_* field
 }
 
-TarIndexer::TarIndexer(TarIndexer &&other) noexcept
+TarIndexer::TarIndexer(TarIndexer&& other) noexcept
     : tar_gz_path(std::move(other.tar_gz_path)),
       tar_gz_path_logical_path(std::move(other.tar_gz_path_logical_path)),
-      idx_path(std::move(other.idx_path)),
+      index_path(std::move(other.index_path)),
       ckpt_size(other.ckpt_size),
       force_rebuild(other.force_rebuild),
-      db(std::move(other.db)),
-      cached_is_valid(other.cached_is_valid),
-      cached_archive_id(other.cached_archive_id),
-      cached_max_bytes(other.cached_max_bytes),
-      cached_num_lines(other.cached_num_lines),
-      cached_num_files(other.cached_num_files),
-      cached_checkpoint_size(other.cached_checkpoint_size),
+      cached_is_valid(std::move(other.cached_is_valid)),  // other not locked
+      cached_archive_id(std::move(other.cached_archive_id)),
+      cached_max_bytes(std::move(other.cached_max_bytes)),
+      cached_num_lines(std::move(other.cached_num_lines)),
+      cached_num_files(std::move(other.cached_num_files)),
+      cached_checkpoint_size(std::move(other.cached_checkpoint_size)),
       cached_archive_name(std::move(other.cached_archive_name)),
       cached_checkpoints(std::move(other.cached_checkpoints)) {}
 
-TarIndexer &TarIndexer::operator=(TarIndexer &&other) noexcept {
+TarIndexer& TarIndexer::operator=(TarIndexer&& other) noexcept {
     if (this != &other) {
         tar_gz_path = std::move(other.tar_gz_path);
         tar_gz_path_logical_path = std::move(other.tar_gz_path_logical_path);
-        idx_path = std::move(other.idx_path);
+        index_path = std::move(other.index_path);
         ckpt_size = other.ckpt_size;
         force_rebuild = other.force_rebuild;
-        db = std::move(other.db);
-        cached_is_valid = other.cached_is_valid;
-        cached_archive_id = other.cached_archive_id;
-        cached_max_bytes = other.cached_max_bytes;
-        cached_num_lines = other.cached_num_lines;
-        cached_num_files = other.cached_num_files;
-        cached_checkpoint_size = other.cached_checkpoint_size;
+        std::scoped_lock lock(cache_mutex, other.cache_mutex);  // both objects
+        cached_is_valid = std::move(other.cached_is_valid);
+        cached_archive_id = std::move(other.cached_archive_id);
+        cached_max_bytes = std::move(other.cached_max_bytes);
+        cached_num_lines = std::move(other.cached_num_lines);
+        cached_num_files = std::move(other.cached_num_files);
+        cached_checkpoint_size = std::move(other.cached_checkpoint_size);
         cached_archive_name = std::move(other.cached_archive_name);
         cached_checkpoints = std::move(other.cached_checkpoints);
     }
     return *this;
 }
 
-void TarIndexer::open() {
-    DFTRACER_UTILS_LOG_DEBUG("Opening TAR indexer database: %s",
-                             idx_path.c_str());
-
-    tar_gz_path_logical_path = get_logical_path(tar_gz_path);
-
-    if (!db.open(idx_path)) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to open database at " + idx_path);
-    }
-}
+void TarIndexer::open() {}  // no-op; nothing is opened eagerly
 
 void TarIndexer::close() {
-    db.close();
-    // Reset all cache
-    cached_is_valid = false;
-    cached_archive_id = -1;
-    cached_max_bytes = 0;
-    cached_num_lines = 0;
-    cached_num_files = 0;
-    cached_checkpoint_size = 0;
+    std::lock_guard<std::mutex> lock(cache_mutex);  // guards cached_* fields
+    cached_is_valid.reset();
+    cached_archive_id.reset();
+    cached_max_bytes.reset();
+    cached_num_lines.reset();
+    cached_num_files.reset();
+    cached_checkpoint_size.reset();
     cached_archive_name.clear();
     cached_checkpoints.clear();
 }
@@ -126,51 +197,55 @@ dftracer::utils::coro::CoroTask<void> TarIndexer::build_async() const {
         co_return;
     }
 
-    co_await dftracer::utils::sqlite::run([&] {
-        init_tar_schema(db);
-
-        int aid = find_archive_id(tar_gz_path_logical_path);
-        if (aid != -1) {
-            delete_archive_record(db, aid);
-        }
-    });
-
-    printf("Get modifcation time for %s\n", tar_gz_path.c_str());
-    std::time_t mtime = get_file_modification_time(tar_gz_path);
-    printf("Calculate hash for %s\n", tar_gz_path.c_str());
-    auto hash = calculate_file_hash(tar_gz_path);
-    printf("Get size for %s\n", tar_gz_path.c_str());
-    std::uint64_t bytes = file_size_bytes(tar_gz_path);
-    // TODO: use determine_checkpoint_size like GZIP
-    std::uint64_t final_ckpt_size = ckpt_size;
-
-    auto [file_id, archive_id] = co_await dftracer::utils::sqlite::run([&] {
-        int fid;
-        insert_file_record(db, tar_gz_path_logical_path, bytes, mtime, hash,
-                           fid);
-
-        std::string archive_name = fs::path(tar_gz_path).filename().string();
-        int aid;
-        // Will update sizes later
-        insert_archive_record(db, fid, archive_name, 0, 0, aid);
-        return std::pair{fid, aid};
+    IndexDatabase db(index_path);
+    const auto hash = calculate_file_hash(tar_gz_path);
+    const std::string logical = tar_gz_path_logical_path;
+    const auto* logical_ptr = &logical;
+    const int file_id = co_await rocks::run([db_ptr = &db, logical_ptr, hash] {
+        return db_ptr->get_or_create_file_info(*logical_ptr, hash);
     });
 
-    if (!(co_await build_tar_index(db, archive_id, tar_gz_path,
-                                   final_ckpt_size))) {
+    if (!(co_await build_tar_index(db, file_id, tar_gz_path, ckpt_size))) {
         throw IndexerError(IndexerError::Type::BUILD_ERROR,
                            "Failed to build TAR index for " + tar_gz_path);
     }
 
-    // Reset cache to force refresh
+    struct CacheSnapshot {
+        std::uint64_t checkpoint_size = 0;
+        std::uint64_t num_lines = 0;
+        std::uint64_t max_bytes = 0;
+        std::uint64_t num_files = 0;
+        std::string archive_name;
+        std::vector<IndexerCheckpoint> checkpoints;
+    };
+    const std::string fallback_archive_name =
+        fs::path(tar_gz_path).filename().string();
+    const auto* fallback_archive_name_ptr = &fallback_archive_name;
+    auto snapshot =
+        co_await rocks::run([db_ptr = &db, file_id, fallback_archive_name_ptr] {
+            CacheSnapshot cache;
+            cache.checkpoint_size = db_ptr->get_checkpoint_size(file_id);
+            cache.num_lines = db_ptr->get_num_lines(file_id);
+            cache.max_bytes = db_ptr->get_max_bytes(file_id);
+            if (auto metadata = db_ptr->query_tar_archive_metadata(file_id)) {
+                cache.num_files = metadata->total_files;
+                cache.archive_name = metadata->archive_name;
+            } else {
+                cache.archive_name = *fallback_archive_name_ptr;
+            }
+            cache.checkpoints = db_ptr->query_checkpoints(file_id);
+            return cache;
+        });
+
+    std::lock_guard<std::mutex> lock(cache_mutex);
     cached_is_valid = true;
-    cached_archive_id = archive_id;
-    cached_max_bytes = 0;
-    cached_num_lines = 0;
-    cached_num_files = 0;
-    cached_checkpoint_size = final_ckpt_size;
-    cached_archive_name.clear();
-    cached_checkpoints.clear();
+    cached_archive_id = file_id;
+    cached_checkpoint_size = snapshot.checkpoint_size;
+    cached_num_lines = snapshot.num_lines;
+    cached_max_bytes = snapshot.max_bytes;
+    cached_num_files = snapshot.num_files;
+    cached_archive_name = std::move(snapshot.archive_name);
+    cached_checkpoints = std::move(snapshot.checkpoints);
     co_return;
 }
 
@@ -180,260 +255,290 @@ bool TarIndexer::need_rebuild() const {
     }
 
     try {
-        // Check if index exists and has valid schema
-        if (!query_schema_validity(db)) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        const auto stored_hash = db.get_file_hash(tar_gz_path_logical_path);
+        if (!stored_hash.has_value()) {
+            return true;
+        }
+
+        const int file_id = db.get_file_info_id(tar_gz_path_logical_path);
+        if (file_id < 0) {
             return true;
         }
 
-        // Check if file has been modified since last index
-        std::uint64_t stored_hash;
-        std::time_t stored_mtime;
-        if (query_stored_file_info(db, tar_gz_path_logical_path, stored_hash,
-                                   stored_mtime)) {
-            std::uint64_t current_hash = calculate_file_hash(tar_gz_path);
-            std::time_t current_mtime = get_file_modification_time(tar_gz_path);
+        if (db.get_checkpoint_size(file_id) == 0) {
+            return true;
+        }
 
-            return (stored_hash != current_hash ||
-                    stored_mtime != current_mtime);
+        if (!db.query_tar_archive_metadata(file_id).has_value()) {
+            return true;
         }
+
+        return *stored_hash != calculate_file_hash(tar_gz_path);
     } catch (...) {
         return true;
     }
-
-    return true;  // If we can't determine, rebuild to be safe
 }
 
 bool TarIndexer::is_valid() const {
-    if (!cached_is_valid) {
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    if (!cached_is_valid.has_value()) {
         try {
-            bool schema_valid = query_schema_validity(db);
-            bool has_data = (find_archive_id(tar_gz_path_logical_path) != -1);
-            cached_is_valid = schema_valid && has_data;
+            IndexDatabase db(
+                index_path,
+                dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+            const auto file_id = db.get_file_info_id(tar_gz_path_logical_path);
+            cached_is_valid =
+                file_id != -1 &&
+                db.query_tar_archive_metadata(file_id).has_value();
         } catch (...) {
             cached_is_valid = false;
         }
     }
-    return cached_is_valid;
+    return *cached_is_valid;
 }
 
 bool TarIndexer::exists() const {
-    return fs::exists(idx_path) && fs::is_regular_file(idx_path);
+    return fs::exists(index_path) && fs::is_directory(index_path);
 }
 
-const std::string &TarIndexer::get_idx_path() const { return idx_path; }
+const std::string& TarIndexer::get_index_path() const { return index_path; }
 
-const std::string &TarIndexer::get_archive_path() const { return tar_gz_path; }
+const std::string& TarIndexer::get_archive_path() const { return tar_gz_path; }
 
-const std::string &TarIndexer::get_tar_gz_path() const { return tar_gz_path; }
+const std::string& TarIndexer::get_tar_gz_path() const { return tar_gz_path; }
 
-std::uint64_t TarIndexer::get_checkpoint_size() const { return ckpt_size; }
+std::uint64_t TarIndexer::get_checkpoint_size() const {
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (cached_checkpoint_size.has_value()) {
+            return *cached_checkpoint_size;
+        }
+    }
+    const int file_id = get_archive_id();
+    if (file_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        const auto value = db.get_checkpoint_size(file_id);
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        cached_checkpoint_size = value;
+    }
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    return cached_checkpoint_size.value_or(0);
+}
 
 std::uint64_t TarIndexer::get_max_bytes() const {
-    if (cached_max_bytes == 0) {
-        cached_max_bytes = query_max_bytes(db, tar_gz_path_logical_path);
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (cached_max_bytes.has_value()) {
+            return *cached_max_bytes;
+        }
+    }
+    const int file_id = get_archive_id();
+    if (file_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        const auto value = db.get_max_bytes(file_id);
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        cached_max_bytes = value;
     }
-    return cached_max_bytes;
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    return cached_max_bytes.value_or(0);
 }
 
 std::uint64_t TarIndexer::get_num_lines() const {
-    if (cached_num_lines == 0) {
-        cached_num_lines = query_num_lines(db, tar_gz_path_logical_path);
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (cached_num_lines.has_value()) {
+            return *cached_num_lines;
+        }
+    }
+    const int file_id = get_archive_id();
+    if (file_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        const auto value = db.get_num_lines(file_id);
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        cached_num_lines = value;
     }
-    return cached_num_lines;
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    return cached_num_lines.value_or(0);
 }
 
 std::uint64_t TarIndexer::get_num_files() const {
-    if (cached_num_files == 0) {
-        cached_num_files = query_num_files(db, tar_gz_path_logical_path);
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (cached_num_files.has_value()) {
+            return *cached_num_files;
+        }
+    }
+    const int file_id = get_archive_id();
+    if (file_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        std::uint64_t value = 0;
+        if (auto metadata = db.query_tar_archive_metadata(file_id)) {
+            value = metadata->total_files;
+        }
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        cached_num_files = value;
     }
-    return cached_num_files;
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    return cached_num_files.value_or(0);
 }
 
 std::string TarIndexer::get_archive_name() const {
-    if (cached_archive_name.empty()) {
-        cached_archive_name = query_archive_name(db, tar_gz_path_logical_path);
-        if (cached_archive_name.empty()) {
-            cached_archive_name = fs::path(tar_gz_path).filename().string();
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (!cached_archive_name.empty()) {
+            return cached_archive_name;
         }
     }
+    std::string value;
+    const int file_id = get_archive_id();
+    if (file_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        if (auto metadata = db.query_tar_archive_metadata(file_id)) {
+            value = metadata->archive_name;
+        }
+    }
+    if (value.empty()) {
+        value = fs::path(tar_gz_path).filename().string();
+    }
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    cached_archive_name = value;
     return cached_archive_name;
 }
 
 int TarIndexer::get_archive_id() const {
-    if (cached_archive_id == -1) {
+    std::lock_guard<std::mutex> lock(cache_mutex);
+    if (!cached_archive_id.has_value()) {
         cached_archive_id = find_archive_id(tar_gz_path_logical_path);
     }
-    return cached_archive_id;
+    return *cached_archive_id;
 }
 
-int TarIndexer::find_archive_id(const std::string &tar_gz_file_path) const {
-    return query_archive_id(db, tar_gz_file_path);
+int TarIndexer::find_archive_id(const std::string& tar_gz_file_path) const {
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.get_file_info_id(tar_gz_file_path);
 }
 
 bool TarIndexer::find_checkpoint(std::size_t target_offset,
-                                 IndexerCheckpoint &checkpoint) const {
-    int archive_id = get_archive_id();
-    if (archive_id == -1) return false;
-    return query_tar_checkpoint(db, target_offset, archive_id, checkpoint);
+                                 IndexerCheckpoint& checkpoint) const {
+    const int archive_id = get_archive_id();
+    if (archive_id == -1) {
+        return false;
+    }
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.find_checkpoint(archive_id, target_offset, checkpoint);
 }
 
 std::vector<IndexerCheckpoint> TarIndexer::get_checkpoints() const {
-    if (cached_checkpoints.empty()) {
-        int archive_id = get_archive_id();
-        if (archive_id != -1) {
-            cached_checkpoints = query_tar_checkpoints(db, archive_id);
+    {
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        if (!cached_checkpoints.empty()) {
+            return cached_checkpoints;
         }
     }
+    const int archive_id = get_archive_id();
+    if (archive_id != -1) {
+        IndexDatabase db(
+            index_path,
+            dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+        auto checkpoints = db.query_checkpoints(archive_id);
+        std::lock_guard<std::mutex> lock(cache_mutex);
+        cached_checkpoints = std::move(checkpoints);
+    }
+    std::lock_guard<std::mutex> lock(cache_mutex);
     return cached_checkpoints;
 }
 
 std::vector<IndexerCheckpoint> TarIndexer::get_checkpoints_for_line_range(
     std::uint64_t start_line, std::uint64_t end_line) const {
-    int archive_id = get_archive_id();
-    if (archive_id == -1) return {};
-    return query_tar_checkpoints_for_line_range(db, archive_id, start_line,
-                                                end_line);
+    const int archive_id = get_archive_id();
+    if (archive_id == -1) {
+        return {};
+    }
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    return db.query_checkpoints_for_line_range(archive_id, start_line,
+                                               end_line);
 }
 
 std::vector<TarIndexer::TarFileInfo> TarIndexer::list_files() const {
-    int archive_id = get_archive_id();
-    if (archive_id == -1) return {};
+    const int archive_id = get_archive_id();
+    if (archive_id == -1) {
+        return {};
+    }
 
-    auto tar_files = query_tar_files(db, archive_id);
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    auto tar_files = db.query_tar_files(archive_id);
     std::vector<TarFileInfo> result;
     result.reserve(tar_files.size());
-
-    for (const auto &tf : tar_files) {
-        result.emplace_back(
-            TarFileInfo{tf.file_name, tf.file_size, tf.file_mtime, tf.typeflag,
-                        tf.data_offset, tf.uncompressed_offset});
+    for (const auto& tf : tar_files) {
+        result.push_back(TarFileInfo{tf.file_name, tf.file_size, tf.file_mtime,
+                                     tf.typeflag, tf.data_offset,
+                                     tf.uncompressed_offset});
     }
-
     return result;
 }
 
-bool TarIndexer::find_file(const std::string &file_name,
-                           TarFileInfo &file_info) const {
-    int archive_id = get_archive_id();
-    if (archive_id == -1) return false;
+bool TarIndexer::find_file(const std::string& file_name,
+                           TarFileInfo& file_info) const {
+    const int archive_id = get_archive_id();
+    if (archive_id == -1) {
+        return false;
+    }
+
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
+    IndexDatabase::TarFileRecord record;
+    if (!db.find_tar_file(archive_id, file_name, record)) {
+        return false;
+    }
 
-    return query_tar_file(db, archive_id, file_name, file_info);
+    file_info = TarFileInfo{record.file_name,   record.file_size,
+                            record.file_mtime,  record.typeflag,
+                            record.data_offset, record.uncompressed_offset};
+    return true;
 }
 
 std::vector<TarIndexer::TarFileInfo> TarIndexer::find_files_in_range(
     std::uint64_t start_offset, std::uint64_t end_offset) const {
-    int archive_id = get_archive_id();
-    if (archive_id == -1) return {};
+    const int archive_id = get_archive_id();
+    if (archive_id == -1) {
+        return {};
+    }
 
+    IndexDatabase db(
+        index_path,
+        dftracer::utils::rocksdb::RocksDatabase::OpenMode::ReadOnly);
     auto tar_files =
-        query_tar_files_in_range(db, archive_id, start_offset, end_offset);
+        db.query_tar_files_in_range(archive_id, start_offset, end_offset);
     std::vector<TarFileInfo> result;
     result.reserve(tar_files.size());
-
-    for (const auto &tf : tar_files) {
-        result.emplace_back(
-            TarFileInfo{tf.file_name, tf.file_size, tf.file_mtime, tf.typeflag,
-                        tf.data_offset, tf.uncompressed_offset});
+    for (const auto& tf : tar_files) {
+        result.push_back(TarFileInfo{tf.file_name, tf.file_size, tf.file_mtime,
+                                     tf.typeflag, tf.data_offset,
+                                     tf.uncompressed_offset});
     }
-
     return result;
 }
 
-// Include the helper functions from the impl file
-static void init_tar_schema(const SqliteDatabase &db) {
-    DFTRACER_UTILS_LOG_DEBUG("%s", "Initializing TAR indexer schema");
-    int rc = sqlite3_exec(db.get(), SQL_SCHEMA, NULL, NULL, NULL);
-    if (rc != SQLITE_OK) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to initialize TAR schema: " +
-                               std::string(sqlite3_errmsg(db.get())));
-    }
-}
-
-static dftracer::utils::coro::CoroTask<bool> build_tar_index(
-    const SqliteDatabase &db, int archive_id, const std::string &tar_gz_path,
-    std::uint64_t ckpt_size) {
-    int fd = ::open(tar_gz_path.c_str(), O_RDONLY);
-    if (fd < 0) {
-        co_return false;
-    }
-
-    GzipInflater inflater;
-    off_t offset = 0;
-    if (!(co_await inflater.initialize(fd))) {
-        ::close(fd);
-        co_return false;
-    }
-
-    std::uint64_t total_lines = 0;
-    std::uint64_t total_uc_size = 0;
-    std::uint64_t current_uc_offset = 0;
-
-    // Parse TAR format and extract file entries
-    TarParser parser;
-    std::vector<unsigned char> accumulated_data;
-    accumulated_data.reserve(1024 * 1024);  // Pre-allocate 1MB
-
-    while (true) {
-        // std::size_t chunk_start_uc = current_uc_offset;
-        // std::size_t chunk_start_c = inflater.get_total_input_consumed();
-
-        GzipInflaterResult result;
-        if (!(co_await inflater.read(fd, offset, result))) {
-            if (result.bytes_read == 0) {
-                break;        // EOF
-            }
-            ::close(fd);
-            co_return false;  // Error
-        }
-
-        if (result.bytes_read == 0) {
-            break;  // EOF
-        }
-
-        // Accumulate data for TAR parsing
-        accumulated_data.insert(accumulated_data.end(), inflater.out_buffer,
-                                inflater.out_buffer + result.bytes_read);
-
-        current_uc_offset += result.bytes_read;
-        total_lines += result.lines_found;
-    }
-
-    // Parse TAR entries from accumulated data
-    std::vector<TarFileEntry> tar_entries;
-    if (!parser.parse_headers(accumulated_data.data(), accumulated_data.size(),
-                              0, tar_entries)) {
-        DFTRACER_UTILS_LOG_DEBUG(
-            "%s", "Failed to parse TAR headers from accumulated data");
-        // Continue anyway - might be a malformed TAR or not actually TAR.GZ
-    }
-
-    // Insert TAR file entries and metadata into database
-    total_uc_size = current_uc_offset;
-    co_await dftracer::utils::sqlite::run([&] {
-        for (const auto &entry : tar_entries) {
-            if (entry.is_regular_file()) {
-                InsertTarFileData file_data;
-                file_data.file_name = entry.name;
-                file_data.file_size = entry.size;
-                file_data.file_mtime = entry.mtime;
-                file_data.typeflag = entry.typeflag;
-                file_data.data_offset = entry.data_offset;
-                file_data.uncompressed_offset = entry.uncompressed_offset;
-
-                insert_tar_file_record(db, archive_id, file_data);
-            }
-        }
-
-        DFTRACER_UTILS_LOG_DEBUG("Parsed %zu TAR file entries",
-                                 tar_entries.size());
-
-        insert_archive_metadata_record(db, archive_id, ckpt_size, total_lines,
-                                       total_uc_size);
-    });
-
-    ::close(fd);
-    co_return true;
-}
-
 }  // namespace dftracer::utils::utilities::indexer::internal::tar
diff --git a/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h b/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h
index 5fb79072..7889a2b1 100644
--- a/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h
+++ b/src/dftracer/utils/utilities/indexer/internal/tar/tar_indexer.h
@@ -4,25 +4,25 @@
 #include <dftracer/utils/core/common/archive_format.h>
 #include <dftracer/utils/core/common/constants.h>
 #include <dftracer/utils/core/coro/task.h>
-#include <dftracer/utils/core/sqlite/database.h>
+#include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/checkpoint.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer.h>
 
 #include <cstddef>
 #include <cstdint>
+#include <mutex>
+#include <optional>
 #include <string>
 #include <vector>
 
 namespace dftracer::utils::utilities::indexer::internal::tar {
 
-using dftracer::utils::sqlite::SqliteDatabase;
-
 class TarIndexer : public Indexer {
    public:
     static constexpr std::uint64_t DEFAULT_CHECKPOINT_SIZE =
         constants::indexer::DEFAULT_CHECKPOINT_SIZE;
 
-    TarIndexer(const std::string &tar_gz_path, const std::string &idx_path,
+    TarIndexer(const std::string &tar_gz_path, const std::string &index_path,
                std::uint64_t checkpoint_size = DEFAULT_CHECKPOINT_SIZE,
                bool force = false);
     ~TarIndexer();
@@ -35,7 +35,7 @@ class TarIndexer : public Indexer {
     bool need_rebuild() const override;
     bool exists() const override;
 
-    const std::string &get_idx_path() const override;
+    const std::string &get_index_path() const override;
     const std::string &get_archive_path() const override;
     const std::string &get_tar_gz_path() const;
     std::uint64_t get_checkpoint_size() const override;
@@ -80,20 +80,20 @@ class TarIndexer : public Indexer {
    private:
     std::string tar_gz_path;
     std::string tar_gz_path_logical_path;
-    std::string idx_path;
+    std::string index_path;
     std::uint64_t ckpt_size;
     bool force_rebuild;
-    SqliteDatabase db;
 
     // Cached values
-    mutable bool cached_is_valid;
-    mutable int cached_archive_id;
-    mutable std::uint64_t cached_max_bytes;
-    mutable std::uint64_t cached_num_lines;
-    mutable std::uint64_t cached_num_files;
-    mutable std::uint64_t cached_checkpoint_size;
+    mutable std::optional<bool> cached_is_valid;
+    mutable std::optional<int> cached_archive_id;
+    mutable std::optional<std::uint64_t> cached_max_bytes;
+    mutable std::optional<std::uint64_t> cached_num_lines;
+    mutable std::optional<std::uint64_t> cached_num_files;
+    mutable std::optional<std::uint64_t> cached_checkpoint_size;
     mutable std::string cached_archive_name;
     mutable std::vector<IndexerCheckpoint> cached_checkpoints;
+    mutable std::mutex cache_mutex;
 
     // Internal methods
     void open();
diff --git a/src/dftracer/utils/utilities/indexer/internal/transaction_scope.h b/src/dftracer/utils/utilities/indexer/internal/transaction_scope.h
new file mode 100644
index 00000000..b23a23e1
--- /dev/null
+++ b/src/dftracer/utils/utilities/indexer/internal/transaction_scope.h
@@ -0,0 +1,62 @@
+#ifndef DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TRANSACTION_SCOPE_H
+#define DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TRANSACTION_SCOPE_H
+
+namespace dftracer::utils::utilities::indexer::internal {
+
+// Scope guard for a database transaction.
+//
+// The constructor calls `begin_transaction()` on the database. Unless
+// `commit()` has completed, the destructor calls `rollback_transaction()`, so
+// leaving the scope early (return or exception) triggers a rollback.
+//
+// `Database` must provide `begin_transaction()`, `commit_transaction()` and
+// `rollback_transaction()`. Only a reference is stored, so the database must
+// outlive the guard.
+template <typename Database>
+class TransactionScope {
+   public:
+    // Begins a transaction on `db`; anything that call throws propagates.
+    explicit TransactionScope(Database& db) : db_(db) {
+        db_.begin_transaction();
+    }
+
+    TransactionScope(const TransactionScope&) = delete;
+    TransactionScope& operator=(const TransactionScope&) = delete;
+
+    // Takes over the open transaction: `other` is marked as committed so that
+    // its destructor does not roll back.
+    TransactionScope(TransactionScope&& other) noexcept
+        : db_(other.db_), committed_(other.committed_) {
+        other.committed_ = true;
+    }
+
+    // Rolls back unless `commit()` completed. Never throws.
+    ~TransactionScope() {
+        if (committed_) {
+            return;
+        }
+        // Destructors are implicitly noexcept, so an exception escaping the
+        // rollback would end in std::terminate, and this path may run while
+        // another exception is already unwinding the stack. The rollback is
+        // therefore best-effort and its failure is deliberately swallowed.
+        try {
+            db_.rollback_transaction();
+        } catch (...) {
+        }
+    }
+
+    // Commits the transaction. If `commit_transaction()` throws, the guard
+    // stays uncommitted and the destructor still attempts a rollback.
+    void commit() {
+        db_.commit_transaction();
+        committed_ = true;
+    }
+
+   private:
+    Database& db_;
+    bool committed_ = false;
+};
+
+}  // namespace dftracer::utils::utilities::indexer::internal
+
+#endif  // DFTRACER_UTILS_UTILITIES_INDEXER_INTERNAL_TRANSACTION_SCOPE_H
diff --git a/src/dftracer/utils/utilities/indexer/provenance_database.cpp b/src/dftracer/utils/utilities/indexer/provenance_database.cpp
index ca65ef2f..4896a54e 100644
--- a/src/dftracer/utils/utilities/indexer/provenance_database.cpp
+++ b/src/dftracer/utils/utilities/indexer/provenance_database.cpp
@@ -1,206 +1,426 @@
 #include <dftracer/utils/core/common/filesystem.h>
-#include <dftracer/utils/core/sqlite/statement.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
+#include <dftracer/utils/core/rocksdb/key_codec.h>
 #include <dftracer/utils/utilities/indexer/internal/error.h>
+#include <dftracer/utils/utilities/indexer/internal/helpers.h>
+#include <dftracer/utils/utilities/indexer/internal/scan_prefix.h>
 #include <dftracer/utils/utilities/indexer/provenance_database.h>
 
+#include <stdexcept>
+#include <utility>
+
 namespace dftracer::utils::utilities::indexer {
 
-namespace queries = composites::dft::indexing::queries;
+namespace rocks = dftracer::utils::rocksdb;
 
-using dftracer::utils::sqlite::SqliteStmt;
 using internal::IndexerError;
 
-static const char* PROVENANCE_SCHEMA = R"(
-    PRAGMA journal_mode=WAL;
-    PRAGMA busy_timeout=5000;
-    PRAGMA foreign_keys=ON;
-
-    CREATE TABLE IF NOT EXISTS file_info (
-        id      INTEGER PRIMARY KEY,
-        path    TEXT NOT NULL,
-        hash    INTEGER
-    );
-
-    CREATE TABLE IF NOT EXISTS provenance_info (
-        key     TEXT PRIMARY KEY,
-        value   TEXT
-    );
-
-    CREATE TABLE IF NOT EXISTS provenance_sources (
-        source_idx      INTEGER PRIMARY KEY,
-        file_info_id    INTEGER NOT NULL DEFAULT 0,
-        path            TEXT NOT NULL,
-        num_checkpoints INTEGER,
-        event_hash      TEXT NOT NULL DEFAULT ''
-    );
-
-    CREATE TABLE IF NOT EXISTS provenance_group (
-        id          INTEGER PRIMARY KEY,
-        name        TEXT,
-        predicate   TEXT
-    );
-
-    CREATE TABLE IF NOT EXISTS provenance_segments (
-        source_idx          INTEGER,
-        source_checkpoint   INTEGER,
-        output_line_start   INTEGER,
-        output_line_end     INTEGER,
-        event_count         INTEGER
-    );
-)";
-
-ProvenanceDatabase::ProvenanceDatabase(const std::string& pidx_path)
-    : db_(pidx_path) {}
-
-void ProvenanceDatabase::init_schema() {
-    char* err_msg = nullptr;
-    int rc =
-        sqlite3_exec(db_.get(), PROVENANCE_SCHEMA, nullptr, nullptr, &err_msg);
-    if (rc != SQLITE_OK) {
-        std::string error =
-            err_msg ? std::string(err_msg) : "Unknown schema error";
-        if (err_msg) sqlite3_free(err_msg);
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to initialize provenance schema: " + error);
+namespace {
+
+[[noreturn]] void throw_db_error(std::string_view message,
+                                 const ::rocksdb::Status& status) {
+    throw IndexerError(IndexerError::Type::DATABASE_ERROR,
+                       std::string(message) + ": " + status.ToString());
+}
+
+std::string file_key(std::string_view path) {
+    return std::string("pf|") + std::string(path);
+}
+
+std::string file_reverse_key(int file_info_id) {
+    std::string key("pr|");
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_info_id));
+    return key;
+}
+
+std::string next_file_id_key() { return "_next_prov_file_id"; }
+
+std::string encode_file_record(int file_info_id, std::uint64_t file_hash) {
+    std::string value;
+    rocks::KeyCodec::append_be32(value,
+                                 static_cast<std::uint32_t>(file_info_id));
+    rocks::KeyCodec::append_be64(value, file_hash);
+    return value;
+}
+
+int decode_file_id(std::string_view value) {
+    if (value.size() < 4) {
+        throw std::runtime_error("Corrupt provenance file record");
+    }
+    return static_cast<int>(rocks::KeyCodec::decode_be32(value.substr(0, 4)));
+}
+
+std::uint64_t decode_hash(std::string_view value) {
+    if (value.size() < 12) {
+        throw std::runtime_error("Corrupt provenance file record");
+    }
+    return rocks::KeyCodec::decode_be64(value.substr(4, 8));
+}
+
+std::string source_key(int file_info_id, int source_idx) {
+    std::string key("ps|");
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_info_id));
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(source_idx));
+    return key;
+}
+
+std::string info_key(int file_info_id, std::string_view key_suffix) {
+    std::string key("pi|");
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_info_id));
+    key.append(key_suffix);
+    return key;
+}
+
+std::string group_prefix(int file_info_id) {
+    std::string key("pg|");
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_info_id));
+    return key;
+}
+
+std::string group_key(int file_info_id, std::string_view name) {
+    auto key = group_prefix(file_info_id);
+    key.append(name);
+    return key;
+}
+
+std::string segment_key(int file_info_id, int source_idx,
+                        int source_checkpoint) {
+    std::string key("px|");
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(file_info_id));
+    rocks::KeyCodec::append_be32(key, static_cast<std::uint32_t>(source_idx));
+    rocks::KeyCodec::append_be32(key,
+                                 static_cast<std::uint32_t>(source_checkpoint));
+    return key;
+}
+
+void append_string(std::string& out, std::string_view value) {
+    rocks::KeyCodec::append_be32(out, static_cast<std::uint32_t>(value.size()));
+    out.append(value.data(), value.size());
+}
+
+void append_u32(std::string& out, std::uint32_t value) {
+    rocks::KeyCodec::append_be32(out, value);
+}
+
+// Forward-only, bounds-checked reader over an encoded provenance payload.
+class Cursor {
+   public:
+    explicit Cursor(std::string_view data) : data_(data) {}
+
+    // Consumes 4 bytes and decodes them with KeyCodec::decode_be32.
+    std::uint32_t u32() { return rocks::KeyCodec::decode_be32(take(4)); }
+
+    // Consumes a u32 length prefix followed by that many raw bytes.
+    std::string str() {
+        const auto bytes = take(static_cast<std::size_t>(u32()));
+        return std::string(bytes.data(), bytes.size());
+    }
+
+   private:
+    std::string_view take(std::size_t len) {
+        // offset_ <= data_.size(), so this subtraction cannot wrap.
+        if (len > data_.size() - offset_) {
+            throw std::runtime_error("Corrupt provenance payload");
+        }
+        const auto part = data_.substr(offset_, len);
+        offset_ += len;
+        return part;
     }
+
+    std::string_view data_;
+    std::size_t offset_ = 0;
+};
+
+template <typename Fn>
+void scan_prefix(const rocks::RocksDatabase& db, std::string_view prefix,
+                 Fn&& fn) {
+    internal::scan_prefix_iterator(
+        "Failed to scan provenance prefix", prefix,
+        [&] { return db.new_iterator("provenance"); }, std::forward<Fn>(fn));
 }
 
+}  // namespace
+
+ProvenanceDatabase::ProvenanceDatabase(const std::string& provenance_path,
+                                       rocks::RocksDatabase::OpenMode open_mode)
+    : db_path_(internal::normalize_index_root(provenance_path)),
+      open_mode_(open_mode),
+      db_(rocks::RocksDBManager::instance().get_or_open(db_path_, open_mode_)) {
+    if (open_mode_ == rocks::RocksDatabase::OpenMode::ReadWrite) {
+        init_schema();
+    }
+}
+
+void ProvenanceDatabase::init_schema() {}
+
 int ProvenanceDatabase::get_or_create_file_info(const std::string& path,
                                                 std::uint64_t file_hash) {
-    {
-        SqliteStmt stmt(db_, "SELECT id, hash FROM file_info WHERE path = ?;");
-        stmt.bind_text(1, path);
-        int rc = sqlite3_step(stmt);
-        if (rc == SQLITE_ROW) {
-            int id = sqlite3_column_int(stmt, 0);
-            auto stored_hash =
-                static_cast<std::uint64_t>(sqlite3_column_int64(stmt, 1));
-            if (stored_hash == file_hash) {
-                return id;
-            }
-            SqliteStmt del(db_, "DELETE FROM file_info WHERE id = ?;");
-            del.bind_int(1, id);
-            sqlite3_step(del);
+    const auto key = file_key(path);
+    std::string value;
+    auto status = db_->get(key, &value, "provenance");
+    if (status.ok()) {  // path is already registered
+        const auto id = decode_file_id(value);
+        if (decode_hash(value) == file_hash) {
+            return id;
         }
+        const auto encoded = encode_file_record(id, file_hash);
+        status = txn_batch_ ? db_->put(*txn_batch_, "provenance", key, encoded)
+                            : db_->put(key, encoded, "provenance");
+        if (!status.ok()) {
+            throw_db_error("Failed to update provenance file info", status);
+        }
+        status = txn_batch_
+                     ? db_->put(*txn_batch_, "provenance", file_reverse_key(id),
+                                path)
+                     : db_->put(file_reverse_key(id), path, "provenance");
+        if (!status.ok()) {
+            throw_db_error("Failed to update provenance reverse file info",
+                           status);
+        }
+        return id;
+    }
+    if (!status.IsNotFound()) {
+        throw_db_error("Failed to query provenance file info", status);
+    }
+    // NOTE(review): reads bypass txn_batch_; confirm ids stay unique in a txn.
+    std::uint32_t next_id = 1;  // used when no counter has been stored yet
+    std::string next_value;
+    status = db_->get(next_file_id_key(), &next_value, "provenance");
+    if (status.ok()) {
+        next_id = rocks::KeyCodec::decode_be32(next_value);
+    } else if (!status.IsNotFound()) {
+        throw_db_error("Failed to read next provenance file id", status);
     }
 
-    SqliteStmt stmt(db_, "INSERT INTO file_info(path, hash) VALUES(?, ?);");
-    stmt.bind_text(1, path);
-    stmt.bind_int64(2, static_cast<std::int64_t>(file_hash));
-    int rc = sqlite3_step(stmt);
-    if (rc != SQLITE_DONE) {
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to insert file_info: " +
-                               std::string(sqlite3_errmsg(db_.get())));
+    const auto encoded =
+        encode_file_record(static_cast<int>(next_id), file_hash);
+    const auto next_encoded = rocks::KeyCodec::encode_be32(next_id + 1);
+    if (txn_batch_) {
+        status = db_->put(*txn_batch_, "provenance", key, encoded);
+        if (!status.ok()) throw_db_error("Failed to insert file info", status);
+        status = db_->put(*txn_batch_, "provenance", file_reverse_key(next_id),
+                          path);
+        if (!status.ok()) {
+            throw_db_error("Failed to insert reverse file info", status);
+        }
+        status = db_->put(*txn_batch_, "provenance", next_file_id_key(),
+                          next_encoded);
+        if (!status.ok()) {
+            throw_db_error("Failed to update next provenance file id", status);
+        }
+    } else {
+        status = db_->put(key, encoded, "provenance");
+        if (!status.ok()) throw_db_error("Failed to insert file info", status);
+        status = db_->put(file_reverse_key(next_id), path, "provenance");
+        if (!status.ok()) {
+            throw_db_error("Failed to insert reverse file info", status);
+        }
+        status = db_->put(next_file_id_key(), next_encoded, "provenance");
+        if (!status.ok()) {
+            throw_db_error("Failed to update next provenance file id", status);
+        }
     }
-    return static_cast<int>(sqlite3_last_insert_rowid(db_.get()));
+    return static_cast<int>(next_id);
 }
 
 int ProvenanceDatabase::get_file_info_id(const std::string& path) const {
-    SqliteStmt stmt(db_, "SELECT id FROM file_info WHERE path = ?;");
-    stmt.bind_text(1, path);
-    int rc = sqlite3_step(stmt);
-    if (rc == SQLITE_ROW) {
-        return sqlite3_column_int(stmt, 0);
+    std::string value;  // raw value stored under file_key(path)
+    auto status = db_->get(file_key(path), &value, "provenance");
+    if (status.IsNotFound()) {
+        return -1;  // no record for this path
     }
-    return -1;
+    if (!status.ok()) {
+        throw_db_error("Failed to read provenance file info id", status);
+    }
+    return decode_file_id(value);  // id decoded from the stored value
 }
 
 void ProvenanceDatabase::begin_transaction() {
-    char* err_msg = nullptr;
-    int rc = sqlite3_exec(db_.get(), "BEGIN TRANSACTION;", nullptr, nullptr,
-                          &err_msg);
-    if (rc != SQLITE_OK) {
-        std::string error = err_msg ? std::string(err_msg) : "Unknown error";
-        if (err_msg) sqlite3_free(err_msg);
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to begin transaction: " + error);
-    }
+    txn_batch_ =  // NOTE(review): overwrites any batch already held
+        std::make_unique<rocks::RocksDatabase::Batch>(db_->begin_batch());
 }
 
 void ProvenanceDatabase::commit_transaction() {
-    char* err_msg = nullptr;
-    int rc = sqlite3_exec(db_.get(), "COMMIT;", nullptr, nullptr, &err_msg);
-    if (rc != SQLITE_OK) {
-        std::string error = err_msg ? std::string(err_msg) : "Unknown error";
-        if (err_msg) sqlite3_free(err_msg);
-        throw IndexerError(IndexerError::Type::DATABASE_ERROR,
-                           "Failed to commit transaction: " + error);
+    if (!txn_batch_) {
+        return;  // no batch open, nothing to write
+    }
+    auto status = db_->commit_batch(*txn_batch_);
+    txn_batch_.reset();  // batch is dropped even if the commit failed
+    if (!status.ok()) {
+        throw_db_error("Failed to commit provenance RocksDB batch", status);
     }
 }
 
+void ProvenanceDatabase::rollback_transaction() noexcept { txn_batch_.reset(); }
+
 std::string determine_provenance_index_path(const std::string& data_path,
                                             const std::string& index_dir) {
-    fs::path p(data_path);
-    std::string filename = p.filename().string() + ".pidx";
-
-    if (!index_dir.empty()) {
-        return (fs::path(index_dir) / filename).string();
-    }
-
-    return (data_path + ".pidx");
+    fs::path path = index_dir.empty() ? fs::path(data_path).parent_path()
+                                      : fs::path(index_dir);
+    return internal::normalize_index_root((path / ".dftindex").string());
 }
 
-// ---------------------------------------------------------------------------
-// Provenance insert operations
-// ---------------------------------------------------------------------------
-
-void ProvenanceDatabase::insert_info(std::string_view key,
+void ProvenanceDatabase::insert_info(int file_info_id, std::string_view key,
                                      std::string_view value) {
-    queries::insert_provenance_info(db_, key, value);
+    const auto db_key = info_key(file_info_id, key);
+    auto status = txn_batch_  // use the open batch when there is one
+                      ? db_->put(*txn_batch_, "provenance", db_key, value)
+                      : db_->put(db_key, value, "provenance");
+    if (!status.ok()) {
+        throw_db_error("Failed to insert provenance info", status);
+    }
 }
 
 void ProvenanceDatabase::insert_source(int file_info_id, int source_idx,
                                        std::string_view path,
                                        int num_checkpoints,
                                        std::string_view event_hash) {
-    queries::insert_provenance_source(db_, file_info_id, source_idx, path,
-                                      num_checkpoints, event_hash);
+    std::string value;  // fields in order: path, num_checkpoints, event_hash
+    append_string(value, path);
+    append_u32(value, static_cast<std::uint32_t>(num_checkpoints));
+    append_string(value, event_hash);
+    auto status = txn_batch_  // use the open batch when there is one
+                      ? db_->put(*txn_batch_, "provenance",
+                                 source_key(file_info_id, source_idx), value)
+                      : db_->put(source_key(file_info_id, source_idx), value,
+                                 "provenance");
+    if (!status.ok()) {
+        throw_db_error("Failed to insert provenance source", status);
+    }
 }
 
-void ProvenanceDatabase::insert_group(std::string_view name,
+void ProvenanceDatabase::insert_group(int file_info_id, std::string_view name,
                                       std::string_view predicate) {
-    queries::insert_provenance_group(db_, name, predicate);
+    const auto db_key = group_key(file_info_id, name);
+    auto status = txn_batch_  // use the open batch when there is one
+                      ? db_->put(*txn_batch_, "provenance", db_key,
+                                 std::string(predicate))
+                      : db_->put(db_key, std::string(predicate), "provenance");
+    if (!status.ok()) {
+        throw_db_error("Failed to insert provenance group", status);
+    }
 }
 
-void ProvenanceDatabase::insert_segment(int source_idx, int source_checkpoint,
+void ProvenanceDatabase::insert_segment(int file_info_id, int source_idx,
+                                        int source_checkpoint,
                                         int output_line_start,
                                         int output_line_end, int event_count) {
-    queries::insert_provenance_segment(db_, source_idx, source_checkpoint,
-                                       output_line_start, output_line_end,
-                                       event_count);
+    std::string value;  // three u32 fields: line start, line end, count
+    append_u32(value, static_cast<std::uint32_t>(output_line_start));
+    append_u32(value, static_cast<std::uint32_t>(output_line_end));
+    append_u32(value, static_cast<std::uint32_t>(event_count));
+    auto status =
+        txn_batch_  // use the open batch when there is one
+            ? db_->put(*txn_batch_, "provenance",
+                       segment_key(file_info_id, source_idx, source_checkpoint),
+                       value)
+            : db_->put(segment_key(file_info_id, source_idx, source_checkpoint),
+                       value, "provenance");
+    if (!status.ok()) {
+        throw_db_error("Failed to insert provenance segment", status);
+    }
 }
 
-// ---------------------------------------------------------------------------
-// Provenance query operations
-// ---------------------------------------------------------------------------
-
 std::vector<ProvenanceDatabase::ProvenanceSource>
 ProvenanceDatabase::query_sources(int file_info_id) const {
-    return queries::query_provenance_sources(db_, file_info_id);
+    std::vector<ProvenanceSource> results;
+    std::string prefix("ps|");  // scan prefix: "ps|" + be32 file id
+    rocks::KeyCodec::append_be32(prefix,
+                                 static_cast<std::uint32_t>(file_info_id));
+    scan_prefix(*db_, prefix, [&](::rocksdb::Iterator& it) {
+        const auto key = std::string(it.key().data(), it.key().size());
+        const auto value = std::string(it.value().data(), it.value().size());
+        ProvenanceSource source;  // key: "ps|", file id, source index
+        source.source_idx = static_cast<int>(
+            rocks::KeyCodec::decode_be32(std::string_view(key).substr(7, 4)));
+        Cursor cursor(value);  // value: path, u32 num_checkpoints, event hash
+        source.path = cursor.str();
+        source.num_checkpoints = static_cast<int>(cursor.u32());
+        source.event_hash = cursor.str();
+        results.push_back(std::move(source));
+    });
+    return results;
 }
 
 std::vector<ProvenanceDatabase::ProvenanceSegment>
-ProvenanceDatabase::query_segments(int source_idx) const {
-    return queries::query_provenance_segments(db_, source_idx);
+ProvenanceDatabase::query_segments(int file_info_id, int source_idx) const {
+    std::vector<ProvenanceSegment> results;
+    std::string prefix("px|");  // "px|" + be32 file id + be32 source index
+    rocks::KeyCodec::append_be32(prefix,
+                                 static_cast<std::uint32_t>(file_info_id));
+    rocks::KeyCodec::append_be32(prefix,
+                                 static_cast<std::uint32_t>(source_idx));
+    scan_prefix(*db_, prefix, [&](::rocksdb::Iterator& it) {
+        const auto key = std::string(it.key().data(), it.key().size());
+        const auto value = std::string(it.value().data(), it.value().size());
+        Cursor cursor(value);  // value: u32 line start, line end, event count
+        ProvenanceSegment segment;  // checkpoint is read from key offset 11
+        segment.source_idx = source_idx;
+        segment.source_checkpoint = static_cast<int>(
+            rocks::KeyCodec::decode_be32(std::string_view(key).substr(11, 4)));
+        segment.output_line_start = static_cast<int>(cursor.u32());
+        segment.output_line_end = static_cast<int>(cursor.u32());
+        segment.event_count = static_cast<int>(cursor.u32());
+        results.push_back(std::move(segment));
+    });
+    return results;
 }
 
 std::vector<ProvenanceDatabase::ProvenanceSegment>
-ProvenanceDatabase::query_all_segments() const {
-    return queries::query_all_provenance_segments(db_);
+ProvenanceDatabase::query_all_segments(int file_info_id) const {
+    std::vector<ProvenanceSegment> results;
+    std::string prefix("px|");  // "px|" + be32 file id: all of its sources
+    rocks::KeyCodec::append_be32(prefix,
+                                 static_cast<std::uint32_t>(file_info_id));
+    scan_prefix(*db_, prefix, [&](::rocksdb::Iterator& it) {
+        const auto key = std::string(it.key().data(), it.key().size());
+        const auto value = std::string(it.value().data(), it.value().size());
+        Cursor cursor(value);  // value: u32 line start, line end, event count
+        ProvenanceSegment segment;  // key: "px|", file, source, checkpoint
+        segment.source_idx = static_cast<int>(
+            rocks::KeyCodec::decode_be32(std::string_view(key).substr(7, 4)));
+        segment.source_checkpoint = static_cast<int>(
+            rocks::KeyCodec::decode_be32(std::string_view(key).substr(11, 4)));
+        segment.output_line_start = static_cast<int>(cursor.u32());
+        segment.output_line_end = static_cast<int>(cursor.u32());
+        segment.event_count = static_cast<int>(cursor.u32());
+        results.push_back(std::move(segment));
+    });
+    return results;
 }
 
-std::string ProvenanceDatabase::query_info(std::string_view key) const {
-    return queries::query_provenance_info(db_, key);
+std::string ProvenanceDatabase::query_info(int file_info_id,
+                                           std::string_view key) const {
+    std::string value;
+    auto status = db_->get(info_key(file_info_id, key), &value, "provenance");
+    if (status.IsNotFound()) {
+        return {};  // absent key reads as an empty string
+    }
+    if (!status.ok()) {
+        throw_db_error("Failed to query provenance info", status);
+    }
+    return value;
 }
 
-std::string ProvenanceDatabase::query_group_name() const {
-    return queries::query_provenance_group_name(db_);
+std::string ProvenanceDatabase::query_group_name(int file_info_id) const {
+    std::string result;  // stays empty when the file has no group key
+    bool seen = false;   // explicit flag: the first name may itself be empty
+    const auto prefix = group_prefix(file_info_id);
+    scan_prefix(*db_, prefix, [&](::rocksdb::Iterator& it) {
+        if (seen) return;  // only the first key under the prefix is used
+        seen = true;
+        result = it.key().ToString().substr(prefix.size());  // key tail = name
+    });
+    return result;
 }
 
-std::string ProvenanceDatabase::query_group_predicate() const {
-    return queries::query_provenance_group_predicate(db_);
+std::string ProvenanceDatabase::query_group_predicate(int file_info_id) const {
+    std::string result;  // stays empty when the file has no group key
+    bool seen = false;   // explicit flag: a stored predicate may be empty
+    scan_prefix(*db_, group_prefix(file_info_id), [&](::rocksdb::Iterator& it) {
+        if (!seen) result.assign(it.value().data(), it.value().size());
+        seen = true;  // later groups must not override the first one
+    });
+    return result;
 }
 
 }  // namespace dftracer::utils::utilities::indexer
diff --git a/src/dftracer/utils/utilities/indexer/visitors/bloom_visitor.cpp b/src/dftracer/utils/utilities/indexer/visitors/bloom_visitor.cpp
index e01d511e..931d8188 100644
--- a/src/dftracer/utils/utilities/indexer/visitors/bloom_visitor.cpp
+++ b/src/dftracer/utils/utilities/indexer/visitors/bloom_visitor.cpp
@@ -2,7 +2,6 @@
 #include <dftracer/utils/utilities/composites/dft/event.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/visitors/bloom_visitor.h>
 #include <yyjson.h>
@@ -14,9 +13,6 @@
 using dftracer::utils::utilities::common::json::JsonValue;
 using dftracer::utils::utilities::composites::dft::DFTracerEvent;
 using dftracer::utils::utilities::composites::dft::indexing::BloomFilter;
-namespace queries =
-    dftracer::utils::utilities::composites::dft::indexing::queries;
-
 namespace dftracer::utils::utilities::indexer {
 
 namespace {
@@ -187,18 +183,12 @@ void BloomVisitor::on_line(std::string_view line, std::size_t checkpoint_idx) {
 }
 
 void BloomVisitor::finalize(IndexDatabase& db, int file_id) {
-    auto& sql_db = db.sql_db();
-
     std::unordered_map<std::string, BloomFilter> file_blooms;
     for (const auto& dim : dimensions_) {
         file_blooms.emplace(dim, BloomFilter(config_.expected_entries_per_chunk,
                                              config_.false_positive_rate));
     }
 
-    auto bloom_stmt = queries::prepare_insert_chunk_bloom_filter(sql_db);
-    auto dim_stats_stmt = queries::prepare_insert_chunk_dimension_stats(sql_db);
-    auto hash_stmt = queries::prepare_insert_hash_resolution(sql_db);
-
     std::vector<unsigned char> blob;
 
     for (std::size_t i = 0; i < chunks_.size(); ++i) {
@@ -211,27 +201,24 @@ void BloomVisitor::finalize(IndexDatabase& db, int file_id) {
 
             const BloomFilter& bf = it->second;
             bf.serialize_into(blob);
-            queries::insert_chunk_bloom_filter(
-                bloom_stmt, file_id, checkpoint_idx, dim, blob.data(),
-                static_cast<int>(blob.size()),
+            db.insert_chunk_bloom_filter(
+                file_id, checkpoint_idx, dim,
+                std::span<const unsigned char>(blob.data(), blob.size()),
                 static_cast<std::uint64_t>(bf.num_entries()));
 
             file_blooms.at(dim).merge_from(bf);
         }
 
-        queries::insert_chunk_statistics(sql_db, file_id, checkpoint_idx,
-                                         chunk.statistics);
+        db.insert_chunk_statistics(file_id, checkpoint_idx, chunk.statistics);
 
         for (const auto& [dim, ds] : chunk.dimension_stats) {
-            queries::insert_chunk_dimension_stats(dim_stats_stmt, file_id,
-                                                  checkpoint_idx, ds,
-                                                  config_.value_counts_cap);
+            db.insert_chunk_dimension_stats(file_id, checkpoint_idx, ds,
+                                            config_.value_counts_cap);
         }
 
         for (const auto& [dim, resolutions] : chunk.hash_resolutions) {
             for (const auto& [hash_val, resolved] : resolutions) {
-                queries::insert_hash_resolution(hash_stmt, file_id, dim,
-                                                hash_val, resolved);
+                db.insert_hash_resolution(file_id, dim, hash_val, resolved);
             }
         }
     }
@@ -239,13 +226,14 @@ void BloomVisitor::finalize(IndexDatabase& db, int file_id) {
     for (const auto& dim : dimensions_) {
         const BloomFilter& bf = file_blooms.at(dim);
         bf.serialize_into(blob);
-        queries::insert_file_bloom_filter(
-            sql_db, file_id, dim, blob.data(), static_cast<int>(blob.size()),
+        db.insert_file_bloom_filter(
+            file_id, dim,
+            std::span<const unsigned char>(blob.data(), blob.size()),
             static_cast<std::uint64_t>(bf.num_entries()));
     }
 
     for (const auto& dim : dimensions_) {
-        queries::insert_index_dimension(sql_db, file_id, dim);
+        db.insert_index_dimension(file_id, dim);
     }
 }
 
diff --git a/src/dftracer/utils/utilities/indexer/visitors/manifest_visitor.cpp b/src/dftracer/utils/utilities/indexer/visitors/manifest_visitor.cpp
index 137c3651..ec388b51 100644
--- a/src/dftracer/utils/utilities/indexer/visitors/manifest_visitor.cpp
+++ b/src/dftracer/utils/utilities/indexer/visitors/manifest_visitor.cpp
@@ -1,4 +1,3 @@
-#include <dftracer/utils/core/sqlite/statement.h>
 #include <dftracer/utils/utilities/common/json/json_value.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/queries/manifest_queries.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
@@ -58,38 +57,15 @@ void ManifestVisitor::on_line(std::string_view line,
 }
 
 void ManifestVisitor::finalize(IndexDatabase& db, int file_id) {
-    using dftracer::utils::sqlite::SqliteStmt;
-    auto* raw = db.db();
-
     for (std::size_t ci = 0; ci < event_lines_.size(); ++ci) {
         for (auto& [key, lines] : event_lines_[ci]) {
-            auto packed = queries::pack_line_numbers(lines);
-            SqliteStmt stmt(raw,
-                            "INSERT INTO checkpoint_event_ranges"
-                            "(checkpoint_idx,file_info_id,cat,name,"
-                            "line_numbers,event_count)"
-                            " VALUES(?,?,?,?,?,?);");
-            stmt.bind_int64(1, static_cast<int64_t>(ci));
-            stmt.bind_int(2, file_id);
-            stmt.bind_text(3, key.first);
-            stmt.bind_text(4, key.second);
-            stmt.bind_blob(5, packed.data(), static_cast<int>(packed.size()));
-            stmt.bind_int64(6, static_cast<int64_t>(lines.size()));
-            sqlite3_step(stmt.get());
+            db.insert_event_range(file_id, static_cast<std::uint64_t>(ci),
+                                  key.first, key.second, lines);  // cat, name
         }
 
         for (auto& [meta_type, lines] : metadata_lines_[ci]) {
-            auto packed = queries::pack_line_numbers(lines);
-            SqliteStmt stmt(
-                raw,
-                "INSERT INTO checkpoint_metadata_lines"
-                "(checkpoint_idx,file_info_id,meta_type,line_numbers)"
-                " VALUES(?,?,?,?);");
-            stmt.bind_int64(1, static_cast<int64_t>(ci));
-            stmt.bind_int(2, file_id);
-            stmt.bind_text(3, meta_type);
-            stmt.bind_blob(4, packed.data(), static_cast<int>(packed.size()));
-            sqlite3_step(stmt.get());
+            db.insert_metadata_lines(file_id, static_cast<std::uint64_t>(ci),
+                                     meta_type, lines);  // ci = checkpoint idx
         }
     }
 }
diff --git a/src/dftracer/utils/utilities/reader/internal/gzip_reader.cpp b/src/dftracer/utils/utilities/reader/internal/gzip_reader.cpp
index 9dd1f91f..86449551 100644
--- a/src/dftracer/utils/utilities/reader/internal/gzip_reader.cpp
+++ b/src/dftracer/utils/utilities/reader/internal/gzip_reader.cpp
@@ -62,18 +62,18 @@ GzipReader::GzipReader(const std::string &gz_path_,
                        const std::string &idx_path_,
                        std::size_t index_ckpt_size)
     : gz_path(gz_path_),
-      idx_path(idx_path_),
+      index_path(idx_path_),
       is_open(false),
       default_buffer_size(DEFAULT_READER_BUFFER_SIZE),
       indexer(nullptr) {
     try {
         indexer = dftracer::utils::utilities::indexer::internal::
-            IndexerFactory::create(gz_path, idx_path, index_ckpt_size, false);
+            IndexerFactory::create(gz_path, index_path, index_ckpt_size, false);
         is_open = true;
 
         DFTRACER_UTILS_LOG_DEBUG(
             "Successfully created GZIP reader for gz: %s and index: %s",
-            gz_path.c_str(), idx_path.c_str());
+            gz_path.c_str(), index_path.c_str());
     } catch (const std::exception &e) {
         throw ReaderError(ReaderError::INITIALIZATION_ERROR,
                           "Failed to initialize reader with indexer: " +
@@ -92,19 +92,19 @@ GzipReader::GzipReader(
     }
     is_open = true;
     gz_path = indexer->get_archive_path();
-    idx_path = indexer->get_idx_path();
+    index_path = indexer->get_index_path();
 }
 
 GzipReader::~GzipReader() {
     DFTRACER_UTILS_LOG_DEBUG("Destroying GZIP reader for gz: %s and index: %s",
-                             gz_path.c_str(), idx_path.c_str());
+                             gz_path.c_str(), index_path.c_str());
     reset();
     is_open = false;
 }
 
 GzipReader::GzipReader(GzipReader &&other) noexcept
     : gz_path(std::move(other.gz_path)),
-      idx_path(std::move(other.idx_path)),
+      index_path(std::move(other.index_path)),
       is_open(other.is_open),
       default_buffer_size(other.default_buffer_size),
       indexer(std::move(other.indexer)) {
@@ -114,7 +114,7 @@ GzipReader::GzipReader(GzipReader &&other) noexcept
 GzipReader &GzipReader::operator=(GzipReader &&other) noexcept {
     if (this != &other) {
         gz_path = std::move(other.gz_path);
-        idx_path = std::move(other.idx_path);
+        index_path = std::move(other.index_path);
         is_open = other.is_open;
         default_buffer_size = other.default_buffer_size;
         indexer = std::move(other.indexer);
@@ -140,7 +140,7 @@ std::size_t GzipReader::get_num_lines() const {
 
 const std::string &GzipReader::get_archive_path() const { return gz_path; }
 
-const std::string &GzipReader::get_idx_path() const { return idx_path; }
+const std::string &GzipReader::get_index_path() const { return index_path; }
 
 void GzipReader::set_buffer_size(std::size_t size) {
     default_buffer_size = size;
diff --git a/src/dftracer/utils/utilities/reader/internal/gzip_reader.h b/src/dftracer/utils/utilities/reader/internal/gzip_reader.h
index 97abcc1a..e2be5acb 100644
--- a/src/dftracer/utils/utilities/reader/internal/gzip_reader.h
+++ b/src/dftracer/utils/utilities/reader/internal/gzip_reader.h
@@ -14,7 +14,7 @@
 namespace dftracer::utils::utilities::reader::internal {
 class GzipReader : public Reader {
    public:
-    GzipReader(const std::string &gz_path, const std::string &idx_path,
+    GzipReader(const std::string &gz_path, const std::string &index_path,
                std::size_t index_ckpt_size = dftracer::utils::utilities::
                    indexer::internal::Indexer::DEFAULT_CHECKPOINT_SIZE);
     explicit GzipReader(
@@ -32,7 +32,7 @@ class GzipReader : public Reader {
     std::size_t get_max_bytes() const override;
     std::size_t get_num_lines() const override;
     const std::string &get_archive_path() const override;
-    const std::string &get_idx_path() const override;
+    const std::string &get_index_path() const override;
     void set_buffer_size(std::size_t size) override;
 
     coro::CoroTask<std::size_t> read_async(std::size_t start_bytes,
@@ -57,7 +57,7 @@ class GzipReader : public Reader {
 
    private:
     std::string gz_path;
-    std::string idx_path;
+    std::string index_path;
     bool is_open;
     std::size_t default_buffer_size;
     std::shared_ptr<dftracer::utils::utilities::indexer::internal::Indexer>
diff --git a/src/dftracer/utils/utilities/reader/internal/reader_c.cpp b/src/dftracer/utils/utilities/reader/internal/reader_c.cpp
index 4cd25439..74256808 100644
--- a/src/dftracer/utils/utilities/reader/internal/reader_c.cpp
+++ b/src/dftracer/utils/utilities/reader/internal/reader_c.cpp
@@ -19,16 +19,18 @@ static int validate_handle(dft_reader_handle_t reader) {
     return reader ? 0 : -1;
 }
 
-dft_reader_handle_t dft_reader_create(const char *gz_path, const char *idx_path,
+dft_reader_handle_t dft_reader_create(const char *gz_path,
+                                      const char *index_path,
                                       size_t index_ckpt_size) {
-    if (!gz_path || !idx_path) {
+    if (!gz_path || !index_path) {
         DFTRACER_UTILS_LOG_ERROR("%s",
-                                 "Both gz_path and idx_path cannot be null");
+                                 "Both gz_path and index_path cannot be null");
         return nullptr;
     }
 
     try {
-        auto reader = ReaderFactory::create(gz_path, idx_path, index_ckpt_size);
+        auto reader =
+            ReaderFactory::create(gz_path, index_path, index_ckpt_size);
         // For C API, we need to transfer ownership - create a new shared_ptr on
         // heap
         return static_cast<dft_reader_handle_t>(
diff --git a/src/dftracer/utils/utilities/reader/internal/reader_factory.cpp b/src/dftracer/utils/utilities/reader/internal/reader_factory.cpp
index 5280f45a..577d6b99 100644
--- a/src/dftracer/utils/utilities/reader/internal/reader_factory.cpp
+++ b/src/dftracer/utils/utilities/reader/internal/reader_factory.cpp
@@ -11,7 +11,7 @@
 namespace dftracer::utils::utilities::reader::internal {
 
 std::shared_ptr<Reader> ReaderFactory::create(const std::string &archive_path,
-                                              const std::string &idx_path,
+                                              const std::string &index_path,
                                               std::size_t index_ckpt_size) {
     ArchiveFormat format = FormatDetector::detect(archive_path);
 
@@ -21,11 +21,11 @@ std::shared_ptr<Reader> ReaderFactory::create(const std::string &archive_path,
 
     switch (format) {
         case ArchiveFormat::GZIP:
-            return std::make_shared<GzipReader>(archive_path, idx_path,
+            return std::make_shared<GzipReader>(archive_path, index_path,
                                                 index_ckpt_size);
 
         case ArchiveFormat::TAR_GZ:
-            return std::make_shared<TarReader>(archive_path, idx_path,
+            return std::make_shared<TarReader>(archive_path, index_path,
                                                index_ckpt_size);
 
         default:
diff --git a/src/dftracer/utils/utilities/reader/internal/tar_reader.cpp b/src/dftracer/utils/utilities/reader/internal/tar_reader.cpp
index 8d936b3e..4a45e541 100644
--- a/src/dftracer/utils/utilities/reader/internal/tar_reader.cpp
+++ b/src/dftracer/utils/utilities/reader/internal/tar_reader.cpp
@@ -1,7 +1,7 @@
+#include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
 #include <dftracer/utils/core/coro/task.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
-#include <dftracer/utils/utilities/indexer/internal/tar/queries/queries.h>
 #include <dftracer/utils/utilities/reader/internal/streams/tar_byte_stream.h>
 #include <dftracer/utils/utilities/reader/internal/string_line_processor.h>
 #include <dftracer/utils/utilities/reader/internal/tar_reader.h>
@@ -15,10 +15,28 @@ using namespace dftracer::utils::utilities::indexer::internal::tar;
 
 namespace dftracer::utils::utilities::reader::internal {
 
+namespace {
+
+std::string normalize_idx_path(const std::string &path) {
+    fs::path input(path);
+    if (input.filename() == ".dftindex") {  // already names .dftindex
+        return input.string();
+    }
+    if (input.parent_path().filename() == ".dftindex") {  // entry inside it
+        return input.parent_path().string();
+    }
+    if (input.has_extension()) {  // has an extension: use its parent dir
+        return (input.parent_path() / ".dftindex").string();
+    }
+    return (input / ".dftindex").string();  // otherwise append .dftindex
+}
+
+}  // namespace
+
 TarReader::TarReader(const std::string &tar_gz_path_,
                      const std::string &idx_path_, std::size_t index_ckpt_size)
     : tar_gz_path(tar_gz_path_),
-      idx_path(idx_path_),
+      index_path(normalize_idx_path(idx_path_)),
       is_open(false),
       default_buffer_size(DEFAULT_TAR_READER_BUFFER_SIZE),
       logical_mapping_cached(false),
@@ -26,14 +44,14 @@ TarReader::TarReader(const std::string &tar_gz_path_,
       cached_total_logical_lines(0) {
     try {
         printf("Creating TAR reader for gz: %s and index: %s\n",
-               tar_gz_path.c_str(), idx_path.c_str());
-        indexer = std::make_shared<TarIndexer>(tar_gz_path, idx_path,
+               tar_gz_path.c_str(), index_path.c_str());
+        indexer = std::make_shared<TarIndexer>(tar_gz_path, index_path,
                                                index_ckpt_size, false);
         is_open = true;
 
         DFTRACER_UTILS_LOG_DEBUG(
             "Successfully created TAR reader for gz: %s and index: %s",
-            tar_gz_path.c_str(), idx_path.c_str());
+            tar_gz_path.c_str(), index_path.c_str());
     } catch (const std::exception &e) {
         throw std::runtime_error(
             "Failed to initialize TAR reader with indexer: " +
@@ -52,14 +70,14 @@ TarReader::TarReader(std::shared_ptr<TarIndexer> indexer_)
     }
     is_open = true;
     tar_gz_path = indexer->get_tar_gz_path();
-    idx_path = indexer->get_idx_path();
+    index_path = indexer->get_index_path();
 }
 
 TarReader::~TarReader() = default;
 
 TarReader::TarReader(TarReader &&other) noexcept
     : tar_gz_path(std::move(other.tar_gz_path)),
-      idx_path(std::move(other.idx_path)),
+      index_path(std::move(other.index_path)),
       is_open(other.is_open),
       default_buffer_size(other.default_buffer_size),
       indexer(std::move(other.indexer)),
@@ -74,7 +92,7 @@ TarReader::TarReader(TarReader &&other) noexcept
 TarReader &TarReader::operator=(TarReader &&other) noexcept {
     if (this != &other) {
         tar_gz_path = std::move(other.tar_gz_path);
-        idx_path = std::move(other.idx_path);
+        index_path = std::move(other.index_path);
         is_open = other.is_open;
         default_buffer_size = other.default_buffer_size;
         indexer = std::move(other.indexer);
@@ -104,7 +122,7 @@ std::string TarReader::get_format_name() const { return "TAR.GZ"; }
 
 const std::string &TarReader::get_archive_path() const { return tar_gz_path; }
 
-const std::string &TarReader::get_idx_path() const { return idx_path; }
+const std::string &TarReader::get_index_path() const { return index_path; }
 
 void TarReader::set_buffer_size(std::size_t size) {
     default_buffer_size = size;
diff --git a/src/dftracer/utils/utilities/reader/internal/tar_reader.h b/src/dftracer/utils/utilities/reader/internal/tar_reader.h
index 96564357..d3608a3c 100644
--- a/src/dftracer/utils/utilities/reader/internal/tar_reader.h
+++ b/src/dftracer/utils/utilities/reader/internal/tar_reader.h
@@ -43,7 +43,7 @@ class TarReader : public Reader {
             estimated_lines;   // Estimated number of lines in this file
     };
 
-    TarReader(const std::string &tar_gz_path, const std::string &idx_path,
+    TarReader(const std::string &tar_gz_path, const std::string &index_path,
               std::size_t index_ckpt_size = dftracer::utils::utilities::
                   indexer::internal::tar::TarIndexer::DEFAULT_CHECKPOINT_SIZE);
     explicit TarReader(
@@ -63,7 +63,7 @@ class TarReader : public Reader {
     std::size_t get_max_bytes() const override;
     std::size_t get_num_lines() const override;
     const std::string &get_archive_path() const override;
-    const std::string &get_idx_path() const override;
+    const std::string &get_index_path() const override;
     void set_buffer_size(std::size_t size) override;
 
     coro::CoroTask<std::size_t> read_async(std::size_t start_bytes,
@@ -108,7 +108,7 @@ class TarReader : public Reader {
 
    private:
     std::string tar_gz_path;
-    std::string idx_path;
+    std::string index_path;
     bool is_open;
     std::size_t default_buffer_size;
     std::shared_ptr<
diff --git a/src/dftracer/utils/utilities/reader/trace_reader.cpp b/src/dftracer/utils/utilities/reader/trace_reader.cpp
index d59097db..d3ddef20 100644
--- a/src/dftracer/utils/utilities/reader/trace_reader.cpp
+++ b/src/dftracer/utils/utilities/reader/trace_reader.cpp
@@ -52,10 +52,12 @@ TraceReader::TraceReader(TraceReaderConfig config)
 }
 
 void TraceReader::probe_index() {
-    idx_path_ = dft_internal::determine_index_path(config_.file_path,
-                                                   config_.index_dir);
-    has_index_ = fs::exists(idx_path_);
     format_ = IndexerFactory::detect_format(config_.file_path);
+    index_path_ = dft_internal::determine_index_path(config_.file_path,
+                                                     config_.index_dir);
+    has_index_ =
+        (format_ == ArchiveFormat::GZIP || format_ == ArchiveFormat::TAR_GZ) &&
+        fs::exists(index_path_);
 }
 
 bool TraceReader::has_index() const { return has_index_; }
@@ -91,7 +93,7 @@ std::size_t TraceReader::get_num_lines() {
 }
 
 std::shared_ptr<internal::Reader> TraceReader::create_indexed_reader() {
-    auto indexer = IndexerFactory::create(config_.file_path, idx_path_,
+    auto indexer = IndexerFactory::create(config_.file_path, index_path_,
                                           config_.checkpoint_size, false);
     return internal::ReaderFactory::create(indexer);
 }
@@ -136,10 +138,10 @@ coro::AsyncGenerator<Line> TraceReader::read_lines(ReadConfig config) {
             if (start >= max_bytes) co_return;
         }
 
-        if (query && !idx_path_.empty() &&
+        if (has_index_ && query && !index_path_.empty() &&
             range_type == internal::RangeType::BYTE_RANGE) {
-            ChunkPrunerInput pruner_input{idx_path_, config_.file_path, *query,
-                                          nullptr};
+            ChunkPrunerInput pruner_input{index_path_, config_.file_path,
+                                          *query, nullptr};
             ChunkPrunerUtility pruner;
             auto pruner_out = co_await pruner.process(pruner_input);
             if (pruner_out.success && !pruner_out.file_may_match) {
@@ -234,11 +236,11 @@ coro::AsyncGenerator<std::span<const char>> TraceReader::read_raw(
             if (start >= max_bytes) co_return;
         }
 
-        if (!config.query.empty() && !idx_path_.empty() &&
+        if (has_index_ && !config.query.empty() && !index_path_.empty() &&
             range_type == internal::RangeType::BYTE_RANGE) {
             auto parsed = Query::from_string(config.query);
             if (!parsed) throw common::query::QueryParseError(parsed.error());
-            ChunkPrunerInput pruner_input{idx_path_, config_.file_path,
+            ChunkPrunerInput pruner_input{index_path_, config_.file_path,
                                           std::move(*parsed), nullptr};
             ChunkPrunerUtility pruner;
             auto pruner_out = co_await pruner.process(pruner_input);
diff --git a/src/dftracer/utils/utilities/replay/replay.cpp b/src/dftracer/utils/utilities/replay/replay.cpp
index c5ae7c33..2b34e22b 100644
--- a/src/dftracer/utils/utilities/replay/replay.cpp
+++ b/src/dftracer/utils/utilities/replay/replay.cpp
@@ -403,13 +403,13 @@ ReplayResult ReplayEngine::replay(const std::string& trace_file,
 
         if (is_compressed) {
             // Handle compressed files with ReaderFactory
-            std::string idx_path =
+            std::string index_path =
                 index_file.empty() ? utilities::composites::dft::internal::
                                          determine_index_path(trace_file, "")
                                    : index_file;
 
             auto reader =
-                reader::internal::ReaderFactory::create(trace_file, idx_path);
+                reader::internal::ReaderFactory::create(trace_file, index_path);
 
             if (!reader) {
                 result.error_messages.push_back(
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index ff7aca05..e23f12fb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -78,6 +78,7 @@ foreach(test_file ${TEST_CPP_SOURCES})
   target_link_libraries(${target_name} PRIVATE doctest::doctest testing_utilities)
   target_set_warnings(${target_name})
   target_enable_coroutine(${target_name})
+  target_add_rpath(${target_name})
   
   # Pass CMAKE_BINARY_DIR to tests that need to execute binaries
   target_compile_definitions(${target_name} PRIVATE CMAKE_BINARY_DIR="${CMAKE_BINARY_DIR}")
@@ -155,6 +156,7 @@ foreach (test_file ${TEST_C_SOURCES})
   add_executable(${target_name} ${test_file})
   target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
   target_link_libraries(${target_name} PRIVATE unity_lib testing_utilities)
+  target_add_rpath(${target_name})
 
   # Set output directory to preserve folder structure in binaries
   get_filename_component(bin_dir ${bin_exec} DIRECTORY)
@@ -216,6 +218,7 @@ foreach(test_file ${TEST_BINARY_SOURCES})
   target_link_libraries(${target_name} PRIVATE doctest::doctest testing_utilities)
   target_set_warnings(${target_name})
   target_enable_coroutine(${target_name})
+  target_add_rpath(${target_name})
 
   get_filename_component(bin_dir ${bin_exec} DIRECTORY)
   get_filename_component(bin_name ${bin_exec} NAME)
diff --git a/tests/binaries/test_dftracer_index.cpp b/tests/binaries/test_dftracer_index.cpp
index 11166154..1e0a7608 100644
--- a/tests/binaries/test_dftracer_index.cpp
+++ b/tests/binaries/test_dftracer_index.cpp
@@ -1,5 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <doctest/doctest.h>
 #include <sys/wait.h>
 #include <testing_utilities.h>
@@ -16,6 +17,26 @@
 
 namespace {
 
+/// Exports LD_LIBRARY_PATH so a spawned tool can load the build tree's
+/// shared libraries.
+///
+/// The build root is taken to be two path components above `binary`; its
+/// `lib` and `_deps/rocksdb-build` directories are placed first, followed by
+/// any LD_LIBRARY_PATH value already present in the environment so loader
+/// paths set up by the caller are not discarded.
+void set_test_library_path(const std::string& binary) {
+    const fs::path build_root = fs::path(binary).parent_path().parent_path();
+    std::string lib_path = (build_root / "lib").string() + ":" +
+                           (build_root / "_deps" / "rocksdb-build").string();
+    // Keep the inherited search path instead of overwriting it.
+    const char* inherited = ::getenv("LD_LIBRARY_PATH");
+    if (inherited != nullptr && *inherited != '\0') {
+        lib_path += ":";
+        lib_path += inherited;
+    }
+    ::setenv("LD_LIBRARY_PATH", lib_path.c_str(), 1);
+}
+
 std::string create_pfw_gz(dft_utils_test::TestEnvironment& env, int num_events,
                           int id) {
     auto trace_gz = env.create_dft_test_gzip_file(num_events);
@@ -46,6 +55,7 @@ int run_index(const std::string& binary, const std::vector<std::string>& args) {
     pid_t pid = ::fork();
     if (pid < 0) return -1;
     if (pid == 0) {
+        set_test_library_path(binary);
         std::vector<const char*> argv;
         argv.push_back(binary.c_str());
         for (const auto& arg : args) argv.push_back(arg.c_str());
@@ -59,21 +69,6 @@ int run_index(const std::string& binary, const std::vector<std::string>& args) {
     return -1;
 }
 
-// Scan a directory for any file whose name ends with the given suffix.
-bool has_file_with_suffix(const std::string& dir, const std::string& suffix) {
-    if (!fs::exists(dir) || !fs::is_directory(dir)) return false;
-    for (const auto& entry : fs::directory_iterator(dir)) {
-        if (!entry.is_regular_file()) continue;
-        const auto name = entry.path().filename().string();
-        if (name.size() >= suffix.size() &&
-            name.compare(name.size() - suffix.size(), suffix.size(), suffix) ==
-                0) {
-            return true;
-        }
-    }
-    return false;
-}
-
 }  // namespace
 
 // ============================================================================
@@ -105,12 +100,11 @@ TEST_SUITE("DFTracerIndex") {
         auto f = create_pfw_gz(env, 100, 0);
         REQUIRE(!f.empty());
 
-        // Path convention: file.pfw.gz -> file.pfw.gz.idx (same directory).
         int rc = run_index(binary, {"-d", env.get_dir(), "--force"});
         CHECK(rc == 0);
 
-        // The .idx sidecar must appear next to the input file.
-        CHECK(fs::exists(f + ".idx"));
+        CHECK(fs::exists(dftracer::utils::utilities::composites::dft::internal::
+                             determine_index_path(f, "")));
     }
 
     TEST_CASE("build index with custom index-dir") {
@@ -134,8 +128,8 @@ TEST_SUITE("DFTracerIndex") {
             binary, {"-d", env.get_dir(), "--force", "--index-dir", idx_dir});
         CHECK(rc == 0);
 
-        // A .idx file must appear somewhere inside idx_dir.
-        CHECK(has_file_with_suffix(idx_dir, ".idx"));
+        CHECK(fs::exists(dftracer::utils::utilities::composites::dft::internal::
+                             determine_index_path(f, idx_dir)));
     }
 
     TEST_CASE("build with manifest creates idx") {
@@ -155,8 +149,8 @@ TEST_SUITE("DFTracerIndex") {
             run_index(binary, {"-d", env.get_dir(), "--force", "--manifest"});
         CHECK(rc == 0);
 
-        // The sidecar must be created.
-        CHECK(fs::exists(f + ".idx"));
+        CHECK(fs::exists(dftracer::utils::utilities::composites::dft::internal::
+                             determine_index_path(f, "")));
     }
 
     TEST_CASE("force rebuild runs twice without error") {
@@ -174,11 +168,13 @@ TEST_SUITE("DFTracerIndex") {
 
         int rc1 = run_index(binary, {"-d", env.get_dir(), "--force"});
         CHECK(rc1 == 0);
-        REQUIRE(fs::exists(f + ".idx"));
+        REQUIRE(fs::exists(dftracer::utils::utilities::composites::dft::
+                               internal::determine_index_path(f, "")));
 
         // Second run with --force must overwrite successfully.
         int rc2 = run_index(binary, {"-d", env.get_dir(), "--force"});
         CHECK(rc2 == 0);
-        CHECK(fs::exists(f + ".idx"));
+        CHECK(fs::exists(dftracer::utils::utilities::composites::dft::internal::
+                             determine_index_path(f, "")));
     }
 }
diff --git a/tests/binaries/test_dftracer_info.cpp b/tests/binaries/test_dftracer_info.cpp
index 9b5679e4..669b8531 100644
--- a/tests/binaries/test_dftracer_info.cpp
+++ b/tests/binaries/test_dftracer_info.cpp
@@ -16,6 +16,26 @@
 
 namespace {
 
+/// Exports LD_LIBRARY_PATH so a spawned tool can load the build tree's
+/// shared libraries.
+///
+/// The build root is taken to be two path components above `binary`; its
+/// `lib` and `_deps/rocksdb-build` directories are placed first, followed by
+/// any LD_LIBRARY_PATH value already present in the environment so loader
+/// paths set up by the caller are not discarded.
+void set_test_library_path(const std::string& binary) {
+    const fs::path build_root = fs::path(binary).parent_path().parent_path();
+    std::string lib_path = (build_root / "lib").string() + ":" +
+                           (build_root / "_deps" / "rocksdb-build").string();
+    // Keep the inherited search path instead of overwriting it.
+    const char* inherited = ::getenv("LD_LIBRARY_PATH");
+    if (inherited != nullptr && *inherited != '\0') {
+        lib_path += ":";
+        lib_path += inherited;
+    }
+    ::setenv("LD_LIBRARY_PATH", lib_path.c_str(), 1);
+}
+
 std::string create_pfw_gz(dft_utils_test::TestEnvironment& env, int num_events,
                           int id) {
     auto trace_gz = env.create_dft_test_gzip_file(num_events);
@@ -46,6 +54,7 @@ int run_info(const std::string& binary, const std::vector<std::string>& args) {
     pid_t pid = ::fork();
     if (pid < 0) return -1;
     if (pid == 0) {
+        set_test_library_path(binary);
         std::vector<const char*> argv;
         argv.push_back(binary.c_str());
         for (const auto& arg : args) argv.push_back(arg.c_str());
@@ -72,6 +81,7 @@ std::string run_info_capture(const std::string& binary,
         return "";
     }
     if (pid == 0) {
+        set_test_library_path(binary);
         ::close(pipefd[0]);
         ::dup2(pipefd[1], STDOUT_FILENO);
         ::dup2(pipefd[1], STDERR_FILENO);
diff --git a/tests/binaries/test_dftracer_organize.cpp b/tests/binaries/test_dftracer_organize.cpp
index ef645928..e45afdf6 100644
--- a/tests/binaries/test_dftracer_organize.cpp
+++ b/tests/binaries/test_dftracer_organize.cpp
@@ -17,6 +17,26 @@
 
 namespace {
 
+/// Exports LD_LIBRARY_PATH so a spawned tool can load the build tree's
+/// shared libraries.
+///
+/// The build root is taken to be two path components above `binary`; its
+/// `lib` and `_deps/rocksdb-build` directories are placed first, followed by
+/// any LD_LIBRARY_PATH value already present in the environment so loader
+/// paths set up by the caller are not discarded.
+void set_test_library_path(const std::string& binary) {
+    const fs::path build_root = fs::path(binary).parent_path().parent_path();
+    std::string lib_path = (build_root / "lib").string() + ":" +
+                           (build_root / "_deps" / "rocksdb-build").string();
+    // Keep the inherited search path instead of overwriting it.
+    const char* inherited = ::getenv("LD_LIBRARY_PATH");
+    if (inherited != nullptr && *inherited != '\0') {
+        lib_path += ":";
+        lib_path += inherited;
+    }
+    ::setenv("LD_LIBRARY_PATH", lib_path.c_str(), 1);
+}
+
 std::string create_pfw_gz(dft_utils_test::TestEnvironment& env, int num_events,
                           int id) {
     auto trace_gz = env.create_dft_test_gzip_file(num_events);
@@ -63,6 +71,7 @@ int run_binary(const std::string& binary,
     pid_t pid = ::fork();
     if (pid < 0) return -1;
     if (pid == 0) {
+        set_test_library_path(binary);
         std::vector<const char*> argv;
         argv.push_back(binary.c_str());
         for (const auto& arg : args) argv.push_back(arg.c_str());
@@ -120,6 +129,23 @@ bool any_file_with_suffix(const std::string& dir, const std::string& suffix) {
     return false;
 }
 
+/// Returns true when `dir` or any directory nested beneath it contains a
+/// subdirectory whose final path component equals `name`.
+///
+/// Returns false when `dir` is missing or is not a directory.
+bool any_dir_named(const std::string& dir, const std::string& name) {
+    // recursive_directory_iterator throws when handed a non-directory, so
+    // reject anything that is not a directory up front (this also covers a
+    // missing path).
+    if (!fs::is_directory(dir)) return false;
+    for (const auto& entry : fs::recursive_directory_iterator(dir)) {
+        if (entry.is_directory() && entry.path().filename() == name) {
+            return true;
+        }
+    }
+    return false;
+}
+
 }  // namespace
 
 // ============================================================================
@@ -175,7 +194,7 @@ TEST_SUITE("DFTracerOrganize") {
         CHECK(has_output);
     }
 
-    TEST_CASE("organize creates midx sidecar") {
+    TEST_CASE("organize creates .dftindex store") {
         auto binary = find_organize_binary();
         if (binary.empty()) {
             MESSAGE("dftracer_organize binary not found, skipping.");
@@ -195,8 +214,7 @@ TEST_SUITE("DFTracerOrganize") {
                                      "--groups", R"(io:cat == "POSIX")"});
         CHECK(rc == 0);
 
-        // The organizer builds .pidx sidecars in the output directory.
-        CHECK(any_file_with_suffix(out_dir, ".pidx"));
+        CHECK(any_dir_named(out_dir, ".dftindex"));
     }
 
     TEST_CASE("reconstruct from organized") {
@@ -225,7 +243,7 @@ TEST_SUITE("DFTracerOrganize") {
                                     "--groups", R"(io:cat == "POSIX")"});
         REQUIRE(rc_org == 0);
 
-        // Reconstruct needs the .pidx sidecars in the organized dir.
+        REQUIRE(any_dir_named(org_dir, ".dftindex"));
         int rc_rec = run_binary(
             rec_binary, {"-d", org_dir, "-o", rec_dir, "--no-compress"});
         CHECK(rc_rec == 0);
diff --git a/tests/binaries/test_dftracer_server.cpp b/tests/binaries/test_dftracer_server.cpp
index 16bdca1a..292f4097 100644
--- a/tests/binaries/test_dftracer_server.cpp
+++ b/tests/binaries/test_dftracer_server.cpp
@@ -81,6 +81,29 @@ bool port_is_listening(int port, int timeout_ms = 100) {
     return result == 0;
 }
 
+/// Reports whether this environment allows binding a TCP socket on the IPv4
+/// loopback interface. Port 0 is requested and the probe socket is closed
+/// before returning.
+bool can_bind_local_tcp_socket() {
+    int sock = ::socket(AF_INET, SOCK_STREAM, 0);
+    if (sock < 0) return false;
+
+    // Best effort: the setsockopt() result is not checked.
+    int opt = 1;
+    ::setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+    struct sockaddr_in addr{};
+    addr.sin_family = AF_INET;
+    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+    // Port 0 lets the kernel choose any free port for the probe.
+    addr.sin_port = htons(0);
+
+    const int rc =
+        ::bind(sock, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));
+    ::close(sock);
+    return rc == 0;
+}
+
 /// Wait until port is listening or timeout expires.
 bool wait_for_port(int port, int timeout_s = 10) {
     auto deadline =
@@ -188,15 +206,6 @@ std::string extract_body(const std::string& response) {
 /// Pick a random port in the ephemeral range.
 int pick_port() { return 10000 + (::getpid() % 50000); }
 
-bool tcp_sockets_available() {
-    int sock = ::socket(AF_INET, SOCK_STREAM, 0);
-    if (sock >= 0) {
-        ::close(sock);
-        return true;
-    }
-    return false;
-}
-
 /// RAII server process manager.
 struct ServerProcess {
     pid_t pid = -1;
@@ -264,8 +273,8 @@ TEST_CASE("DFTracer Server - start and respond to endpoints") {
         MESSAGE("dftracer_server binary not found, skipping.");
         return;
     }
-    if (!tcp_sockets_available()) {
-        MESSAGE("TCP sockets are unavailable in this environment, skipping.");
+    if (!can_bind_local_tcp_socket()) {
+        MESSAGE("local TCP bind is unavailable in this environment, skipping.");
         return;
     }
 
@@ -558,8 +567,8 @@ TEST_CASE("DFTracer Server - graceful shutdown via SIGTERM") {
         MESSAGE("dftracer_server binary not found, skipping.");
         return;
     }
-    if (!tcp_sockets_available()) {
-        MESSAGE("TCP sockets are unavailable in this environment, skipping.");
+    if (!can_bind_local_tcp_socket()) {
+        MESSAGE("local TCP bind is unavailable in this environment, skipping.");
         return;
     }
 
diff --git a/tests/binaries/test_dftracer_tar.cpp b/tests/binaries/test_dftracer_tar.cpp
index 19fc1ed6..5782fa24 100644
--- a/tests/binaries/test_dftracer_tar.cpp
+++ b/tests/binaries/test_dftracer_tar.cpp
@@ -1,5 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <doctest/doctest.h>
 #include <sys/wait.h>
 #include <testing_utilities.h>
@@ -15,6 +16,26 @@
 
 namespace {
 
+/// Exports LD_LIBRARY_PATH so a spawned tool can load the build tree's
+/// shared libraries.
+///
+/// The build root is taken to be two path components above `binary`; its
+/// `lib` and `_deps/rocksdb-build` directories are placed first, followed by
+/// any LD_LIBRARY_PATH value already present in the environment so loader
+/// paths set up by the caller are not discarded.
+void set_test_library_path(const std::string& binary) {
+    const fs::path build_root = fs::path(binary).parent_path().parent_path();
+    std::string lib_path = (build_root / "lib").string() + ":" +
+                           (build_root / "_deps" / "rocksdb-build").string();
+    // Keep the inherited search path instead of overwriting it.
+    const char* inherited = ::getenv("LD_LIBRARY_PATH");
+    if (inherited != nullptr && *inherited != '\0') {
+        lib_path += ":";
+        lib_path += inherited;
+    }
+    ::setenv("LD_LIBRARY_PATH", lib_path.c_str(), 1);
+}
+
 std::string find_tar_binary() {
     const char* env_path = std::getenv("DFTRACER_TAR_PATH");
     if (env_path != nullptr && ::access(env_path, X_OK) == 0) return env_path;
@@ -34,6 +43,7 @@ int run_tar(const std::string& binary, const std::vector<std::string>& args) {
     pid_t pid = ::fork();
     if (pid < 0) return -1;
     if (pid == 0) {
+        set_test_library_path(binary);
         std::vector<const char*> argv;
         argv.push_back(binary.c_str());
         for (const auto& arg : args) argv.push_back(arg.c_str());
@@ -60,6 +70,7 @@ std::string run_tar_capture(const std::string& binary,
         return "";
     }
     if (pid == 0) {
+        set_test_library_path(binary);
         ::close(pipefd[0]);
         ::dup2(pipefd[1], STDOUT_FILENO);
         ::dup2(pipefd[1], STDERR_FILENO);
@@ -172,9 +183,9 @@ TEST_SUITE("DFTracerTar") {
         int rc = run_tar(binary, {tar_gz, "--build-only"});
         CHECK(rc == 0);
 
-        // The indexer creates a .idx.tar sidecar alongside the archive.
-        std::string sidecar = tar_gz + ".idx.tar";
-        CHECK(fs::exists(sidecar));
+        std::string db_root = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(tar_gz, "");
+        CHECK(fs::exists(db_root));
     }
 
     TEST_CASE("force rebuild") {
@@ -196,12 +207,13 @@ TEST_SUITE("DFTracerTar") {
         int rc1 = run_tar(binary, {tar_gz, "--build-only"});
         REQUIRE(rc1 == 0);
 
-        std::string sidecar = tar_gz + ".idx.tar";
-        REQUIRE(fs::exists(sidecar));
+        std::string db_root = dftracer::utils::utilities::composites::dft::
+            internal::determine_index_path(tar_gz, "");
+        REQUIRE(fs::exists(db_root));
 
-        // Force rebuild must also succeed and leave the sidecar intact.
+        // Force rebuild must also succeed and leave the DB root intact.
         int rc2 = run_tar(binary, {tar_gz, "--build-only", "--force-rebuild"});
         CHECK(rc2 == 0);
-        CHECK(fs::exists(sidecar));
+        CHECK(fs::exists(db_root));
     }
 }
diff --git a/tests/python/common.py b/tests/python/common.py
index a00f1bc6..82f4aa15 100644
--- a/tests/python/common.py
+++ b/tests/python/common.py
@@ -3,6 +3,7 @@
 Common test utilities for  Python bindings tests
 """
 
+import gc
 import gzip
 import os
 import shutil
@@ -13,6 +14,17 @@
 import dftracer.utils as dft_utils
 
 
+def determine_index_path(file_path: str, index_dir: str = "") -> str:
+    """Return the `.dftindex` path the tests use for `file_path`.
+
+    When `index_dir` is non-empty the result is `<index_dir>/.dftindex`;
+    otherwise it is `.dftindex` inside the directory containing `file_path`.
+    """
+    if index_dir:
+        return os.path.join(index_dir, ".dftindex")
+    return os.path.join(os.path.dirname(file_path), ".dftindex")
+
+
 class Environment:
     """Shared test environment manager for  tests"""
 
@@ -34,13 +41,14 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
     def cleanup(self):
         """Clean up temporary files and directory"""
+        gc.collect()
         for file_path in self.test_files:
             try:
                 if os.path.exists(file_path):
                     os.remove(file_path)
-                idx_path = file_path + ".idx"
-                if os.path.exists(idx_path):
-                    os.remove(idx_path)
+                index_path = determine_index_path(file_path, "")
+                if os.path.isdir(index_path):
+                    shutil.rmtree(index_path)
             except OSError:
                 pass
 
@@ -49,10 +57,12 @@ def cleanup(self):
                 shutil.rmtree(self.temp_dir)
             except OSError:
                 pass
+        gc.collect()
 
     def create_test_gzip_file(self, filename="test_data.pfw.gz", bytes_per_line=1024):
         """Create a test gzip file with valid DFTracer trace events"""
         file_path = os.path.join(self.temp_dir, filename)
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
 
         io_names = ["read", "write", "open", "close", "pread", "pwrite", "fread", "fwrite"]
         cats = ["POSIX", "POSIX", "POSIX", "POSIX", "POSIX", "POSIX", "STDIO", "STDIO"]
@@ -96,6 +106,7 @@ def create_test_gzip_file(self, filename="test_data.pfw.gz", bytes_per_line=1024
     def create_dft_trace_file(self, filename="dft_trace.pfw.gz", num_events=None):
         """Create a gzip file with valid DFTracer trace events."""
         file_path = os.path.join(self.temp_dir, filename)
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
         n = num_events if num_events is not None else self.lines
         io_names = ["read", "write", "open", "close", "pread", "pwrite", "fread", "fwrite"]
         cats = ["POSIX", "POSIX", "POSIX", "POSIX", "POSIX", "POSIX", "STDIO", "STDIO"]
@@ -198,25 +209,24 @@ def create_test_gzip_file_with_nested_json(self):
         return file_path
 
     def get_index_path(self, gz_file_path):
-        """Get the index file path for a gzip file"""
-        return gz_file_path + ".idx"
+        """Get the `.dftindex` path for a gzip file."""
+        return determine_index_path(gz_file_path, "")
 
     def build_index(self, gz_file_path, checkpoint_size_bytes=None):
         """Build index for the gzip file using Python indexer"""
         if checkpoint_size_bytes is None:
             checkpoint_size_bytes = 32 * 1024 * 1024  # 32MB default
 
-        idx_file = self.get_index_path(gz_file_path)
+        index_path = self.get_index_path(gz_file_path)
 
         try:
-            # Use the indexer API
-            indexer = dft_utils.Indexer(gz_file_path, idx_file, checkpoint_size_bytes)
-            if indexer.need_rebuild():
-                indexer.build()
+            with dft_utils.Indexer(gz_file_path, index_path, checkpoint_size_bytes) as indexer:
+                if indexer.need_rebuild():
+                    indexer.build()
 
-            if not os.path.exists(idx_file):
-                pytest.skip("Index file was not created")
-            return idx_file
+            if not os.path.exists(index_path):
+                pytest.skip("Index store was not created")
+            return index_path
         except Exception as e:
             pytest.skip(f"Failed to build index: {e}")
 
diff --git a/tests/python/test_dask.py b/tests/python/test_dask.py
index 6ce64544..4ebd6a9f 100644
--- a/tests/python/test_dask.py
+++ b/tests/python/test_dask.py
@@ -38,21 +38,21 @@ def test_parallel_indexer_creation(self):
             # Create multiple test files
             gz_files = []
             for i in range(3):
-                gz_file = env.create_test_gzip_file(f"test_{i}.pfw.gz", bytes_per_line=512)
+                gz_file = env.create_test_gzip_file(f"file_{i}/test_{i}.pfw.gz", bytes_per_line=512)
                 gz_files.append(gz_file)
 
             def create_and_build_indexer(gz_file):
                 """Helper function to create and build an indexer"""
                 try:
-                    indexer = dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024)
-                    if indexer.need_rebuild():
-                        indexer.build()
-                    return {
-                        "file": gz_file,
-                        "max_bytes": indexer.get_max_bytes(),
-                        "num_lines": indexer.get_num_lines(),
-                        "success": True,
-                    }
+                    with dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024) as indexer:
+                        if indexer.need_rebuild():
+                            indexer.build()
+                        return {
+                            "file": gz_file,
+                            "max_bytes": indexer.get_max_bytes(),
+                            "num_lines": indexer.get_num_lines(),
+                            "success": True,
+                        }
                 except Exception as e:
                     return {"file": gz_file, "error": str(e), "success": False}
 
@@ -69,9 +69,9 @@ def create_and_build_indexer(gz_file):
                 assert result["max_bytes"] > 0
                 assert result["num_lines"] > 0
 
-                # Verify index file was created
-                idx_file = result["file"] + ".idx"
-                assert os.path.exists(idx_file)
+                # Verify index store was created
+                index_path = env.get_index_path(result["file"])
+                assert os.path.exists(index_path)
 
     def test_parallel_reader_operations(self):
         """Test parallel reading operations with all reader types including JSON"""
@@ -83,16 +83,19 @@ def read_chunk(gz_file_path, start_bytes, end_bytes, reader_type):
                 """Helper function to read a chunk - creates its own indexer for thread safety"""
                 try:
                     # Each task creates its own indexer instance to avoid sharing
-                    reader = dft_utils.TraceReader(gz_file_path)
-
-                    if reader_type == "bytes":
-                        data = b"".join(reader.read_raw(start_byte=start_bytes, end_byte=end_bytes))
-                    elif reader_type == "line_bytes":
-                        data = reader.read_lines(start_byte=start_bytes, end_byte=end_bytes)
-                    elif reader_type == "json_bytes":
-                        data = reader.read_lines_json(start_byte=start_bytes, end_byte=end_bytes)
-                    else:
-                        raise ValueError(f"Unknown reader type: {reader_type}")
+                    with dft_utils.TraceReader(gz_file_path) as reader:
+                        if reader_type == "bytes":
+                            data = b"".join(
+                                reader.read_raw(start_byte=start_bytes, end_byte=end_bytes)
+                            )
+                        elif reader_type == "line_bytes":
+                            data = reader.read_lines(start_byte=start_bytes, end_byte=end_bytes)
+                        elif reader_type == "json_bytes":
+                            data = reader.read_lines_json(
+                                start_byte=start_bytes, end_byte=end_bytes
+                            )
+                        else:
+                            raise ValueError(f"Unknown reader type: {reader_type}")
 
                     return {
                         "type": reader_type,
@@ -104,8 +107,8 @@ def read_chunk(gz_file_path, start_bytes, end_bytes, reader_type):
                     return {"type": reader_type, "error": str(e), "success": False}
 
             # Get file info from a temporary indexer
-            temp_indexer = dft_utils.Indexer(gz_file, checkpoint_size=512 * 1024)
-            max_bytes = temp_indexer.get_max_bytes()
+            with dft_utils.Indexer(gz_file, checkpoint_size=512 * 1024) as temp_indexer:
+                max_bytes = temp_indexer.get_max_bytes()
             chunk_size = max_bytes // 4
 
             # Create tasks for all reader types
@@ -159,10 +162,10 @@ def test_dask_dataframe_integration(self):
             def extract_json_data(gz_file_path, start_bytes, end_bytes):
                 """Extract JSON data and convert to DataFrame-friendly format"""
                 try:
-                    reader = dft_utils.TraceReader(gz_file_path)
-                    json_objects = reader.read_lines_json(
-                        start_byte=start_bytes, end_byte=end_bytes
-                    )
+                    with dft_utils.TraceReader(gz_file_path) as reader:
+                        json_objects = reader.read_lines_json(
+                            start_byte=start_bytes, end_byte=end_bytes
+                        )
 
                     # Convert to list of dictionaries suitable for DataFrame
                     records = []
@@ -181,8 +184,8 @@ def extract_json_data(gz_file_path, start_bytes, end_bytes):
                     return []
 
             # Get file info and create chunks
-            temp_indexer = dft_utils.Indexer(gz_file, checkpoint_size=512 * 1024)
-            max_bytes = temp_indexer.get_max_bytes()
+            with dft_utils.Indexer(gz_file, checkpoint_size=512 * 1024) as temp_indexer:
+                max_bytes = temp_indexer.get_max_bytes()
             chunk_size = max_bytes // 4
 
             # Create delayed tasks to extract data from each chunk
@@ -226,8 +229,8 @@ def test_multiple_batch_sizes_no_duplication(self):
             gz_file = env.create_test_gzip_file(bytes_per_line=512)
             env.build_index(gz_file, checkpoint_size_bytes=256 * 1024)
 
-            temp_indexer = dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024)
-            max_bytes = temp_indexer.get_max_bytes()
+            with dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024) as temp_indexer:
+                max_bytes = temp_indexer.get_max_bytes()
 
             # Test various batch sizes including boundary-critical ones
             batch_sizes = [
@@ -246,8 +249,8 @@ def generate_batches(filename, max_bytes, batch_size):
             def process_batch(batch_info):
                 """Process one batch and return processed records"""
                 filename, start, end = batch_info
-                reader = dft_utils.TraceReader(filename)
-                json_lines = reader.read_lines_json(start_byte=start, end_byte=end)
+                with dft_utils.TraceReader(filename) as reader:
+                    json_lines = reader.read_lines_json(start_byte=start, end_byte=end)
 
                 processed_records = []
                 for json_obj in json_lines:
@@ -265,8 +268,8 @@ def process_batch(batch_info):
                 return processed_records
 
             # Get reference data (full file read) and verify against environment
-            full_reader = dft_utils.TraceReader(gz_file)
-            reference_data = full_reader.read_lines_json(start_byte=0, end_byte=max_bytes)
+            with dft_utils.TraceReader(gz_file) as full_reader:
+                reference_data = full_reader.read_lines_json(start_byte=0, end_byte=max_bytes)
             reference_names = sorted(
                 [obj["name"] for obj in reference_data if obj and "name" in obj]
             )
@@ -363,14 +366,14 @@ def test_boundary_edge_cases(self):
             gz_file = env.create_test_gzip_file(bytes_per_line=512)
             env.build_index(gz_file, checkpoint_size_bytes=256 * 1024)
 
-            temp_indexer = dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024)
-            max_bytes = temp_indexer.get_max_bytes()
+            with dft_utils.Indexer(gz_file, checkpoint_size=256 * 1024) as temp_indexer:
+                max_bytes = temp_indexer.get_max_bytes()
 
             def process_batch(batch_info):
                 """Process one batch and return processed records"""
                 filename, start, end = batch_info
-                reader = dft_utils.TraceReader(filename)
-                json_lines = reader.read_lines_json(start_byte=start, end_byte=end)
+                with dft_utils.TraceReader(filename) as reader:
+                    json_lines = reader.read_lines_json(start_byte=start, end_byte=end)
 
                 processed_records = []
                 for json_obj in json_lines:
@@ -385,8 +388,8 @@ def process_batch(batch_info):
                 return processed_records
 
             # Get reference data and verify against environment
-            full_reader = dft_utils.TraceReader(gz_file)
-            reference_data = full_reader.read_lines_json(start_byte=0, end_byte=max_bytes)
+            with dft_utils.TraceReader(gz_file) as full_reader:
+                reference_data = full_reader.read_lines_json(start_byte=0, end_byte=max_bytes)
             expected_count = len([obj for obj in reference_data if obj and "name" in obj])
 
             assert expected_count == env.lines, (
diff --git a/tests/python/test_indexer.py b/tests/python/test_indexer.py
index c5d438ed..8079c833 100644
--- a/tests/python/test_indexer.py
+++ b/tests/python/test_indexer.py
@@ -19,12 +19,12 @@ def test_indexer_creation(self):
         """Test indexer creation"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
 
             # Test basic creation using context manager
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 assert indexer.gz_path == gz_file
-                assert indexer.idx_path == idx_file
+                assert indexer.index_path == index_path
                 assert indexer.checkpoint_size > 0
 
     def test_indexer_creation_with_defaults(self):
@@ -35,7 +35,7 @@ def test_indexer_creation_with_defaults(self):
             # Test creation with defaults using context manager
             with dft_utils.Indexer(gz_file) as indexer:
                 assert indexer.gz_path == gz_file
-                assert indexer.idx_path == gz_file + ".idx"
+                assert indexer.index_path == env.get_index_path(gz_file)
                 assert indexer.checkpoint_size <= 33554432  # Should be <= 32MB default
 
     def test_indexer_custom_checkpoint_size(self):
@@ -57,9 +57,9 @@ def test_indexer_build_and_rebuild(self):
         """Test indexer build and rebuild functionality"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
 
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 # Should need rebuild initially
                 assert indexer.need_rebuild()
 
@@ -67,7 +67,7 @@ def test_indexer_build_and_rebuild(self):
                 indexer.build()
 
                 # Index file should exist
-                assert os.path.exists(idx_file)
+                assert os.path.exists(index_path)
 
                 # Should not need rebuild after building
                 assert not indexer.need_rebuild()
@@ -75,7 +75,7 @@ def test_indexer_build_and_rebuild(self):
             # Test force rebuild with a new indexer
             # Note: force_rebuild affects the build process, not need_rebuild() check
             # The need_rebuild() method checks file consistency, not force_rebuild flag
-            with dft_utils.Indexer(gz_file, idx_file, force_rebuild=True) as indexer_force:
+            with dft_utils.Indexer(gz_file, index_path, force_rebuild=True) as indexer_force:
                 # Since the index already exists and file hasn't changed, need_rebuild should be False
                 # But force_rebuild will cause a rebuild when build() is called
                 assert not indexer_force.need_rebuild()
@@ -205,7 +205,7 @@ def test_indexer_with_reader_creation(self):
                 if indexer.need_rebuild():
                     indexer.build()
 
-                # Test creating reader after indexer builds sidecar
+                # Test creating reader after indexer builds the shared index store
                 reader = dft_utils.TraceReader(gz_file)
                 assert reader.get_max_bytes() > 0
                 assert reader.file_path == gz_file
@@ -220,7 +220,7 @@ def test_indexer_with_reader_creation_context_manager(self):
                 if indexer.need_rebuild():
                     indexer.build()
 
-                # Test creating reader after indexer builds sidecar
+                # Test creating reader after indexer builds the shared index store
                 reader = dft_utils.TraceReader(gz_file)
                 assert reader.get_max_bytes() > 0
 
@@ -234,7 +234,7 @@ def test_multiple_readers_same_indexer(self):
                 if indexer.need_rebuild():
                     indexer.build()
 
-                # Create multiple readers (all use same sidecar)
+                # Create multiple readers (all use the same shared index store)
                 readers = []
                 for i in range(3):
                     reader = dft_utils.TraceReader(gz_file)
@@ -247,6 +247,44 @@ def test_multiple_readers_same_indexer(self):
                     assert reader.get_max_bytes() == max_bytes
 
 
+class TestIndexerLifetime:
+    """Python wrapper lifetime should not own the shared index store."""
+
+    def test_indexer_close_releases_wrapper_not_index_store(self):
+        """close() should release the Python handle without deleting .dftindex."""
+        with Environment() as env:
+            gz_file = env.create_test_gzip_file()
+            index_path = env.get_index_path(gz_file)
+
+            indexer = dft_utils.Indexer(gz_file, index_path)
+            assert indexer.need_rebuild()
+            indexer.build()
+            assert os.path.exists(index_path)
+            # Closing the wrapper explicitly must leave the on-disk store in place.
+            indexer.close()
+            assert os.path.exists(index_path)
+
+            with dft_utils.Indexer(gz_file, index_path) as reopened:
+                assert not reopened.need_rebuild()
+                assert reopened.get_num_lines() > 0
+
+    def test_indexer_context_exit_keeps_shared_index_store(self):
+        """Context exit should not tear down the shared index store."""
+        with Environment() as env:
+            gz_file = env.create_test_gzip_file()
+            index_path = env.get_index_path(gz_file)
+
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
+                if indexer.need_rebuild():
+                    indexer.build()
+                assert indexer.get_num_lines() > 0
+
+            assert os.path.exists(index_path)
+            # Scope the reader so it is exited before the Environment block ends.
+            with dft_utils.TraceReader(gz_file) as reader:
+                assert reader.get_num_lines() > 0
+
+
 class TestIndexerUnified:
     """Test unified IndexBuilder features via Python Indexer"""
 
@@ -254,9 +292,9 @@ def test_indexer_build_bloom(self):
         """Test building with bloom=True"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert indexer.has_bloom
@@ -265,9 +303,9 @@ def test_indexer_build_manifest(self):
         """Test building with manifest=True"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_manifest=True, index_threshold=0
+                gz_file, index_path, build_manifest=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert indexer.has_manifest
@@ -276,10 +314,10 @@ def test_indexer_build_bloom_and_manifest(self):
         """Test building with both bloom and manifest"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=0,
@@ -292,8 +330,8 @@ def test_indexer_no_bloom_by_default(self):
         """Test that bloom is not built when build_bloom is omitted"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file, index_threshold=0) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path, index_threshold=0) as indexer:
                 indexer.build()
                 assert not indexer.has_bloom
 
@@ -301,8 +339,8 @@ def test_indexer_no_manifest_by_default(self):
         """Test that manifest is not built when build_manifest is omitted"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file, index_threshold=0) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path, index_threshold=0) as indexer:
                 indexer.build()
                 assert not indexer.has_manifest
 
@@ -310,9 +348,9 @@ def test_indexer_has_bloom_is_bool(self):
         """Test that has_bloom returns a bool"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert isinstance(indexer.has_bloom, bool)
@@ -321,9 +359,9 @@ def test_indexer_has_manifest_is_bool(self):
         """Test that has_manifest returns a bool"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_manifest=True, index_threshold=0
+                gz_file, index_path, build_manifest=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert isinstance(indexer.has_manifest, bool)
@@ -332,11 +370,11 @@ def test_indexer_custom_index_threshold(self):
         """Test that index_threshold is accepted without error"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             # A very large threshold skips bloom for small files
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=1024 * 1024 * 1024,
             ) as indexer:
@@ -345,30 +383,29 @@ def test_indexer_custom_index_threshold(self):
                 assert not indexer.has_bloom
 
     def test_indexer_bloom_persists_across_instances(self):
-        """Bloom data written to the sidecar is visible from a new Indexer"""
+        """Bloom data written to the index store is visible from a new Indexer"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
 
-            # Open a fresh Indexer pointing at the same sidecar
-            with dft_utils.Indexer(gz_file, idx_file) as indexer2:
+            with dft_utils.Indexer(gz_file, index_path) as indexer2:  # fresh Indexer instance
                 assert indexer2.has_bloom
 
     def test_indexer_manifest_persists_across_instances(self):
-        """Manifest data written to the sidecar is visible from a new Indexer"""
+        """Manifest data written to the index store is visible from a new Indexer"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_manifest=True, index_threshold=0
+                gz_file, index_path, build_manifest=True, index_threshold=0
             ) as indexer:
                 indexer.build()
 
-            with dft_utils.Indexer(gz_file, idx_file) as indexer2:
+            with dft_utils.Indexer(gz_file, index_path) as indexer2:
                 assert indexer2.has_manifest
 
 
@@ -379,9 +416,12 @@ def test_threshold_skips_bloom_for_small_file(self):
         """Explicit large threshold should skip bloom for small files"""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file(bytes_per_line=128)
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=10 * 1024 * 1024
+                gz_file,
+                index_path,
+                build_bloom=True,
+                index_threshold=10 * 1024 * 1024,
             ) as indexer:
                 indexer.build()
                 assert not indexer.has_bloom
@@ -390,9 +430,12 @@ def test_threshold_skips_manifest_for_small_file(self):
         """Explicit large threshold should skip manifest for small files"""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file(bytes_per_line=128)
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_manifest=True, index_threshold=10 * 1024 * 1024
+                gz_file,
+                index_path,
+                build_manifest=True,
+                index_threshold=10 * 1024 * 1024,
             ) as indexer:
                 indexer.build()
                 assert not indexer.has_manifest
@@ -401,10 +444,10 @@ def test_threshold_skips_bloom_and_manifest_for_small_file(self):
         """Explicit large threshold should skip bloom and manifest for small files"""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file(bytes_per_line=128)
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=10 * 1024 * 1024,
@@ -417,10 +460,10 @@ def test_explicit_large_threshold_skips_bloom(self):
         """Explicit large threshold should skip bloom for small files"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=1024 * 1024 * 1024,
             ) as indexer:
@@ -431,9 +474,9 @@ def test_zero_threshold_forces_bloom(self):
         """index_threshold=0 disables threshold, bloom should be built"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert indexer.has_bloom
@@ -442,9 +485,9 @@ def test_zero_threshold_forces_manifest(self):
         """index_threshold=0 disables threshold, manifest should be built"""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_manifest=True, index_threshold=0
+                gz_file, index_path, build_manifest=True, index_threshold=0
             ) as indexer:
                 indexer.build()
                 assert indexer.has_manifest
diff --git a/tests/python/test_reorganization_planner.py b/tests/python/test_reorganization_planner.py
index 9ea8ee36..b5518fe6 100644
--- a/tests/python/test_reorganization_planner.py
+++ b/tests/python/test_reorganization_planner.py
@@ -17,10 +17,10 @@ class TestReorganizationPlannerUtility:
     def test_plan_returns_dict(self):
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=0,
@@ -37,10 +37,10 @@ def test_plan_returns_dict(self):
     def test_call_delegates_to_process(self):
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=0,
@@ -60,10 +60,10 @@ def test_plan_succeeds_without_manifest(self):
         """Without manifest the planner streams the file and succeeds."""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file(bytes_per_line=128)
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
@@ -81,10 +81,10 @@ def test_plan_has_tasks_without_manifest(self):
         """Whole-file fallback produces extraction tasks."""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file(bytes_per_line=128)
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 build_manifest=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
diff --git a/tests/python/test_statistics_aggregator.py b/tests/python/test_statistics_aggregator.py
index 555c40ec..3be995d5 100644
--- a/tests/python/test_statistics_aggregator.py
+++ b/tests/python/test_statistics_aggregator.py
@@ -17,9 +17,9 @@ class TestStatisticsAggregatorUtility:
     def test_compute_returns_dict(self):
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsAggregatorUtility().process(gz_file)
@@ -30,9 +30,9 @@ def test_compute_returns_dict(self):
     def test_compute_correct_event_count(self):
         with Environment(lines=30) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsAggregatorUtility().process(gz_file)
@@ -42,9 +42,9 @@ def test_compute_correct_event_count(self):
     def test_compute_has_statistics_fields(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsAggregatorUtility().process(gz_file)
@@ -57,9 +57,9 @@ def test_compute_has_statistics_fields(self):
     def test_call_delegates_to_process(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             util = StatisticsAggregatorUtility()
@@ -75,10 +75,10 @@ def test_returns_dict_without_bloom(self):
         """Without bloom data the aggregator streams the file and succeeds."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
             ) as indexer:
@@ -94,10 +94,10 @@ def test_correct_event_count_without_bloom(self):
         """Sequential fallback produces the same event count as indexed path."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
             ) as indexer:
@@ -111,10 +111,10 @@ def test_has_statistics_fields_without_bloom(self):
         """Sequential fallback populates all statistics fields."""
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
             ) as indexer:
diff --git a/tests/python/test_statistics_query.py b/tests/python/test_statistics_query.py
index a02cc9b6..fa2cb123 100644
--- a/tests/python/test_statistics_query.py
+++ b/tests/python/test_statistics_query.py
@@ -17,9 +17,9 @@ class TestStatisticsQueryUtility:
     def test_query_summary(self):
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsQueryUtility().process(gz_file, query_type="summary")
@@ -30,9 +30,9 @@ def test_query_summary(self):
     def test_query_categories(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsQueryUtility().process(gz_file, query_type="categories")
@@ -42,9 +42,9 @@ def test_query_categories(self):
     def test_query_names(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsQueryUtility().process(gz_file, query_type="names")
@@ -53,9 +53,9 @@ def test_query_names(self):
     def test_query_top_n_names(self):
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsQueryUtility().process(gz_file, query_type="top_n_names", top_n=5)
@@ -65,9 +65,9 @@ def test_query_top_n_names(self):
     def test_query_duration_stats(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             result = StatisticsQueryUtility().process(gz_file, query_type="duration_stats")
@@ -77,9 +77,9 @@ def test_query_duration_stats(self):
     def test_call_delegates_to_process(self):
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
-                gz_file, idx_file, build_bloom=True, index_threshold=0
+                gz_file, index_path, build_bloom=True, index_threshold=0
             ) as indexer:
                 indexer.build()
             util = StatisticsQueryUtility()
@@ -95,10 +95,10 @@ def test_summary_correct_events_without_bloom(self):
         """Sequential fallback produces correct event count."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
             ) as indexer:
@@ -112,10 +112,10 @@ def test_categories_populated_without_bloom(self):
         """Sequential fallback populates categories."""
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
+            index_path = env.get_index_path(gz_file)
             with dft_utils.Indexer(
                 gz_file,
-                idx_file,
+                index_path,
                 build_bloom=True,
                 index_threshold=_SKIP_INDEX_THRESHOLD,
             ) as indexer:
diff --git a/tests/python/test_trace_reader.py b/tests/python/test_trace_reader.py
index e86df1eb..586fbd7a 100644
--- a/tests/python/test_trace_reader.py
+++ b/tests/python/test_trace_reader.py
@@ -25,21 +25,21 @@ def test_creation_nonexistent_file(self):
         with pytest.raises(RuntimeError):
             reader.read_lines()
 
-    def test_has_index_false_without_sidecar(self):
-        """has_index is False when no .idx sidecar exists."""
+    def test_has_index_false_without_index_store(self):
+        """has_index is False when no `.dftindex` store exists."""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
             reader = dft_utils.TraceReader(gz_file)
             assert reader.has_index is False
 
     def test_has_index_true_after_indexer_build(self):
-        """has_index is True when a sidecar was built before construction."""
+        """has_index is True when an index store was built before construction."""
         with Environment() as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 indexer.build()
-            # TraceReader probes for the sidecar at __init__ time
+            # TraceReader probes for the index store at __init__ time
             reader = dft_utils.TraceReader(gz_file)
             assert reader.has_index is True
 
@@ -150,11 +150,11 @@ def test_read_lines_negative_end_raises(self):
                 reader.read_lines(end_line=-1)
 
     def test_read_lines_with_index(self):
-        """read_lines() works correctly when a sidecar index is present."""
+        """read_lines() works correctly when an index store is present."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 indexer.build()
             reader = dft_utils.TraceReader(gz_file)
             assert reader.has_index
@@ -169,8 +169,8 @@ def test_read_lines_indexed_matches_sequential(self):
             sequential = dft_utils.TraceReader(gz_file).read_lines()
 
             # Build index, then read again
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 indexer.build()
             indexed = dft_utils.TraceReader(gz_file).read_lines()
 
diff --git a/tests/python/test_trace_reader_arrow.py b/tests/python/test_trace_reader_arrow.py
index c90009d1..822554c5 100644
--- a/tests/python/test_trace_reader_arrow.py
+++ b/tests/python/test_trace_reader_arrow.py
@@ -14,8 +14,8 @@ def test_iter_arrow_returns_batches(self):
         """iter_arrow yields batch objects with __arrow_c_array__."""
         with Environment(lines=50) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            batches = list(reader.iter_arrow(batch_size=100))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(batch_size=100))
             assert len(batches) >= 1
             for b in batches:
                 assert hasattr(b, "__arrow_c_array__")
@@ -26,8 +26,8 @@ def test_iter_arrow_correct_row_count(self):
         """Total rows across all batches equals number of JSON lines."""
         with Environment(lines=50) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            batches = list(reader.iter_arrow(batch_size=20))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(batch_size=20))
             total_rows = sum(b.num_rows for b in batches)
             assert total_rows == 50
 
@@ -35,9 +35,9 @@ def test_iter_arrow_batch_size_respected(self):
         """Each batch has at most batch_size rows."""
         with Environment(lines=100) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
             batch_size = 30
-            batches = list(reader.iter_arrow(batch_size=batch_size))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(batch_size=batch_size))
             for b in batches:
                 assert b.num_rows <= batch_size
 
@@ -45,8 +45,8 @@ def test_iter_arrow_discovers_columns(self):
         """Arrow batches have columns matching JSON keys."""
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            batches = list(reader.iter_arrow(batch_size=100))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(batch_size=100))
             assert len(batches) == 1
             b = batches[0]
             # Test data has: name, cat, dur, data
@@ -56,10 +56,10 @@ def test_iter_arrow_clamped_range(self):
         """iter_arrow with out-of-range bytes clamps to actual bounds."""
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
             # Out-of-range start_byte is clamped to max, yielding empty
             # (for non-indexed files, clamping may read all data)
-            batches = list(reader.iter_arrow(start_byte=999999999, end_byte=999999999))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(start_byte=999999999, end_byte=999999999))
             # Just verify it doesn't crash — clamping behavior varies
             assert isinstance(batches, list)
 
@@ -68,11 +68,11 @@ def test_iter_arrow_with_line_range(self):
         with Environment(lines=50) as env:
             gz_file = env.create_test_gzip_file()
             # Build index for line-based access
-            idx_file = gz_file + ".idx"
-            with dft_utils.Indexer(gz_file, idx_file) as indexer:
+            index_path = env.get_index_path(gz_file)
+            with dft_utils.Indexer(gz_file, index_path) as indexer:
                 indexer.build()
-            reader = dft_utils.TraceReader(gz_file)
-            batches = list(reader.iter_arrow(start_line=10, end_line=20, batch_size=100))
+            with dft_utils.TraceReader(gz_file) as reader:
+                batches = list(reader.iter_arrow(start_line=10, end_line=20, batch_size=100))
             total_rows = sum(b.num_rows for b in batches)
             # end_line is inclusive, so lines 10..20 = 11 lines
             assert total_rows == 11
@@ -85,24 +85,24 @@ def test_read_arrow_returns_arrow_table(self):
         """read_arrow returns an ArrowTable."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            table = reader.read_arrow(batch_size=100)
+            with dft_utils.TraceReader(gz_file) as reader:
+                table = reader.read_arrow(batch_size=100)
             assert isinstance(table, ArrowTable)
 
     def test_read_arrow_row_count(self):
         """ArrowTable has correct total row count."""
         with Environment(lines=30) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            table = reader.read_arrow(batch_size=100)
+            with dft_utils.TraceReader(gz_file) as reader:
+                table = reader.read_arrow(batch_size=100)
             assert table.num_rows == 30
 
     def test_read_arrow_batch_access(self):
         """ArrowTable provides batch access."""
         with Environment(lines=50) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            table = reader.read_arrow(batch_size=20)
+            with dft_utils.TraceReader(gz_file) as reader:
+                table = reader.read_arrow(batch_size=20)
             assert table.num_batches >= 1
             for b in table.batches():
                 # Batches are raw _ArrowBatchCapsule objects (not ArrowBatch wrappers)
@@ -113,8 +113,8 @@ def test_read_arrow_properties(self):
         """ArrowTable exposes num_batches, num_rows, empty."""
         with Environment(lines=20) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            table = reader.read_arrow(batch_size=100)
+            with dft_utils.TraceReader(gz_file) as reader:
+                table = reader.read_arrow(batch_size=100)
             assert table.num_rows == 20
             assert table.num_batches >= 1
             assert not table.empty
@@ -127,8 +127,8 @@ def test_arrow_batch_wraps_capsule(self):
         """ArrowBatch wraps a capsule from iter_arrow."""
         with Environment(lines=10) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            raw_batches = list(reader.iter_arrow(batch_size=100))
+            with dft_utils.TraceReader(gz_file) as reader:
+                raw_batches = list(reader.iter_arrow(batch_size=100))
             assert len(raw_batches) >= 1
             batch = ArrowBatch(raw_batches[0])
             assert hasattr(batch, "__arrow_c_array__")
@@ -138,8 +138,8 @@ def test_arrow_batch_to_pandas_requires_pyarrow(self):
         """to_pandas raises ImportError if pyarrow is not installed."""
         with Environment(lines=5) as env:
             gz_file = env.create_test_gzip_file()
-            reader = dft_utils.TraceReader(gz_file)
-            raw_batches = list(reader.iter_arrow(batch_size=100))
+            with dft_utils.TraceReader(gz_file) as reader:
+                raw_batches = list(reader.iter_arrow(batch_size=100))
             batch = ArrowBatch(raw_batches[0])
             # This test only verifies the method exists; actual conversion
             # depends on pyarrow being installed
diff --git a/tests/reader/test_basic_factory.cpp b/tests/reader/test_basic_factory.cpp
index 18a94d02..364997c9 100644
--- a/tests/reader/test_basic_factory.cpp
+++ b/tests/reader/test_basic_factory.cpp
@@ -1,4 +1,5 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 #include <dftracer/utils/utilities/reader/internal/reader_factory.h>
 #include <doctest/doctest.h>
@@ -6,6 +7,7 @@
 #include "testing_utilities.h"
 
 using namespace dftracer::utils;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dftracer::utils::utilities::reader::internal;
 using namespace dft_utils_test;
@@ -18,12 +20,13 @@ TEST_CASE("Factory Pattern - Basic GZIP functionality") {
     REQUIRE(!gz_file.empty());
 
     std::string idx_file = env.get_index_path(gz_file);
+    std::string db_root = determine_index_path(gz_file, "");
 
     SUBCASE("IndexerFactory creates valid indexer") {
         auto indexer = IndexerFactory::create(gz_file, idx_file, 1024 * 1024);
         REQUIRE(indexer != nullptr);
         CHECK(indexer->get_archive_path() == gz_file);
-        CHECK(indexer->get_idx_path() == idx_file);
+        CHECK(indexer->get_index_path() == db_root);
     }
 
     SUBCASE("ReaderFactory creates valid reader") {
@@ -35,7 +38,7 @@ TEST_CASE("Factory Pattern - Basic GZIP functionality") {
         REQUIRE(reader != nullptr);
         CHECK(reader->is_valid());
         CHECK(reader->get_archive_path() == gz_file);
-        CHECK(reader->get_idx_path() == idx_file);
+        CHECK(reader->get_index_path() == db_root);
     }
 
     SUBCASE("Reader factory from files") {
@@ -59,13 +62,14 @@ TEST_CASE("Factory Pattern - Basic TAR.GZ functionality") {
     REQUIRE(!tar_gz_file.empty());
 
     std::string idx_file = env.get_index_path(tar_gz_file);
+    std::string db_root = determine_index_path(tar_gz_file, "");
 
     SUBCASE("IndexerFactory creates valid TAR.GZ indexer") {
         auto indexer =
             IndexerFactory::create(tar_gz_file, idx_file, 1024 * 1024);
         REQUIRE(indexer != nullptr);
         CHECK(indexer->get_archive_path() == tar_gz_file);
-        CHECK(indexer->get_idx_path() == idx_file);
+        CHECK(indexer->get_index_path() == db_root);
     }
 
     SUBCASE("ReaderFactory creates valid TAR.GZ reader") {
@@ -78,7 +82,7 @@ TEST_CASE("Factory Pattern - Basic TAR.GZ functionality") {
         REQUIRE(reader != nullptr);
         CHECK(reader->is_valid());
         CHECK(reader->get_archive_path() == tar_gz_file);
-        CHECK(reader->get_idx_path() == idx_file);
+        CHECK(reader->get_index_path() == db_root);
     }
 }
 
diff --git a/tests/reader/test_reader.c b/tests/reader/test_reader.c
index f6e7d607..79b32684 100644
--- a/tests/reader/test_reader.c
+++ b/tests/reader/test_reader.c
@@ -70,7 +70,7 @@ void test_indexer_invalid_parameters(void) {
     indexer = dft_indexer_create(NULL, "test.idx", mb_to_b(1.0), 0);
     TEST_ASSERT_NULL(indexer);
 
-    // Test null idx_path
+    // Test null index_path
     indexer = dft_indexer_create("test.gz", NULL, mb_to_b(1.0), 0);
     TEST_ASSERT_NULL(indexer);
 
@@ -184,7 +184,7 @@ void test_reader_invalid_parameters(void) {
     reader = dft_reader_create(NULL, "test.idx", ckpt_size);
     TEST_ASSERT_NULL(reader);
 
-    // Test null idx_path
+    // Test null index_path
     reader = dft_reader_create("test.gz", NULL, ckpt_size);
     TEST_ASSERT_NULL(reader);
 
diff --git a/tests/reader/test_reader.cpp b/tests/reader/test_reader.cpp
index e4423481..cc5153ed 100644
--- a/tests/reader/test_reader.cpp
+++ b/tests/reader/test_reader.cpp
@@ -53,7 +53,7 @@ TEST_CASE("C++ Indexer - Basic functionality") {
 
         // Test getter methods
         CHECK(indexer->get_archive_path() == gz_file);
-        CHECK(indexer->get_idx_path() == idx_file);
+        CHECK(indexer->get_index_path() == idx_file);
 
         // Build index first before accessing metadata
         indexer->build();
@@ -119,7 +119,7 @@ TEST_CASE("C++ Reader - Basic functionality") {
 
         // Test getter methods
         CHECK(reader->get_archive_path() == gz_file);
-        CHECK(reader->get_idx_path() == idx_file);
+        CHECK(reader->get_index_path() == idx_file);
     }
 
     SUBCASE("Read byte range using streaming API") {
diff --git a/tests/reader/test_reader_formats.cpp b/tests/reader/test_reader_formats.cpp
index 440229ca..99703384 100644
--- a/tests/reader/test_reader_formats.cpp
+++ b/tests/reader/test_reader_formats.cpp
@@ -76,7 +76,8 @@ TEST_CASE_TEMPLATE("Indexer creation and destruction", FormatType, GZIPFormat,
         auto indexer = IndexerFactory::create(
             fixture.get_test_file(), fixture.get_index_file(), 1024 * 1024);
         REQUIRE(indexer != nullptr);
-        CHECK(indexer->exists());
+        CHECK_FALSE(indexer->exists());
+        CHECK(indexer->need_rebuild());
     }
 
     SUBCASE("Invalid file path") {
diff --git a/tests/reader/test_reader_stream.cpp b/tests/reader/test_reader_stream.cpp
index 31af79dc..111c62fb 100644
--- a/tests/reader/test_reader_stream.cpp
+++ b/tests/reader/test_reader_stream.cpp
@@ -622,7 +622,7 @@ TEST_CASE("C++ Reader Streaming API - Format identification") {
 
     SUBCASE("Verify metadata access") {
         CHECK(reader->get_archive_path() == gz_file);
-        CHECK(reader->get_idx_path() == idx_file);
+        CHECK(reader->get_index_path() == idx_file);
         CHECK(reader->get_max_bytes() > 0);
     }
 }
diff --git a/tests/reader/test_reader_tar_comprehensive.cpp b/tests/reader/test_reader_tar_comprehensive.cpp
index 2629f86c..4cab7477 100644
--- a/tests/reader/test_reader_tar_comprehensive.cpp
+++ b/tests/reader/test_reader_tar_comprehensive.cpp
@@ -1,5 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/internal/error.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
@@ -17,6 +18,7 @@
 #include "testing_utilities.h"
 
 using namespace dftracer::utils;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dftracer::utils::utilities::reader::internal;
 using namespace dft_utils_test;
@@ -29,6 +31,7 @@ TEST_CASE("TAR.GZ Indexer - Basic functionality") {
     REQUIRE(!tar_gz_file.empty());
 
     std::string idx_file = env.get_index_path(tar_gz_file);
+    std::string db_root = determine_index_path(tar_gz_file, "");
 
     SUBCASE("Build index") {
         auto indexer =
@@ -55,7 +58,7 @@ TEST_CASE("TAR.GZ Indexer - Basic functionality") {
 
         // Test getter methods
         CHECK(indexer->get_archive_path() == tar_gz_file);
-        CHECK(indexer->get_idx_path() == idx_file);
+        CHECK(indexer->get_index_path() == db_root);
 
         // Build index first before accessing metadata
         indexer->build();
@@ -86,6 +89,7 @@ TEST_CASE("TAR.GZ Reader - Basic functionality") {
     REQUIRE(!tar_gz_file.empty());
 
     std::string idx_file = env.get_index_path(tar_gz_file);
+    std::string db_root = determine_index_path(tar_gz_file, "");
 
     // Build index first
     {
@@ -123,7 +127,7 @@ TEST_CASE("TAR.GZ Reader - Basic functionality") {
 
         // Test getter methods
         CHECK(reader->get_archive_path() == tar_gz_file);
-        CHECK(reader->get_idx_path() == idx_file);
+        CHECK(reader->get_index_path() == db_root);
     }
 
     SUBCASE("Read byte range using streaming API") {
diff --git a/tests/testing_utilities.cpp b/tests/testing_utilities.cpp
index ed4a0ab2..0b4665ad 100644
--- a/tests/testing_utilities.cpp
+++ b/tests/testing_utilities.cpp
@@ -2,6 +2,7 @@
 
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/common/logging.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <zlib.h>
 
 #include <cstdint>
@@ -246,7 +247,6 @@ std::string TestEnvironment::create_test_gzip_file_impl() {
 
     // Create test file in the unique directory
     std::string gz_file = test_dir + "/test_data.gz";
-    std::string idx_file = test_dir + "/test_data.gz.idx";
     std::string txt_file = test_dir + "/test_data.txt";
 
     // Write test data to text file
@@ -333,7 +333,8 @@ std::string TestEnvironment::create_test_tar_gzip_file_impl() {
 }
 
 std::string TestEnvironment::get_index_path(const std::string& gz_file) {
-    return gz_file + ".idx";
+    return dftracer::utils::utilities::composites::dft::internal::
+        determine_index_path(gz_file, "");
 }
 
 std::string TestEnvironment::create_dft_test_file(int num_events) {
@@ -465,10 +466,10 @@ char* test_environment_get_index_path(test_environment_handle_t env,
                                       const char* gz_file) {
     if (!env || !gz_file) return nullptr;
     auto* cpp_env = reinterpret_cast<dft_utils_test::TestEnvironment*>(env);
-    std::string idx_path = cpp_env->get_index_path(gz_file);
-    char* result = static_cast<char*>(malloc(idx_path.length() + 1));
+    std::string index_path = cpp_env->get_index_path(gz_file);
+    char* result = static_cast<char*>(malloc(index_path.length() + 1));
     if (result) {
-        strcpy(result, idx_path.c_str());
+        strcpy(result, index_path.c_str());
     }
     return result;
 }
diff --git a/tests/testing_utilities.h b/tests/testing_utilities.h
index 002589ef..35445d88 100644
--- a/tests/testing_utilities.h
+++ b/tests/testing_utilities.h
@@ -93,7 +93,7 @@ char** get_tar_file_list(const char* tar_path, size_t* num_files);
 void free_tar_file_list(char** file_list, size_t num_files);
 
 /**
- * Get index path for a given gzip file
+ * Get the `.dftindex` index path for a given gzip or tar.gz archive
  * Returns allocated string - caller must free
  */
 char* test_environment_get_index_path(test_environment_handle_t env,
diff --git a/tests/utilities/CMakeLists.txt b/tests/utilities/CMakeLists.txt
index bf2f6a76..1818992b 100644
--- a/tests/utilities/CMakeLists.txt
+++ b/tests/utilities/CMakeLists.txt
@@ -77,6 +77,8 @@ set(UTILITIES_TEST_SOURCES
     composites/dft/comparator/test_is_data_transfer_op.cpp
 
     # Indexer
+    indexer/test_rocksdb_storage.cpp
+    indexer/test_scan_prefix.cpp
     indexer/test_index_database.cpp
     indexer/test_provenance_database.cpp
     indexer/test_index_builder.cpp
diff --git a/tests/utilities/composites/dft/indexing/test_bloom_query.cpp b/tests/utilities/composites/dft/indexing/test_bloom_query.cpp
index b32e91de..c27c9b96 100644
--- a/tests/utilities/composites/dft/indexing/test_bloom_query.cpp
+++ b/tests/utilities/composites/dft/indexing/test_bloom_query.cpp
@@ -2,7 +2,6 @@
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/bloom_query_utility.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
 #include <doctest/doctest.h>
@@ -17,9 +16,9 @@ using dftracer::utils::utilities::indexer::IndexDatabase;
 using dftracer::utils::utilities::indexer::internal::get_logical_path;
 
-// Helper to set up a .idx database with test data
+// Helper to populate an IndexDatabase at index_path with test data
-static void populate_test_idx(const std::string& idx_path,
+static void populate_test_idx(const std::string& index_path,
                               const std::string& file_path) {
-    IndexDatabase idx_db(idx_path);
+    IndexDatabase idx_db(index_path);
     idx_db.init_base_schema();
     idx_db.init_bloom_schema();
 
@@ -44,10 +43,9 @@ static void populate_test_idx(const std::string& idx_path,
         }
 
         auto blob = name_bloom.serialize();
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, static_cast<std::uint64_t>(ckpt), "name",
-            blob.data(), static_cast<int>(blob.size()),
-            name_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(
+            fid, static_cast<std::uint64_t>(ckpt), "name", blob.data(),
+            static_cast<int>(blob.size()), name_bloom.num_entries());
 
         // cat dimension
         BloomFilter cat_bloom(100, 0.01);
@@ -58,10 +56,9 @@ static void populate_test_idx(const std::string& idx_path,
         }
 
         auto cat_blob = cat_bloom.serialize();
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, static_cast<std::uint64_t>(ckpt), "cat",
-            cat_blob.data(), static_cast<int>(cat_blob.size()),
-            cat_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(
+            fid, static_cast<std::uint64_t>(ckpt), "cat", cat_blob.data(),
+            static_cast<int>(cat_blob.size()), cat_bloom.num_entries());
     }
 
     // Create file-level bloom filters (merged from all chunks)
@@ -72,42 +69,40 @@ static void populate_test_idx(const std::string& idx_path,
     file_name_bloom.add("close");
     file_name_bloom.add("stat");
     auto name_blob = file_name_bloom.serialize();
-    queries::insert_file_bloom_filter(
-        idx_db.sql_db(), fid, "name", name_blob.data(),
-        static_cast<int>(name_blob.size()), file_name_bloom.num_entries());
+    idx_db.insert_file_bloom_filter(fid, "name", name_blob.data(),
+                                    static_cast<int>(name_blob.size()),
+                                    file_name_bloom.num_entries());
 
     BloomFilter file_cat_bloom(100, 0.01);
     file_cat_bloom.add("POSIX");
     file_cat_bloom.add("storage");
     auto cat_blob = file_cat_bloom.serialize();
-    queries::insert_file_bloom_filter(
-        idx_db.sql_db(), fid, "cat", cat_blob.data(),
-        static_cast<int>(cat_blob.size()), file_cat_bloom.num_entries());
+    idx_db.insert_file_bloom_filter(fid, "cat", cat_blob.data(),
+                                    static_cast<int>(cat_blob.size()),
+                                    file_cat_bloom.num_entries());
 
     // Add fhash with resolution
     BloomFilter fhash_bloom(100, 0.01);
     fhash_bloom.add("abc123");
     auto fhash_blob = fhash_bloom.serialize();
-    queries::insert_file_bloom_filter(
-        idx_db.sql_db(), fid, "fhash", fhash_blob.data(),
-        static_cast<int>(fhash_blob.size()), fhash_bloom.num_entries());
+    idx_db.insert_file_bloom_filter(fid, "fhash", fhash_blob.data(),
+                                    static_cast<int>(fhash_blob.size()),
+                                    fhash_bloom.num_entries());
 
     for (int ckpt = 0; ckpt < 3; ++ckpt) {
         auto blob = fhash_bloom.serialize();
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, static_cast<std::uint64_t>(ckpt), "fhash",
-            blob.data(), static_cast<int>(blob.size()),
-            fhash_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(
+            fid, static_cast<std::uint64_t>(ckpt), "fhash", blob.data(),
+            static_cast<int>(blob.size()), fhash_bloom.num_entries());
     }
 
     // Hash resolutions
-    queries::insert_hash_resolution(idx_db.sql_db(), fid, "fhash", "abc123",
-                                    "./data/file.h5");
+    idx_db.insert_hash_resolution(fid, "fhash", "abc123", "./data/file.h5");
 
     // Record dimensions
-    queries::insert_index_dimension(idx_db.sql_db(), fid, "name");
-    queries::insert_index_dimension(idx_db.sql_db(), fid, "cat");
-    queries::insert_index_dimension(idx_db.sql_db(), fid, "fhash");
+    idx_db.insert_index_dimension(fid, "name");
+    idx_db.insert_index_dimension(fid, "cat");
+    idx_db.insert_index_dimension(fid, "fhash");
 
     idx_db.commit_transaction();
 }
@@ -118,13 +113,14 @@ TEST_SUITE("BloomQueryUtility") {
             dft_utils_test::make_unique_test_path("test_bloom_query").string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         BloomQueryInput input;
-        input.with_idx_path(idx_path).with_file_path(file_path).with_predicate(
-            "name", {"nonexistent_operation"});
+        input.with_index_path(index_path)
+            .with_file_path(file_path)
+            .with_predicate("name", {"nonexistent_operation"});
 
         BloomQueryUtility query;
         auto output = query.process(input).get();
@@ -142,13 +138,14 @@ TEST_SUITE("BloomQueryUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         BloomQueryInput input;
-        input.with_idx_path(idx_path).with_file_path(file_path).with_predicate(
-            "name", {"read"});
+        input.with_index_path(index_path)
+            .with_file_path(file_path)
+            .with_predicate("name", {"read"});
 
         BloomQueryUtility query;
         auto output = query.process(input).get();
@@ -168,12 +165,12 @@ TEST_SUITE("BloomQueryUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         BloomQueryInput input;
-        input.with_idx_path(idx_path)
+        input.with_index_path(index_path)
             .with_file_path(file_path)
             .with_predicate("name", {"open"})
             .with_predicate("cat", {"storage"});
@@ -196,12 +193,12 @@ TEST_SUITE("BloomQueryUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         BloomQueryInput input;
-        input.with_idx_path(idx_path).with_file_path(file_path);
+        input.with_index_path(index_path).with_file_path(file_path);
 
         BloomQueryUtility query;
         auto output = query.process(input).get();
@@ -218,14 +215,15 @@ TEST_SUITE("BloomQueryUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // Query by resolved value (not hash)
         BloomQueryInput input;
-        input.with_idx_path(idx_path).with_file_path(file_path).with_predicate(
-            "fhash", {"./data/file.h5"});
+        input.with_index_path(index_path)
+            .with_file_path(file_path)
+            .with_predicate("fhash", {"./data/file.h5"});
 
         BloomQueryUtility query;
         auto output = query.process(input).get();
@@ -244,13 +242,14 @@ TEST_SUITE("BloomQueryUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         BloomQueryInput input;
-        input.with_idx_path(idx_path).with_file_path(file_path).with_predicate(
-            "name", {"read", "open"});
+        input.with_index_path(index_path)
+            .with_file_path(file_path)
+            .with_predicate("name", {"read", "open"});
 
         BloomQueryUtility query;
         auto output = query.process(input).get();
diff --git a/tests/utilities/composites/dft/indexing/test_chunk_indexer.cpp b/tests/utilities/composites/dft/indexing/test_chunk_indexer.cpp
index 6b5b565d..fafeb5c5 100644
--- a/tests/utilities/composites/dft/indexing/test_chunk_indexer.cpp
+++ b/tests/utilities/composites/dft/indexing/test_chunk_indexer.cpp
@@ -70,7 +70,7 @@ TEST_SUITE("ChunkIndexerUtility") {
 
         ChunkIndexerInput input;
         input.with_file_path(trace_file)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
@@ -140,7 +140,7 @@ TEST_SUITE("ChunkIndexerUtility") {
 
         ChunkIndexerInput input;
         input.with_file_path(trace_file)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
@@ -197,7 +197,7 @@ TEST_SUITE("ChunkIndexerUtility") {
 
         ChunkIndexerInput input;
         input.with_file_path(gz_path)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
diff --git a/tests/utilities/composites/dft/indexing/test_chunk_pruner.cpp b/tests/utilities/composites/dft/indexing/test_chunk_pruner.cpp
index 493c8652..5a606ad8 100644
--- a/tests/utilities/composites/dft/indexing/test_chunk_pruner.cpp
+++ b/tests/utilities/composites/dft/indexing/test_chunk_pruner.cpp
@@ -3,7 +3,6 @@
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_dimension_stats.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_pruner_utility.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
 #include <doctest/doctest.h>
@@ -14,14 +13,13 @@
 
 using namespace dftracer::utils;
 using namespace dftracer::utils::utilities::composites::dft::indexing;
-using namespace dftracer::utils::utilities::composites::dft::indexing::queries;
 using dftracer::utils::utilities::common::query::Query;
 using dftracer::utils::utilities::indexer::IndexDatabase;
 using dftracer::utils::utilities::indexer::internal::get_logical_path;
 
-static void populate_test_idx(const std::string& idx_path,
+static void populate_test_idx(const std::string& index_path,
                               const std::string& file_path) {
-    IndexDatabase idx_db(idx_path);
+    IndexDatabase idx_db(index_path);
     idx_db.init_base_schema();
     idx_db.init_bloom_schema();
 
@@ -37,25 +35,25 @@ static void populate_test_idx(const std::string& idx_path,
         cat_ds.value_type = "string";
         cat_ds.observe("POSIX");
         cat_ds.observe("POSIX");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 0, cat_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 0, cat_ds);
 
         ChunkDimensionStats name_ds;
         name_ds.dimension = "name";
         name_ds.value_type = "string";
         name_ds.observe("read");
         name_ds.observe("read");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 0, name_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 0, name_ds);
 
         ChunkDimensionStats dur_ds;
         dur_ds.dimension = "dur";
         dur_ds.value_type = "uint";
         dur_ds.observe("100");
         dur_ds.observe("200");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 0, dur_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 0, dur_ds);
 
-        insert_index_dimension(idx_db.sql_db(), fid, "cat");
-        insert_index_dimension(idx_db.sql_db(), fid, "name");
-        insert_index_dimension(idx_db.sql_db(), fid, "dur");
+        idx_db.insert_index_dimension(fid, "cat");
+        idx_db.insert_index_dimension(fid, "name");
+        idx_db.insert_index_dimension(fid, "dur");
     }
 
     // Chunk 1: STDIO writes, dur 500-600
@@ -64,20 +62,20 @@ static void populate_test_idx(const std::string& idx_path,
         cat_ds.dimension = "cat";
         cat_ds.value_type = "string";
         cat_ds.observe("STDIO");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 1, cat_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 1, cat_ds);
 
         ChunkDimensionStats name_ds;
         name_ds.dimension = "name";
         name_ds.value_type = "string";
         name_ds.observe("write");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 1, name_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 1, name_ds);
 
         ChunkDimensionStats dur_ds;
         dur_ds.dimension = "dur";
         dur_ds.value_type = "uint";
         dur_ds.observe("500");
         dur_ds.observe("600");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 1, dur_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 1, dur_ds);
     }
 
     // Chunk 2: POSIX + MPI mixed, dur 50-1000
@@ -87,33 +85,33 @@ static void populate_test_idx(const std::string& idx_path,
         cat_ds.value_type = "string";
         cat_ds.observe("POSIX");
         cat_ds.observe("MPI");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 2, cat_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 2, cat_ds);
 
         ChunkDimensionStats name_ds;
         name_ds.dimension = "name";
         name_ds.value_type = "string";
         name_ds.observe("read");
         name_ds.observe("send");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 2, name_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 2, name_ds);
 
         ChunkDimensionStats dur_ds;
         dur_ds.dimension = "dur";
         dur_ds.value_type = "uint";
         dur_ds.observe("50");
         dur_ds.observe("1000");
-        insert_chunk_dimension_stats(idx_db.sql_db(), fid, 2, dur_ds);
+        idx_db.insert_chunk_dimension_stats(fid, 2, dur_ds);
     }
 
     idx_db.commit_transaction();
 }
 
-static ChunkPrunerOutput run_pruner(const std::string& idx_path,
+static ChunkPrunerOutput run_pruner(const std::string& index_path,
                                     const std::string& file_path,
                                     const char* query_str) {
     auto q = Query::from_string(query_str);
     REQUIRE(q.has_value());
 
-    ChunkPrunerInput input{idx_path, file_path, std::move(*q), nullptr};
+    ChunkPrunerInput input{index_path, file_path, std::move(*q), nullptr};
 
     ChunkPrunerUtility pruner;
     return pruner.process(input).get();
@@ -124,11 +122,11 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_eq").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
-        auto out = run_pruner(idx_path, file_path, R"(cat == "POSIX")");
+        auto out = run_pruner(index_path, file_path, R"(cat == "POSIX")");
         CHECK(out.success);
         CHECK(out.total_checkpoints == 3);
         // Chunks 0 and 2 have POSIX, chunk 1 has only STDIO
@@ -142,11 +140,11 @@ TEST_SUITE("ChunkPrunerUtility") {
             dft_utils_test::make_unique_test_path("test_pruner_eq_none")
                 .string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
-        auto out = run_pruner(idx_path, file_path, R"(cat == "HDF5")");
+        auto out = run_pruner(index_path, file_path, R"(cat == "HDF5")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.empty());
         CHECK_FALSE(out.file_may_match);
@@ -156,12 +154,12 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_in").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         auto out =
-            run_pruner(idx_path, file_path, R"(cat in ["POSIX", "STDIO"])");
+            run_pruner(index_path, file_path, R"(cat in ["POSIX", "STDIO"])");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 3);
     }
@@ -170,14 +168,14 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_notin").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // Chunk 0: only POSIX → excluded by not in ["POSIX"]
         // Chunk 1: only STDIO → kept
         // Chunk 2: POSIX + MPI → MPI not in list → kept
-        auto out = run_pruner(idx_path, file_path, R"(cat not in ["POSIX"])");
+        auto out = run_pruner(index_path, file_path, R"(cat not in ["POSIX"])");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 2);
         CHECK(out.candidate_checkpoints[0] == 1);
@@ -188,14 +186,14 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_and").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // cat == "POSIX" → chunks 0, 2
         // name == "read" → chunks 0, 2
         // AND → chunks 0, 2
-        auto out = run_pruner(idx_path, file_path,
+        auto out = run_pruner(index_path, file_path,
                               R"(cat == "POSIX" and name == "read")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 2);
@@ -205,14 +203,14 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_and2").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // cat == "POSIX" → chunks 0, 2
         // name == "send" → chunk 2 only
         // AND → chunk 2
-        auto out = run_pruner(idx_path, file_path,
+        auto out = run_pruner(index_path, file_path,
                               R"(cat == "POSIX" and name == "send")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 1);
@@ -223,14 +221,14 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_or").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // cat == "STDIO" → chunk 1
         // name == "send" → chunk 2
         // OR → chunks 1, 2
-        auto out = run_pruner(idx_path, file_path,
+        auto out = run_pruner(index_path, file_path,
                               R"(cat == "STDIO" or name == "send")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 2);
@@ -242,13 +240,13 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_not").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // cat == "STDIO" → chunk 1
         // NOT → chunks 0, 2
-        auto out = run_pruner(idx_path, file_path, R"(not cat == "STDIO")");
+        auto out = run_pruner(index_path, file_path, R"(not cat == "STDIO")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 2);
         CHECK(out.candidate_checkpoints[0] == 0);
@@ -259,13 +257,13 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_range").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         // dur > "500": chunk 0 max=200 (skip), chunk 1 max=600 (keep),
         // chunk 2 max=1000 (keep)
-        auto out = run_pruner(idx_path, file_path, R"(dur > "500")");
+        auto out = run_pruner(index_path, file_path, R"(dur > "500")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 2);
         CHECK(out.candidate_checkpoints[0] == 1);
@@ -276,11 +274,11 @@ TEST_SUITE("ChunkPrunerUtility") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_pruner_case").string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
-        auto out = run_pruner(idx_path, file_path,
+        auto out = run_pruner(index_path, file_path,
                               R"(cat == "POSIX" AND name == "send")");
         CHECK(out.success);
         CHECK(out.candidate_checkpoints.size() == 1);
diff --git a/tests/utilities/composites/dft/indexing/test_manifest_index_builder.cpp b/tests/utilities/composites/dft/indexing/test_manifest_index_builder.cpp
index d6354c82..3761d6a2 100644
--- a/tests/utilities/composites/dft/indexing/test_manifest_index_builder.cpp
+++ b/tests/utilities/composites/dft/indexing/test_manifest_index_builder.cpp
@@ -94,15 +94,15 @@ TEST_SUITE("ManifestIndexBuilder") {
         CHECK(result.success == true);
         CHECK(result.total_lines > 0);
 
-        CHECK(fs::exists(result.idx_path));
+        CHECK(fs::exists(result.index_path));
 
-        IndexDatabase idx_db(result.idx_path);
+        IndexDatabase idx_db(result.index_path);
         idx_db.init_base_schema();
         idx_db.init_manifest_schema();
         int fid = idx_db.get_file_info_id(get_logical_path(trace_file));
         REQUIRE(fid >= 0);
 
-        auto event_ranges = queries::query_event_ranges(idx_db.sql_db(), fid);
+        auto event_ranges = idx_db.query_event_ranges(fid);
         CHECK(event_ranges.size() == 3);
 
         bool found_posix_read = false;
@@ -117,7 +117,7 @@ TEST_SUITE("ManifestIndexBuilder") {
         }
         CHECK(found_posix_read);
 
-        auto metadata = queries::query_metadata_lines(idx_db.sql_db(), fid);
+        auto metadata = idx_db.query_metadata_lines(fid);
         CHECK(metadata.size() == 2);
 
         fs::remove_all(test_dir);
diff --git a/tests/utilities/composites/dft/indexing/test_manifest_indexer.cpp b/tests/utilities/composites/dft/indexing/test_manifest_indexer.cpp
index a2d01bc3..84d6e68a 100644
--- a/tests/utilities/composites/dft/indexing/test_manifest_indexer.cpp
+++ b/tests/utilities/composites/dft/indexing/test_manifest_indexer.cpp
@@ -68,7 +68,7 @@ TEST_SUITE("ManifestIndexer") {
 
         ChunkIndexerInput input;
         input.with_file_path(trace_file)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
@@ -133,7 +133,7 @@ TEST_SUITE("ManifestIndexer") {
 
         ChunkIndexerInput input;
         input.with_file_path(trace_file)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
@@ -191,7 +191,7 @@ TEST_SUITE("ManifestIndexer") {
 
         ChunkIndexerInput input;
         input.with_file_path(trace_file)
-            .with_idx_path("")
+            .with_index_path("")
             .with_checkpoint_size(uncompressed_size)
             .with_checkpoint_idx(0)
             .with_byte_range(0, uncompressed_size)
diff --git a/tests/utilities/composites/dft/indexing/test_manifest_queries.cpp b/tests/utilities/composites/dft/indexing/test_manifest_queries.cpp
index e7ac88f3..b7450e18 100644
--- a/tests/utilities/composites/dft/indexing/test_manifest_queries.cpp
+++ b/tests/utilities/composites/dft/indexing/test_manifest_queries.cpp
@@ -10,7 +10,8 @@
 #include "testing_utilities.h"
 
 using namespace dftracer::utils;
-using namespace dftracer::utils::utilities::composites::dft::indexing;
+namespace queries =
+    dftracer::utils::utilities::composites::dft::indexing::queries;
 using dftracer::utils::utilities::indexer::IndexDatabase;
 using dftracer::utils::utilities::indexer::internal::get_logical_path;
 
@@ -29,9 +30,9 @@ TEST_SUITE("ManifestQueries") {
             dft_utils_test::make_unique_test_path("test_manifest_queries")
                 .string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
 
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(index_path);
         idx_db.init_base_schema();
         idx_db.init_manifest_schema();
         int fid =
@@ -39,26 +40,20 @@ TEST_SUITE("ManifestQueries") {
 
         idx_db.begin_transaction();
 
-        queries::insert_event_range(idx_db.sql_db(), fid, 0, "POSIX", "read",
-                                    {0, 2, 5});
-        queries::insert_event_range(idx_db.sql_db(), fid, 0, "POSIX", "write",
-                                    {1});
-        queries::insert_event_range(idx_db.sql_db(), fid, 0, "APP", "compute",
-                                    {3, 4});
-        queries::insert_event_range(idx_db.sql_db(), fid, 1, "POSIX", "read",
-                                    {0, 1});
+        idx_db.insert_event_range(fid, 0, "POSIX", "read", {0, 2, 5});
+        idx_db.insert_event_range(fid, 0, "POSIX", "write", {1});
+        idx_db.insert_event_range(fid, 0, "APP", "compute", {3, 4});
+        idx_db.insert_event_range(fid, 1, "POSIX", "read", {0, 1});
 
         idx_db.commit_transaction();
 
-        auto all = queries::query_event_ranges(idx_db.sql_db(), fid);
+        auto all = idx_db.query_event_ranges(fid);
         CHECK(all.size() == 4);
 
-        auto ckpt0 =
-            queries::query_event_ranges_for_checkpoint(idx_db.sql_db(), fid, 0);
+        auto ckpt0 = idx_db.query_event_ranges_for_checkpoint(fid, 0);
         CHECK(ckpt0.size() == 3);
 
-        auto ckpt1 =
-            queries::query_event_ranges_for_checkpoint(idx_db.sql_db(), fid, 1);
+        auto ckpt1 = idx_db.query_event_ranges_for_checkpoint(fid, 1);
         CHECK(ckpt1.size() == 1);
         CHECK(ckpt1[0].cat == "POSIX");
         CHECK(ckpt1[0].name == "read");
@@ -73,9 +68,9 @@ TEST_SUITE("ManifestQueries") {
             dft_utils_test::make_unique_test_path("test_manifest_meta_q")
                 .string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
 
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(index_path);
         idx_db.init_base_schema();
         idx_db.init_manifest_schema();
         int fid =
@@ -83,21 +78,19 @@ TEST_SUITE("ManifestQueries") {
 
         idx_db.begin_transaction();
 
-        queries::insert_metadata_lines(idx_db.sql_db(), fid, 0, "HH", {0, 3});
-        queries::insert_metadata_lines(idx_db.sql_db(), fid, 0, "FH", {1});
-        queries::insert_metadata_lines(idx_db.sql_db(), fid, 1, "HH", {0});
+        idx_db.insert_metadata_lines(fid, 0, "HH", {0, 3});
+        idx_db.insert_metadata_lines(fid, 0, "FH", {1});
+        idx_db.insert_metadata_lines(fid, 1, "HH", {0});
 
         idx_db.commit_transaction();
 
-        auto all = queries::query_metadata_lines(idx_db.sql_db(), fid);
+        auto all = idx_db.query_metadata_lines(fid);
         CHECK(all.size() == 3);
 
-        auto ckpt0 = queries::query_metadata_lines_for_checkpoint(
-            idx_db.sql_db(), fid, 0);
+        auto ckpt0 = idx_db.query_metadata_lines_for_checkpoint(fid, 0);
         CHECK(ckpt0.size() == 2);
 
-        auto ckpt1 = queries::query_metadata_lines_for_checkpoint(
-            idx_db.sql_db(), fid, 1);
+        auto ckpt1 = idx_db.query_metadata_lines_for_checkpoint(fid, 1);
         CHECK(ckpt1.size() == 1);
 
         fs::remove_all(test_dir);
@@ -108,28 +101,27 @@ TEST_SUITE("ManifestQueries") {
             dft_utils_test::make_unique_test_path("test_manifest_delete")
                 .string();
         fs::create_directories(test_dir);
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
 
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(index_path);
         idx_db.init_base_schema();
         idx_db.init_manifest_schema();
         int fid =
             idx_db.get_or_create_file_info(get_logical_path("test.pfw.gz"), 0);
 
         idx_db.begin_transaction();
-        queries::insert_event_range(idx_db.sql_db(), fid, 0, "POSIX", "read",
-                                    {0, 1});
-        queries::insert_metadata_lines(idx_db.sql_db(), fid, 0, "HH", {2});
+        idx_db.insert_event_range(fid, 0, "POSIX", "read", {0, 1});
+        idx_db.insert_metadata_lines(fid, 0, "HH", {2});
         idx_db.commit_transaction();
 
-        CHECK(queries::query_event_ranges(idx_db.sql_db(), fid).size() == 1);
-        CHECK(queries::query_metadata_lines(idx_db.sql_db(), fid).size() == 1);
+        CHECK(idx_db.query_event_ranges(fid).size() == 1);
+        CHECK(idx_db.query_metadata_lines(fid).size() == 1);
 
-        queries::delete_event_ranges(idx_db.sql_db(), fid);
-        CHECK(queries::query_event_ranges(idx_db.sql_db(), fid).empty());
+        idx_db.delete_event_ranges(fid);
+        CHECK(idx_db.query_event_ranges(fid).empty());
 
-        queries::delete_metadata_lines(idx_db.sql_db(), fid);
-        CHECK(queries::query_metadata_lines(idx_db.sql_db(), fid).empty());
+        idx_db.delete_metadata_lines(fid);
+        CHECK(idx_db.query_metadata_lines(fid).empty());
 
         fs::remove_all(test_dir);
     }
diff --git a/tests/utilities/composites/dft/reorganize/test_reconstruct_integration.cpp b/tests/utilities/composites/dft/reorganize/test_reconstruct_integration.cpp
index b740ee0c..f4efdc68 100644
--- a/tests/utilities/composites/dft/reorganize/test_reconstruct_integration.cpp
+++ b/tests/utilities/composites/dft/reorganize/test_reconstruct_integration.cpp
@@ -167,10 +167,10 @@ static void execute_extraction(const ExtractionPlan& plan,
             }
         }
 
-        std::string idx_path =
+        std::string index_path =
             internal::determine_index_path(src.file_path, index_dir);
         auto reader_input =
-            IndexedReadInput::from_file(src.file_path).with_index(idx_path);
+            IndexedReadInput::from_file(src.file_path).with_index(index_path);
         IndexedFileReaderUtility reader_utility;
         auto reader = reader_utility.process(reader_input).get();
 
@@ -243,6 +243,59 @@ static const SegmentInterval* find_segment(
     return nullptr;
 }
 
+/// Test helper: for each plan group listed in `group_gz_paths`, stores info,
+/// group, source and segment provenance for that output in one transaction.
+static void write_group_provenance(
+    const ExtractionPlan& plan,
+    const std::map<std::string, std::string>& group_gz_paths,
+    const std::string& reorg_dir) {
+    for (const auto& group : plan.groups) {
+        const auto found = group_gz_paths.find(group.name);
+        if (found == group_gz_paths.end()) continue;
+        const std::string& gz_path = found->second;
+        const std::string db_root =
+            internal::determine_provenance_index_path(gz_path, reorg_dir);
+
+        ProvenanceDatabase pdb(db_root);
+        pdb.init_schema();
+        const int fid = pdb.get_or_create_file_info(gz_path, 0);
+        REQUIRE(fid >= 0);
+
+        pdb.begin_transaction();
+        pdb.insert_info(fid, "version", "1.0");
+        pdb.insert_info(fid, "tool", "dftracer_organize");
+        pdb.insert_group(fid, group.name, group.query);
+
+        // Every source file of the plan is recorded under this output.
+        for (std::size_t idx = 0; idx < plan.source_files.size(); ++idx) {
+            const auto& source = plan.source_files[idx];
+            pdb.insert_source(fid, static_cast<int>(idx), source.file_path,
+                              static_cast<int>(source.num_checkpoints), "");
+        }
+
+        // source idx -> checkpoint idx -> line_numbers count for this group
+        std::map<std::size_t, std::map<std::uint64_t, std::size_t>> per_source;
+        for (const auto& task : plan.tasks) {
+            if (!(task.target_group == group.name)) continue;
+            per_source[task.source_file_idx][task.checkpoint_idx] =
+                task.line_numbers.size();
+        }
+
+        // Segments get consecutive output line ranges in map iteration order.
+        int next_line = 0;
+        for (const auto& [src_idx, by_checkpoint] : per_source) {
+            for (const auto& [ckpt, count] : by_checkpoint) {
+                const int n_lines = static_cast<int>(count);
+                pdb.insert_segment(fid, static_cast<int>(src_idx),
+                                   static_cast<int>(ckpt), next_line,
+                                   next_line + n_lines, n_lines);
+                next_line += n_lines;
+            }
+        }
+        pdb.commit_transaction();
+    }
+}
+
 TEST_SUITE("ReconstructIntegration") {
     TEST_CASE("Round-trip: reorganize then reconstruct") {
         std::string test_dir =
@@ -310,54 +363,8 @@ TEST_SUITE("ReconstructIntegration") {
             build_idx(gz_path, reorg_dir);
         }
 
-        // Step 6: Write provenance into each output .pidx
-        for (const auto& g : plan.groups) {
-            auto gz_it = group_gz_paths.find(g.name);
-            if (gz_it == group_gz_paths.end()) continue;
-            const std::string& gz_path = gz_it->second;
-
-            std::string pidx_path =
-                internal::determine_provenance_index_path(gz_path, reorg_dir);
-
-            ProvenanceDatabase pdb(pidx_path);
-            pdb.init_schema();
-            int fid = pdb.get_or_create_file_info(gz_path, 0);
-            REQUIRE(fid >= 0);
-
-            pdb.begin_transaction();
-
-            pdb.insert_info("version", "1.0");
-            pdb.insert_info("tool", "dftracer_organize");
-            pdb.insert_group(g.name, g.query);
-
-            for (std::size_t si = 0; si < plan.source_files.size(); ++si) {
-                const auto& src = plan.source_files[si];
-                pdb.insert_source(fid, static_cast<int>(si), src.file_path,
-                                  static_cast<int>(src.num_checkpoints), "");
-            }
-
-            std::map<std::size_t, std::map<std::uint64_t, std::size_t>>
-                segment_events;
-            for (const auto& task : plan.tasks) {
-                if (task.target_group == g.name) {
-                    segment_events[task.source_file_idx][task.checkpoint_idx] =
-                        task.line_numbers.size();
-                }
-            }
-
-            int output_line = 0;
-            for (const auto& [src_idx, ckpts] : segment_events) {
-                for (const auto& [ckpt, count] : ckpts) {
-                    pdb.insert_segment(static_cast<int>(src_idx),
-                                       static_cast<int>(ckpt), output_line,
-                                       output_line + static_cast<int>(count),
-                                       static_cast<int>(count));
-                    output_line += static_cast<int>(count);
-                }
-            }
-
-            pdb.commit_transaction();
-        }
+        // Step 6: Write provenance into the shared output-root .dftindex
+        write_group_provenance(plan, group_gz_paths, reorg_dir);
 
         // Step 7: Plan reconstruction
         std::vector<std::string> reorg_files;
@@ -403,18 +410,18 @@ TEST_SUITE("ReconstructIntegration") {
         }
 
         for (const auto& [reorg_file, intervals] : per_reorg_segments) {
-            std::string idx_path =
+            std::string index_path =
                 internal::determine_index_path(reorg_file, reorg_dir);
 
             MetadataCollectorUtility meta_collector;
             auto meta_input =
                 MetadataCollectorUtilityInput::from_file(reorg_file)
-                    .with_index(idx_path);
+                    .with_index(index_path);
             auto meta = meta_collector.process(meta_input).get();
             REQUIRE(meta.success);
 
             auto reader_input =
-                IndexedReadInput::from_file(reorg_file).with_index(idx_path);
+                IndexedReadInput::from_file(reorg_file).with_index(index_path);
             IndexedFileReaderUtility reader_utility;
             auto reader = reader_utility.process(reader_input).get();
 
@@ -496,4 +503,97 @@ TEST_SUITE("ReconstructIntegration") {
 
         fs::remove_all(test_dir);
     }
+
+    TEST_CASE(
+        "reconstruction planner reads multiple outputs from one shared "
+        ".dftindex") {
+        std::string test_dir =
+            dft_utils_test::make_unique_test_path("test_recon_shared_root")
+                .string();
+        std::string input_dir = test_dir + "/input";
+        std::string reorg_dir = test_dir + "/reorg";
+        fs::create_directories(input_dir);
+        fs::create_directories(reorg_dir);
+
+        std::string trace_file = create_test_trace(input_dir);
+        build_idx(trace_file, input_dir);
+
+        ReorganizationPlannerUtility planner;
+        ReorganizationPlannerInput planner_input;
+        planner_input.source_files = {trace_file};
+        planner_input.groups = {{"io", R"(cat == "POSIX")"},
+                                {"compute", R"(cat == "APP")"}};
+        planner_input.index_dir = input_dir;
+
+        auto plan = planner.process(planner_input).get();
+        REQUIRE(plan.tasks.size() > 0);
+
+        std::map<std::string, FILE*> group_files;
+        std::map<std::string, std::string> group_pfw_paths;
+        for (const auto& g : plan.groups) {
+            std::string pfw_path = reorg_dir + "/" + g.name + ".pfw";
+            FILE* f = std::fopen(pfw_path.c_str(), "w");
+            REQUIRE(f != nullptr);
+            group_files[g.name] = f;
+            group_pfw_paths[g.name] = pfw_path;
+        }
+
+        execute_extraction(plan, input_dir, group_files);
+
+        // Close the writers before the compressor reads the .pfw files.
+        for (auto& [_, f] : group_files) std::fclose(f);
+
+        std::map<std::string, std::string> group_gz_paths;
+        for (const auto& g : plan.groups) {
+            const std::string& pfw_path = group_pfw_paths.at(g.name);
+            if (!fs::exists(pfw_path) || fs::file_size(pfw_path) == 0) continue;
+
+            FileCompressorUtility compressor;
+            auto comp_result =
+                compressor
+                    .process(FileCompressionUtilityInput::from_file(pfw_path))
+                    .get();
+            REQUIRE(comp_result.success);
+
+            std::string gz_path = pfw_path + ".gz";
+            REQUIRE(fs::exists(gz_path));
+            group_gz_paths[g.name] = gz_path;
+            fs::remove(pfw_path);
+        }
+
+        for (const auto& [_, gz_path] : group_gz_paths) {
+            build_idx(gz_path, reorg_dir);
+        }
+
+        write_group_provenance(plan, group_gz_paths, reorg_dir);
+
+        // NOTE(review): path presumably depends only on reorg_dir; confirm.
+        const std::string shared_root =
+            internal::determine_provenance_index_path(trace_file, reorg_dir);
+        REQUIRE(fs::exists(shared_root));
+
+        ProvenanceDatabase pdb(shared_root);
+        const int io_fid = pdb.get_file_info_id(group_gz_paths.at("io"));
+        const int compute_fid =
+            pdb.get_file_info_id(group_gz_paths.at("compute"));
+        REQUIRE(io_fid >= 0);
+        REQUIRE(compute_fid >= 0);
+        CHECK(io_fid != compute_fid);
+        CHECK(pdb.query_group_name(io_fid) == "io");
+        CHECK(pdb.query_group_name(compute_fid) == "compute");
+
+        ReconstructionPlannerUtility recon_planner;
+        ReconstructionPlannerInput recon_input;
+        for (const auto& [_, gz_path] : group_gz_paths) {
+            recon_input.reorganized_files.push_back(gz_path);
+        }
+        recon_input.index_dir = reorg_dir;
+
+        auto recon_plan = recon_planner.process(recon_input).get();
+        REQUIRE(recon_plan.files.size() == 1);
+        CHECK(recon_plan.total_segments >= 2);
+        CHECK(recon_plan.total_events == 8);
+
+        fs::remove_all(test_dir);
+    }
 }
diff --git a/tests/utilities/composites/dft/reorganize/test_reconstruction_planner.cpp b/tests/utilities/composites/dft/reorganize/test_reconstruction_planner.cpp
index e5e42a23..d0470119 100644
--- a/tests/utilities/composites/dft/reorganize/test_reconstruction_planner.cpp
+++ b/tests/utilities/composites/dft/reorganize/test_reconstruction_planner.cpp
@@ -43,28 +43,29 @@ TEST_SUITE("ReconstructionPlanner") {
         }
 
         // Create .pidx sidecar with provenance
-        std::string pidx_path = determine_provenance_index_path(reorg_file, "");
+        std::string provenance_path =
+            determine_provenance_index_path(reorg_file, "");
         {
-            ProvenanceDatabase pdb(pidx_path);
+            ProvenanceDatabase pdb(provenance_path);
             pdb.init_schema();
             int fid = pdb.get_or_create_file_info(reorg_file, 0);
 
             pdb.begin_transaction();
 
             // Provenance info
-            pdb.insert_info("version", "1.0");
-            pdb.insert_info("tool", "dftracer_organize");
+            pdb.insert_info(fid, "version", "1.0");
+            pdb.insert_info(fid, "tool", "dftracer_organize");
 
             // Provenance group
-            pdb.insert_group("io", "cat=POSIX");
+            pdb.insert_group(fid, "io", "cat=POSIX");
 
             // Provenance source
             pdb.insert_source(fid, 0, "/original/trace.pfw.gz", 3, "abc123");
 
             // Provenance segments (3 checkpoints)
-            pdb.insert_segment(0, 0, 0, 100, 100);
-            pdb.insert_segment(0, 1, 100, 250, 150);
-            pdb.insert_segment(0, 2, 250, 400, 150);
+            pdb.insert_segment(fid, 0, 0, 0, 100, 100);
+            pdb.insert_segment(fid, 0, 1, 100, 250, 150);
+            pdb.insert_segment(fid, 0, 2, 250, 400, 150);
 
             pdb.commit_transaction();
         }
@@ -127,42 +128,42 @@ TEST_SUITE("ReconstructionPlanner") {
 
         // Create .pidx for io.pfw.gz
         {
-            std::string pidx_path =
+            std::string provenance_path =
                 determine_provenance_index_path(io_file, "");
-            ProvenanceDatabase pdb(pidx_path);
+            ProvenanceDatabase pdb(provenance_path);
             pdb.init_schema();
             int fid = pdb.get_or_create_file_info(io_file, 0);
 
             pdb.begin_transaction();
-            pdb.insert_info("version", "1.0");
-            pdb.insert_info("tool", "dftracer_organize");
-            pdb.insert_group("io", "cat=POSIX");
+            pdb.insert_info(fid, "version", "1.0");
+            pdb.insert_info(fid, "tool", "dftracer_organize");
+            pdb.insert_group(fid, "io", "cat=POSIX");
             pdb.insert_source(fid, 0, "/original/trace.pfw.gz", 2, "hash1");
 
             // Segments for checkpoints 0 and 1
-            pdb.insert_segment(0, 0, 0, 50, 50);
-            pdb.insert_segment(0, 1, 50, 120, 70);
+            pdb.insert_segment(fid, 0, 0, 0, 50, 50);
+            pdb.insert_segment(fid, 0, 1, 50, 120, 70);
 
             pdb.commit_transaction();
         }
 
         // Create .pidx for compute.pfw.gz
         {
-            std::string pidx_path =
+            std::string provenance_path =
                 determine_provenance_index_path(compute_file, "");
-            ProvenanceDatabase pdb(pidx_path);
+            ProvenanceDatabase pdb(provenance_path);
             pdb.init_schema();
             int fid = pdb.get_or_create_file_info(compute_file, 0);
 
             pdb.begin_transaction();
-            pdb.insert_info("version", "1.0");
-            pdb.insert_info("tool", "dftracer_organize");
-            pdb.insert_group("compute", "cat=APP");
+            pdb.insert_info(fid, "version", "1.0");
+            pdb.insert_info(fid, "tool", "dftracer_organize");
+            pdb.insert_group(fid, "compute", "cat=APP");
             pdb.insert_source(fid, 0, "/original/trace.pfw.gz", 2, "hash1");
 
             // Segments for checkpoints 0 and 1
-            pdb.insert_segment(0, 0, 0, 30, 30);
-            pdb.insert_segment(0, 1, 30, 80, 50);
+            pdb.insert_segment(fid, 0, 0, 0, 30, 30);
+            pdb.insert_segment(fid, 0, 1, 30, 80, 50);
 
             pdb.commit_transaction();
         }
@@ -207,9 +208,10 @@ TEST_SUITE("ReconstructionPlanner") {
         }
 
         // Create .pidx with NO provenance tables
-        std::string pidx_path = determine_provenance_index_path(reorg_file, "");
+        std::string provenance_path =
+            determine_provenance_index_path(reorg_file, "");
         {
-            ProvenanceDatabase pdb(pidx_path);
+            ProvenanceDatabase pdb(provenance_path);
             pdb.init_schema();
             pdb.get_or_create_file_info(reorg_file, 0);
             // No provenance data inserted
diff --git a/tests/utilities/composites/dft/reorganize/test_reorganization_planner.cpp b/tests/utilities/composites/dft/reorganize/test_reorganization_planner.cpp
index dfbd7082..d566ffa6 100644
--- a/tests/utilities/composites/dft/reorganize/test_reorganization_planner.cpp
+++ b/tests/utilities/composites/dft/reorganize/test_reorganization_planner.cpp
@@ -265,26 +265,26 @@ TEST_SUITE("ReorganizationPlanner") {
         std::string test_dir =
             dft_utils_test::make_unique_test_path("test_planner_prov").string();
         fs::create_directories(test_dir);
-        std::string pidx_path = test_dir + "/test_prov.pfw.gz.pidx";
+        std::string provenance_path = test_dir + "/test_prov.pfw.gz.pidx";
 
-        ProvenanceDatabase pdb(pidx_path);
+        ProvenanceDatabase pdb(provenance_path);
         pdb.init_schema();
         int fid = pdb.get_or_create_file_info("test.pfw.gz", 0);
 
         pdb.begin_transaction();
 
-        pdb.insert_info("version", "1.0");
-        pdb.insert_info("created_at", "2026-02-17");
+        pdb.insert_info(fid, "version", "1.0");
+        pdb.insert_info(fid, "created_at", "2026-02-17");
         pdb.insert_source(fid, 0, "/data/trace.pfw.gz", 9, "abc123");
-        pdb.insert_group("io", R"(cat == "POSIX")");
-        pdb.insert_segment(0, 0, 0, 100, 50);
-        pdb.insert_segment(0, 1, 100, 200, 45);
+        pdb.insert_group(fid, "io", R"(cat == "POSIX")");
+        pdb.insert_segment(fid, 0, 0, 0, 100, 50);
+        pdb.insert_segment(fid, 0, 1, 100, 200, 45);
 
         pdb.commit_transaction();
 
-        CHECK(pdb.query_info("version") == "1.0");
-        CHECK(pdb.query_info("created_at") == "2026-02-17");
-        CHECK(pdb.query_info("nonexistent").empty());
+        CHECK(pdb.query_info(fid, "version") == "1.0");
+        CHECK(pdb.query_info(fid, "created_at") == "2026-02-17");
+        CHECK(pdb.query_info(fid, "nonexistent").empty());
 
         auto sources = pdb.query_sources(fid);
         REQUIRE(sources.size() == 1);
@@ -293,7 +293,7 @@ TEST_SUITE("ReorganizationPlanner") {
         CHECK(sources[0].num_checkpoints == 9);
         CHECK(sources[0].event_hash == "abc123");
 
-        auto segments = pdb.query_segments(0);
+        auto segments = pdb.query_segments(fid, 0);
         REQUIRE(segments.size() == 2);
         CHECK(segments[0].source_checkpoint == 0);
         CHECK(segments[0].output_line_start == 0);
@@ -301,8 +301,8 @@ TEST_SUITE("ReorganizationPlanner") {
         CHECK(segments[0].event_count == 50);
         CHECK(segments[1].source_checkpoint == 1);
 
-        CHECK(pdb.query_group_name() == "io");
-        CHECK(pdb.query_group_predicate() == R"(cat == "POSIX")");
+        CHECK(pdb.query_group_name(fid) == "io");
+        CHECK(pdb.query_group_predicate(fid) == R"(cat == "POSIX")");
 
         fs::remove_all(test_dir);
     }
diff --git a/tests/utilities/composites/dft/reorganize/test_reorganize_integration.cpp b/tests/utilities/composites/dft/reorganize/test_reorganize_integration.cpp
index a65262d4..29c8e6ee 100644
--- a/tests/utilities/composites/dft/reorganize/test_reorganize_integration.cpp
+++ b/tests/utilities/composites/dft/reorganize/test_reorganize_integration.cpp
@@ -164,10 +164,10 @@ static void execute_extraction(const ExtractionPlan& plan,
             }
         }
 
-        std::string idx_path =
+        std::string index_path =
             internal::determine_index_path(src.file_path, index_dir);
         auto reader_input =
-            IndexedReadInput::from_file(src.file_path).with_index(idx_path);
+            IndexedReadInput::from_file(src.file_path).with_index(index_path);
         IndexedFileReaderUtility reader_utility;
         auto reader = reader_utility.process(reader_input).get();
 
@@ -405,11 +405,11 @@ TEST_SUITE("ReorganizeIntegration") {
             REQUIRE(fid >= 0);
 
             pdb.begin_transaction();
-            pdb.insert_info("version", "1.0");
-            pdb.insert_info("tool", "dftracer_organize");
-            pdb.insert_group("io", R"(cat == "POSIX")");
+            pdb.insert_info(fid, "version", "1.0");
+            pdb.insert_info(fid, "tool", "dftracer_organize");
+            pdb.insert_group(fid, "io", R"(cat == "POSIX")");
             pdb.insert_source(fid, 0, trace_file, 1, "");
-            pdb.insert_segment(0, 0, 0, 5, 3);
+            pdb.insert_segment(fid, 0, 0, 0, 5, 3);
             pdb.commit_transaction();
         }
 
@@ -420,16 +420,16 @@ TEST_SUITE("ReorganizeIntegration") {
             int fid = pdb.get_file_info_id(io_gz);
             REQUIRE(fid >= 0);
 
-            CHECK(pdb.query_info("version") == "1.0");
-            CHECK(pdb.query_info("tool") == "dftracer_organize");
-            CHECK(pdb.query_group_name() == "io");
-            CHECK(pdb.query_group_predicate() == R"(cat == "POSIX")");
+            CHECK(pdb.query_info(fid, "version") == "1.0");
+            CHECK(pdb.query_info(fid, "tool") == "dftracer_organize");
+            CHECK(pdb.query_group_name(fid) == "io");
+            CHECK(pdb.query_group_predicate(fid) == R"(cat == "POSIX")");
 
             auto sources = pdb.query_sources(fid);
             REQUIRE(sources.size() == 1);
             CHECK(sources[0].path == trace_file);
 
-            auto segments = pdb.query_segments(0);
+            auto segments = pdb.query_segments(fid, 0);
             REQUIRE(segments.size() == 1);
             CHECK(segments[0].output_line_start == 0);
             CHECK(segments[0].output_line_end == 5);
diff --git a/tests/utilities/composites/dft/statistics/test_statistics_aggregator.cpp b/tests/utilities/composites/dft/statistics/test_statistics_aggregator.cpp
index 7e6242c0..205c85fd 100644
--- a/tests/utilities/composites/dft/statistics/test_statistics_aggregator.cpp
+++ b/tests/utilities/composites/dft/statistics/test_statistics_aggregator.cpp
@@ -1,6 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/statistics/statistics_aggregator_utility.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <dftracer/utils/utilities/indexer/internal/helpers.h>
@@ -12,8 +12,8 @@
 #include "testing_utilities.h"
 
 using namespace dftracer::utils;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dftracer::utils::utilities::composites::dft::indexing;
-using namespace dftracer::utils::utilities::composites::dft::indexing::queries;
 using namespace dftracer::utils::utilities::composites::dft::statistics;
 using dftracer::utils::utilities::indexer::IndexDatabase;
 using dftracer::utils::utilities::indexer::internal::get_logical_path;
@@ -22,7 +22,7 @@ static void write_chunk(
     IndexDatabase& db, int fid, std::uint64_t checkpoint_idx,
     ChunkStatistics& stats,
     const std::vector<std::pair<std::string, std::string>>& dim_values) {
-    queries::insert_chunk_statistics(db.sql_db(), fid, checkpoint_idx, stats);
+    db.insert_chunk_statistics(fid, checkpoint_idx, stats);
 
     std::unordered_map<std::string, ChunkDimensionStats> dim_stats;
     for (const auto& [dim, val] : dim_values) {
@@ -32,14 +32,13 @@ static void write_chunk(
         ds.observe(val);
     }
     for (const auto& [dim, ds] : dim_stats) {
-        queries::insert_chunk_dimension_stats(db.sql_db(), fid, checkpoint_idx,
-                                              ds);
+        db.insert_chunk_dimension_stats(fid, checkpoint_idx, ds);
     }
 }
 
-static void populate_test_idx(const std::string& idx_path,
-                              const std::string& file_path) {
-    IndexDatabase idx_db(idx_path);
+static void populate_test_db(const std::string& db_root,
+                             const std::string& file_path) {
+    IndexDatabase idx_db(db_root);
     idx_db.init_base_schema();
     idx_db.init_bloom_schema();
 
@@ -93,14 +92,15 @@ TEST_SUITE("StatisticsAggregatorUtility") {
             dft_utils_test::make_unique_test_path("test_stats_agg").string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string db_root =
+            determine_index_path(test_dir + "/test.pfw.gz", "");
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_db(db_root, file_path);
 
         StatisticsAggregatorUtility aggregator;
         StatisticsAggregatorInput input;
         input.file_path = file_path;
-        input.idx_path = idx_path;
+        input.index_path = db_root;
 
         auto result = aggregator.process(input).get();
 
@@ -135,9 +135,9 @@ TEST_SUITE("StatisticsAggregatorUtility") {
         StatisticsAggregatorUtility aggregator;
         StatisticsAggregatorInput input;
         input.file_path = "/fake/nonexistent.pfw.gz";
-        input.idx_path =
-            dft_utils_test::make_unique_test_path("nonexistent").string() +
-            ".idx";
+        input.index_path =
+            (dft_utils_test::make_unique_test_path("nonexistent") / ".dftindex")
+                .string();
 
         auto result = aggregator.process(input).get();
 
@@ -151,14 +151,15 @@ TEST_SUITE("StatisticsAggregatorUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string db_root =
+            determine_index_path(test_dir + "/test.pfw.gz", "");
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_db(db_root, file_path);
 
         StatisticsAggregatorUtility aggregator;
         StatisticsAggregatorInput input;
         input.file_path = "/fake/other_file.pfw.gz";
-        input.idx_path = idx_path;
+        input.index_path = db_root;
 
         auto result = aggregator.process(input).get();
 
@@ -174,11 +175,12 @@ TEST_SUITE("StatisticsAggregatorUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string db_root =
+            determine_index_path(test_dir + "/test.pfw.gz", "");
         std::string file_path = "/fake/test.pfw.gz";
 
         // Create idx with file_info but no chunk_statistics
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(db_root);
         idx_db.init_base_schema();
         idx_db.init_bloom_schema();
         idx_db.get_or_create_file_info(get_logical_path(file_path), 12345);
@@ -186,7 +188,7 @@ TEST_SUITE("StatisticsAggregatorUtility") {
         StatisticsAggregatorUtility aggregator;
         StatisticsAggregatorInput input;
         input.file_path = file_path;
-        input.idx_path = idx_path;
+        input.index_path = db_root;
 
         auto result = aggregator.process(input).get();
 
@@ -204,10 +206,11 @@ TEST_SUITE("StatisticsAggregatorUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string db_root =
+            determine_index_path(test_dir + "/test.pfw.gz", "");
         std::string file_path = "/fake/test.pfw.gz";
 
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(db_root);
         idx_db.init_base_schema();
         idx_db.init_bloom_schema();
         int fid =
@@ -220,7 +223,7 @@ TEST_SUITE("StatisticsAggregatorUtility") {
             ChunkStatistics stats;
             stats.update_from_event("op", "cat", 1, 1, 1000, 10);
             stats.update_from_event("op", "cat", 1, 1, 2000, 20);
-            queries::insert_chunk_statistics(idx_db.sql_db(), fid, 0, stats);
+            idx_db.insert_chunk_statistics(fid, 0, stats);
         }
 
         // Chunk 1: durations 30, 40, 50
@@ -229,7 +232,7 @@ TEST_SUITE("StatisticsAggregatorUtility") {
             stats.update_from_event("op", "cat", 1, 1, 3000, 30);
             stats.update_from_event("op", "cat", 1, 1, 4000, 40);
             stats.update_from_event("op", "cat", 1, 1, 5000, 50);
-            queries::insert_chunk_statistics(idx_db.sql_db(), fid, 1, stats);
+            idx_db.insert_chunk_statistics(fid, 1, stats);
         }
 
         idx_db.commit_transaction();
@@ -237,7 +240,7 @@ TEST_SUITE("StatisticsAggregatorUtility") {
         StatisticsAggregatorUtility aggregator;
         StatisticsAggregatorInput input;
         input.file_path = file_path;
-        input.idx_path = idx_path;
+        input.index_path = db_root;
 
         auto result = aggregator.process(input).get();
 
diff --git a/tests/utilities/composites/dft/statistics/test_statistics_query.cpp b/tests/utilities/composites/dft/statistics/test_statistics_query.cpp
index 8d98f2f8..125fc6a2 100644
--- a/tests/utilities/composites/dft/statistics/test_statistics_query.cpp
+++ b/tests/utilities/composites/dft/statistics/test_statistics_query.cpp
@@ -13,7 +13,7 @@ static TraceStatistics make_test_stats() {
     ts.success = true;
     ts.num_chunks = 2;
     ts.file_path = "/test/file.pfw.gz";
-    ts.idx_path = "/test/file.pfw.gz.idx";
+    ts.index_path = "/test/file.pfw.gz.idx";
 
     // Simulate a variety of events
     ts.merged.update_from_event("read", "POSIX", 1, 1, 1000, 100);
diff --git a/tests/utilities/composites/dft/statistics/test_trace_statistics.cpp b/tests/utilities/composites/dft/statistics/test_trace_statistics.cpp
index 16e3a0e7..6af11bcb 100644
--- a/tests/utilities/composites/dft/statistics/test_trace_statistics.cpp
+++ b/tests/utilities/composites/dft/statistics/test_trace_statistics.cpp
@@ -53,7 +53,7 @@ TEST_SUITE("TraceStatistics") {
     TEST_CASE("TraceStatistics - to_json produces valid JSON") {
         TraceStatistics ts;
         ts.file_path = "/test/file.pfw.gz";
-        ts.idx_path = "/test/file.pfw.gz.idx";
+        ts.index_path = "/test/file.pfw.gz.idx";
         ts.success = true;
         ts.num_chunks = 2;
 
@@ -97,7 +97,7 @@ TEST_SUITE("TraceStatistics") {
     TEST_CASE("TraceStatistics - to_json with error") {
         TraceStatistics ts;
         ts.file_path = "/test/missing.pfw.gz";
-        ts.idx_path = "/test/missing.pfw.gz.idx";
+        ts.index_path = "/test/missing.pfw.gz.idx";
         ts.success = false;
         ts.error_message = "File not found";
 
diff --git a/tests/utilities/composites/dft/test_index_builder.cpp b/tests/utilities/composites/dft/test_index_builder.cpp
index 85be7531..19ae5f69 100644
--- a/tests/utilities/composites/dft/test_index_builder.cpp
+++ b/tests/utilities/composites/dft/test_index_builder.cpp
@@ -4,6 +4,7 @@
 #include <dftracer/utils/core/tasks/coro_scope.h>
 #include <dftracer/utils/core/utilities/behaviors/behavior_chain.h>
 #include <dftracer/utils/core/utilities/utility_executor.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/index_builder_utility.h>
 #include <doctest/doctest.h>
 #include <testing_utilities.h>
@@ -14,6 +15,7 @@
 using namespace dftracer::utils;
 using namespace dftracer::utils::utilities::indexer;
 using namespace dftracer::utils::utilities::behaviors;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dft_utils_test;
 
 namespace tags = dftracer::utils::utilities::tags;
@@ -46,7 +48,7 @@ TEST_SUITE("IndexBuilder") {
 
         SUBCASE("Build index for gzip file") {
             std::string gz_file = env.create_dft_test_gzip_file(50);
-            std::string idx_path = gz_file + ".idx";
+            std::string db_root = determine_index_path(gz_file, "");
 
             auto input = IndexBuildConfig::for_file(gz_file)
                              .with_index_dir("")
@@ -56,16 +58,15 @@ TEST_SUITE("IndexBuilder") {
             auto output = run_builder(input);
 
             CHECK(output.file_path == gz_file);
-            CHECK(output.idx_path == idx_path);
+            CHECK(output.index_path == db_root);
             CHECK(output.success == true);
             CHECK(output.was_skipped == false);
 
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(db_root));
         }
 
         SUBCASE("Use existing index without force rebuild") {
             std::string gz_file = env.create_dft_test_gzip_file(20);
-            std::string idx_path = gz_file + ".idx";
 
             auto input1 = IndexBuildConfig::for_file(gz_file)
                               .with_index_dir("")
@@ -85,7 +86,6 @@ TEST_SUITE("IndexBuilder") {
         TestEnvironment env(100);
 
         std::string gz_file = env.create_dft_test_gzip_file(30);
-        std::string idx_path = gz_file + ".idx";
 
         auto input = IndexBuildConfig::for_file(gz_file)
                          .with_index_dir("")
diff --git a/tests/utilities/composites/dft/test_metadata_collector.cpp b/tests/utilities/composites/dft/test_metadata_collector.cpp
index 5762974e..7e4af6c1 100644
--- a/tests/utilities/composites/dft/test_metadata_collector.cpp
+++ b/tests/utilities/composites/dft/test_metadata_collector.cpp
@@ -1,6 +1,7 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/archive_format.h>
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/metadata_collector_utility.h>
 #include <doctest/doctest.h>
 
@@ -145,9 +146,10 @@ TEST_SUITE("MetadataCollector") {
             REQUIRE(fs::exists(gz_file));
 
             // Create input with index
-            std::string idx_path = gz_file + ".idx";
+            std::string index_path =
+                internal::determine_index_path(gz_file, "");
             auto input = MetadataCollectorUtilityInput::from_file(gz_file)
-                             .with_index(idx_path)
+                             .with_index(index_path)
                              .with_checkpoint_size(1024 * 1024)  // 1MB
                              .with_force_rebuild(true)
                              .with_compute_hash(true);
@@ -165,7 +167,7 @@ TEST_SUITE("MetadataCollector") {
             CHECK(output.format == ArchiveFormat::GZIP);
             CHECK(output.has_index == true);
             CHECK(output.index_valid == true);
-            CHECK(output.idx_path == idx_path);
+            CHECK(output.index_path == index_path);
             CHECK(output.compressed_size > 0);
             CHECK(output.uncompressed_size > 0);
             CHECK(output.compressed_size <
@@ -175,7 +177,7 @@ TEST_SUITE("MetadataCollector") {
             CHECK(output.error_message.empty());
 
             // Verify index file was created
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(index_path));
         }
 
         SUBCASE("Reuse existing index") {
@@ -186,12 +188,13 @@ TEST_SUITE("MetadataCollector") {
             int result = std::system(cmd.c_str());
             REQUIRE(result == 0);
 
-            std::string idx_path = gz_file + ".idx";
+            std::string index_path =
+                internal::determine_index_path(gz_file, "");
 
             // First run - build index
             {
                 auto input = MetadataCollectorUtilityInput::from_file(gz_file)
-                                 .with_index(idx_path)
+                                 .with_index(index_path)
                                  .with_force_rebuild(true)
                                  .with_compute_hash(true);
 
@@ -201,13 +204,13 @@ TEST_SUITE("MetadataCollector") {
                 CHECK(output.success == true);
                 CHECK(output.has_index == true);
                 CHECK(output.index_valid == true);
-                CHECK(fs::exists(idx_path));
+                CHECK(fs::exists(index_path));
             }
 
             // Second run - reuse index (no force rebuild)
             {
                 auto input = MetadataCollectorUtilityInput::from_file(gz_file)
-                                 .with_index(idx_path)
+                                 .with_index(index_path)
                                  .with_force_rebuild(false)
                                  .with_compute_hash(true);
 
diff --git a/tests/utilities/composites/dft/views/test_view_builder.cpp b/tests/utilities/composites/dft/views/test_view_builder.cpp
index 17f06cac..ca2c86f3 100644
--- a/tests/utilities/composites/dft/views/test_view_builder.cpp
+++ b/tests/utilities/composites/dft/views/test_view_builder.cpp
@@ -1,7 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/utilities/composites/dft/indexing/bloom_filter.h>
-#include <dftracer/utils/utilities/composites/dft/indexing/queries/queries.h>
 #include <dftracer/utils/utilities/composites/dft/views/view_builder_utility.h>
 #include <dftracer/utils/utilities/composites/dft/views/view_definition.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
@@ -24,9 +23,9 @@ using dftracer::utils::utilities::indexer::internal::get_logical_path;
 //   1: name={open,close}, cat={POSIX}
 //   2: name={train}, cat={compute}
 //   3: name={forward}, cat={compute,ai_framework}
-static void populate_test_idx(const std::string& idx_path,
+static void populate_test_idx(const std::string& index_path,
                               const std::string& file_path) {
-    IndexDatabase idx_db(idx_path);
+    IndexDatabase idx_db(index_path);
     idx_db.init_base_schema();
     idx_db.init_bloom_schema();
 
@@ -58,10 +57,9 @@ static void populate_test_idx(const std::string& idx_path,
             file_name_bloom.add(n);
         }
         auto name_blob = name_bloom.serialize();
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, static_cast<std::uint64_t>(ckpt), "name",
-            name_blob.data(), static_cast<int>(name_blob.size()),
-            name_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(
+            fid, static_cast<std::uint64_t>(ckpt), "name", name_blob.data(),
+            static_cast<int>(name_blob.size()), name_bloom.num_entries());
 
         BloomFilter cat_bloom(100, 0.01);
         for (const auto& c : chunks[ckpt].cats) {
@@ -69,25 +67,24 @@ static void populate_test_idx(const std::string& idx_path,
             file_cat_bloom.add(c);
         }
         auto cat_blob = cat_bloom.serialize();
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, static_cast<std::uint64_t>(ckpt), "cat",
-            cat_blob.data(), static_cast<int>(cat_blob.size()),
-            cat_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(
+            fid, static_cast<std::uint64_t>(ckpt), "cat", cat_blob.data(),
+            static_cast<int>(cat_blob.size()), cat_bloom.num_entries());
     }
 
     // File-level bloom filters
     auto name_blob = file_name_bloom.serialize();
-    queries::insert_file_bloom_filter(
-        idx_db.sql_db(), fid, "name", name_blob.data(),
-        static_cast<int>(name_blob.size()), file_name_bloom.num_entries());
+    idx_db.insert_file_bloom_filter(fid, "name", name_blob.data(),
+                                    static_cast<int>(name_blob.size()),
+                                    file_name_bloom.num_entries());
 
     auto cat_blob = file_cat_bloom.serialize();
-    queries::insert_file_bloom_filter(
-        idx_db.sql_db(), fid, "cat", cat_blob.data(),
-        static_cast<int>(cat_blob.size()), file_cat_bloom.num_entries());
+    idx_db.insert_file_bloom_filter(fid, "cat", cat_blob.data(),
+                                    static_cast<int>(cat_blob.size()),
+                                    file_cat_bloom.num_entries());
 
-    queries::insert_index_dimension(idx_db.sql_db(), fid, "name");
-    queries::insert_index_dimension(idx_db.sql_db(), fid, "cat");
+    idx_db.insert_index_dimension(fid, "name");
+    idx_db.insert_index_dimension(fid, "cat");
 
     idx_db.commit_transaction();
 }
@@ -99,14 +96,14 @@ TEST_SUITE("ViewBuilderUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         ViewBuilderInput input;
         input.with_view(ViewDefinition::io_view())
             .with_file_path(file_path)
-            .with_idx_path(idx_path)
+            .with_index_path(index_path)
             .with_uncompressed_size(40000)
             .with_num_checkpoints(4);
 
@@ -135,14 +132,14 @@ TEST_SUITE("ViewBuilderUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         ViewBuilderInput input;
         input.with_view(ViewDefinition::compute_view())
             .with_file_path(file_path)
-            .with_idx_path(idx_path)
+            .with_index_path(index_path)
             .with_uncompressed_size(40000)
             .with_num_checkpoints(4);
 
@@ -165,9 +162,9 @@ TEST_SUITE("ViewBuilderUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         ViewDefinition view;
         view.with_name("nonexistent").with_query(R"(cat == "NONEXISTENT")");
@@ -175,7 +172,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path(file_path)
-            .with_idx_path(idx_path)
+            .with_index_path(index_path)
             .with_uncompressed_size(40000)
             .with_num_checkpoints(4);
 
@@ -196,9 +193,9 @@ TEST_SUITE("ViewBuilderUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
-        populate_test_idx(idx_path, file_path);
+        populate_test_idx(index_path, file_path);
 
         ViewDefinition view;
         view.with_name("time_only").with_query(R"(ts >= 0 and ts <= 100000)");
@@ -206,7 +203,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path(file_path)
-            .with_idx_path(idx_path)
+            .with_index_path(index_path)
             .with_uncompressed_size(40000)
             .with_num_checkpoints(4);
 
@@ -228,7 +225,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path("/fake/file.pfw.gz")
-            .with_idx_path("")  // No bloom index
+            .with_index_path("")  // No bloom index
             .with_uncompressed_size(30000)
             .with_num_checkpoints(3);
 
@@ -249,7 +246,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path("/fake/file.pfw.gz")
-            .with_idx_path("")
+            .with_index_path("")
             .with_uncompressed_size(12000)
             .with_num_checkpoints(3);
 
@@ -281,7 +278,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path("/fake/file.pfw.gz")
-            .with_idx_path("")
+            .with_index_path("")
             .with_uncompressed_size(10000)
             .with_num_checkpoints(0);
 
@@ -301,11 +298,11 @@ TEST_SUITE("ViewBuilderUtility") {
                 .string();
         fs::create_directories(test_dir);
 
-        std::string idx_path = test_dir + "/test.pfw.gz.idx";
+        std::string index_path = test_dir + "/test.pfw.gz.idx";
         std::string file_path = "/fake/test.pfw.gz";
 
         // Create idx with fhash dimension
-        IndexDatabase idx_db(idx_path);
+        IndexDatabase idx_db(index_path);
         idx_db.init_base_schema();
         idx_db.init_bloom_schema();
         int fid =
@@ -316,15 +313,14 @@ TEST_SUITE("ViewBuilderUtility") {
         fhash_bloom.add("hash123");
         auto blob = fhash_bloom.serialize();
 
-        queries::insert_file_bloom_filter(
-            idx_db.sql_db(), fid, "fhash", blob.data(),
-            static_cast<int>(blob.size()), fhash_bloom.num_entries());
-        queries::insert_chunk_bloom_filter(
-            idx_db.sql_db(), fid, 0, "fhash", blob.data(),
-            static_cast<int>(blob.size()), fhash_bloom.num_entries());
-        queries::insert_index_dimension(idx_db.sql_db(), fid, "fhash");
-        queries::insert_hash_resolution(idx_db.sql_db(), fid, "fhash",
-                                        "hash123", "/data/file.h5");
+        idx_db.insert_file_bloom_filter(fid, "fhash", blob.data(),
+                                        static_cast<int>(blob.size()),
+                                        fhash_bloom.num_entries());
+        idx_db.insert_chunk_bloom_filter(fid, 0, "fhash", blob.data(),
+                                         static_cast<int>(blob.size()),
+                                         fhash_bloom.num_entries());
+        idx_db.insert_index_dimension(fid, "fhash");
+        idx_db.insert_hash_resolution(fid, "fhash", "hash123", "/data/file.h5");
         idx_db.commit_transaction();
 
         // Use "file" alias which should resolve to "fhash"
@@ -334,7 +330,7 @@ TEST_SUITE("ViewBuilderUtility") {
         ViewBuilderInput input;
         input.with_view(view)
             .with_file_path(file_path)
-            .with_idx_path(idx_path)
+            .with_index_path(index_path)
             .with_uncompressed_size(10000)
             .with_num_checkpoints(1);
 
diff --git a/tests/utilities/composites/dft/views/test_view_reader.cpp b/tests/utilities/composites/dft/views/test_view_reader.cpp
index 4c38a49b..b988040b 100644
--- a/tests/utilities/composites/dft/views/test_view_reader.cpp
+++ b/tests/utilities/composites/dft/views/test_view_reader.cpp
@@ -3,6 +3,7 @@
 #include <dftracer/utils/core/coro/async_generator.h>
 #include <dftracer/utils/core/coro/task.h>
 #include <dftracer/utils/utilities/common/query/query.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/dft/views/view_definition.h>
 #include <dftracer/utils/utilities/composites/dft/views/view_reader_utility.h>
 #include <doctest/doctest.h>
@@ -15,6 +16,7 @@
 #include <vector>
 
 using namespace dftracer::utils;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dftracer::utils::utilities::composites::dft::views;
 using namespace dft_utils_test;
 using dftracer::utils::utilities::common::query::Query;
@@ -62,10 +64,11 @@ TEST_SUITE("ViewReader") {
         TestEnvironment env(200);
         REQUIRE(env.is_valid());
         std::string gz = create_pfw_gz(env, 50);
+        std::string db_root = determine_index_path(gz, "");
 
         ViewReaderInput input;
         input.with_file_path(gz)
-            .with_idx_path(gz + ".idx")
+            .with_index_path(db_root)
             .with_checkpoint_size(1024)
             .with_byte_range(0, std::numeric_limits<std::size_t>::max());
         input.view.with_include_metadata(false);
@@ -82,10 +85,11 @@ TEST_SUITE("ViewReader") {
         TestEnvironment env(200);
         REQUIRE(env.is_valid());
         std::string gz = create_pfw_gz(env, 50);
+        std::string db_root = determine_index_path(gz, "");
 
         ViewReaderInput input;
         input.with_file_path(gz)
-            .with_idx_path(gz + ".idx")
+            .with_index_path(db_root)
             .with_checkpoint_size(1024)
             .with_byte_range(0, std::numeric_limits<std::size_t>::max());
         input.view.with_include_metadata(false);
@@ -105,10 +109,11 @@ TEST_SUITE("ViewReader") {
         TestEnvironment env(200);
         REQUIRE(env.is_valid());
         std::string gz = create_pfw_gz(env, 50);
+        std::string db_root = determine_index_path(gz, "");
 
         ViewReaderInput input;
         input.with_file_path(gz)
-            .with_idx_path(gz + ".idx")
+            .with_index_path(db_root)
             .with_checkpoint_size(1024)
             .with_byte_range(0, std::numeric_limits<std::size_t>::max());
         input.view.with_include_metadata(false);
diff --git a/tests/utilities/composites/test_indexed_file_reader.cpp b/tests/utilities/composites/test_indexed_file_reader.cpp
index b0abe56b..e8441d57 100644
--- a/tests/utilities/composites/test_indexed_file_reader.cpp
+++ b/tests/utilities/composites/test_indexed_file_reader.cpp
@@ -1,4 +1,5 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/composites/indexed_file_reader_utility.h>
 #include <dftracer/utils/utilities/composites/types.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
@@ -14,6 +15,7 @@ using namespace dftracer::utils;
 using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dftracer::utils::utilities::reader::internal;
 using namespace dftracer::utils::utilities::composites;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dft_utils_test;
 
 TEST_SUITE("IndexedFileReader") {
@@ -21,23 +23,23 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Process gzip file without existing index") {
             TestEnvironment env(10);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             // Ensure no index exists initially
-            if (fs::exists(idx_path)) {
-                fs::remove(idx_path);
+            if (fs::exists(db_root)) {
+                fs::remove_all(db_root);
             }
 
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input = IndexedReadInput::from_file(gz_path)
-                                         .with_index(idx_path)
+                                         .with_index(db_root)
                                          .with_checkpoint_size(1024);
 
             // Process should create index and return reader
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            CHECK(fs::exists(idx_path));  // Index should be created
+            CHECK(fs::exists(db_root));
 
             // Verify reader can read lines
             auto stream =
@@ -55,45 +57,37 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Process gzip file with existing index") {
             TestEnvironment env(5);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             // Create index first
-            auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+            auto indexer = IndexerFactory::create(gz_path, db_root, 1024, true);
             REQUIRE(indexer != nullptr);
             indexer->build();
-            REQUIRE(fs::exists(idx_path));
-
-            // Get initial modification time
-            auto initial_mtime = fs::last_write_time(idx_path);
+            REQUIRE(fs::exists(db_root));
 
             // Process with existing index (should not rebuild)
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input = IndexedReadInput::from_file(gz_path)
-                                         .with_index(idx_path)
+                                         .with_index(db_root)
                                          .with_checkpoint_size(1024);
 
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            CHECK(fs::exists(idx_path));
-
-            // Index should not be rebuilt (same modification time)
-            auto current_mtime = fs::last_write_time(idx_path);
-            CHECK(current_mtime == initial_mtime);
+            CHECK(fs::exists(db_root));
+            CHECK(reader->get_num_lines() > 0);  // NOTE: no-rebuild unchecked
         }
 
         SUBCASE("Force rebuild existing index") {
             TestEnvironment env(5);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             // Create index first
-            auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+            auto indexer = IndexerFactory::create(gz_path, db_root, 1024, true);
             REQUIRE(indexer != nullptr);
             indexer->build();
-            REQUIRE(fs::exists(idx_path));
+            REQUIRE(fs::exists(db_root));
 
             // Sleep to ensure different timestamp
             std::this_thread::sleep_for(std::chrono::milliseconds(10));
@@ -101,14 +95,14 @@ TEST_SUITE("IndexedFileReader") {
             // Process with force rebuild
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input = IndexedReadInput::from_file(gz_path)
-                                         .with_index(idx_path)
+                                         .with_index(db_root)
                                          .with_checkpoint_size(1024)
                                          .with_force_rebuild(true);
 
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(db_root));
 
             // Reader should work
             CHECK(reader->get_num_lines() > 0);
@@ -119,19 +113,19 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Configure checkpoint size") {
             TestEnvironment env(20);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             IndexedFileReaderUtility reader_utility;
 
             // Use custom checkpoint size
             IndexedReadInput input = IndexedReadInput::from_file(gz_path)
-                                         .with_index(idx_path)
+                                         .with_index(db_root)
                                          .with_checkpoint_size(2048);
 
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(db_root));
 
             // Verify reader works
             CHECK(reader->get_num_lines() == 20);
@@ -145,7 +139,7 @@ TEST_SUITE("IndexedFileReader") {
 
             // Test fluent API
             auto input = IndexedReadInput::from_file(gz_path)
-                             .with_index(gz_path + ".idx")
+                             .with_index(determine_index_path(gz_path, ""))
                              .with_checkpoint_size(512)
                              .with_force_rebuild(false);
 
@@ -158,17 +152,17 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Constructor with all parameters") {
             TestEnvironment env(5);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             IndexedFileReaderUtility reader_utility;
 
             // Use constructor directly
-            IndexedReadInput input(gz_path, idx_path, 1024, false);
+            IndexedReadInput input(gz_path, db_root, 1024, false);
 
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(db_root));
         }
     }
 
@@ -177,7 +171,7 @@ TEST_SUITE("IndexedFileReader") {
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
                 IndexedReadInput::from_file("non_existent.gz")
-                    .with_index("non_existent.gz.idx");
+                    .with_index("non_existent.gz.dftindex");
 
             CHECK_THROWS_AS(reader_utility.process(input).get(),
                             std::runtime_error);
@@ -187,7 +181,7 @@ TEST_SUITE("IndexedFileReader") {
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
                 IndexedReadInput::from_file("/invalid/path/file.gz")
-                    .with_index("/invalid/path/file.gz.idx");
+                    .with_index("/invalid/path/.dftindex");
 
             CHECK_THROWS_AS(reader_utility.process(input).get(),
                             std::runtime_error);
@@ -196,7 +190,7 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Empty file path") {
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
-                IndexedReadInput::from_file("").with_index("file.gz.idx");
+                IndexedReadInput::from_file("").with_index(".dftindex");
 
             CHECK_THROWS_AS(reader_utility.process(input).get(),
                             std::runtime_error);
@@ -210,8 +204,8 @@ TEST_SUITE("IndexedFileReader") {
 
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
-                IndexedReadInput::from_file(gz_path).with_index(gz_path +
-                                                                ".idx");
+                IndexedReadInput::from_file(gz_path).with_index(
+                    determine_index_path(gz_path, ""));
 
             auto reader = reader_utility.process(input).get();
 
@@ -240,13 +234,14 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("Rebuild when file modified after index") {
             TestEnvironment env(5);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             // Create index
             auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+                IndexerFactory::create(gz_path, index_path, 1024, true);
             indexer->build();
-            REQUIRE(fs::exists(idx_path));
+            REQUIRE(fs::exists(db_root));
 
             // Sleep to ensure different timestamp
             std::this_thread::sleep_for(std::chrono::milliseconds(100));
@@ -258,7 +253,7 @@ TEST_SUITE("IndexedFileReader") {
             // Process should detect outdated index and rebuild
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
-                IndexedReadInput::from_file(gz_path).with_index(idx_path);
+                IndexedReadInput::from_file(gz_path).with_index(index_path);
 
             auto reader = reader_utility.process(input).get();
 
@@ -271,24 +266,23 @@ TEST_SUITE("IndexedFileReader") {
         SUBCASE("No rebuild when index is up to date") {
             TestEnvironment env(5);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = gz_path + ".idx";
+            std::string db_root = determine_index_path(gz_path, "");
 
             // Create index
             auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+                IndexerFactory::create(gz_path, index_path, 1024, true);
             indexer->build();
-            auto initial_mtime = fs::last_write_time(idx_path);
-
             // Process without modifying file
             IndexedFileReaderUtility reader_utility;
             IndexedReadInput input =
-                IndexedReadInput::from_file(gz_path).with_index(idx_path);
+                IndexedReadInput::from_file(gz_path).with_index(index_path);
 
             auto reader = reader_utility.process(input).get();
 
             CHECK(reader != nullptr);
-            auto final_mtime = fs::last_write_time(idx_path);
-            CHECK(initial_mtime == final_mtime);
+            CHECK(fs::exists(db_root));
+            CHECK(reader->get_num_lines() > 0);  // NOTE: no-rebuild unchecked
         }
     }
 
@@ -338,7 +332,8 @@ TEST_SUITE("IndexedFileReader") {
 
             CHECK(reader->get_num_lines() == 10);
             CHECK(reader->get_archive_path() == gz_path);
-            CHECK(reader->get_idx_path() == gz_path + ".idx");
+            CHECK(reader->get_index_path() ==
+                  determine_index_path(gz_path, ""));
         }
     }
 }
diff --git a/tests/utilities/composites/test_line_batch_processor.cpp b/tests/utilities/composites/test_line_batch_processor.cpp
index eddb45fa..50dbd490 100644
--- a/tests/utilities/composites/test_line_batch_processor.cpp
+++ b/tests/utilities/composites/test_line_batch_processor.cpp
@@ -66,11 +66,11 @@ TEST_SUITE("LineBatchProcessor") {
         SUBCASE("Process lines from compressed file") {
             TestEnvironment env(15);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = gz_path + ".idx";
 
             // Create index
             auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+                IndexerFactory::create(gz_path, index_path, 1024, true);
             REQUIRE(indexer != nullptr);
             indexer->build();
 
@@ -85,7 +85,7 @@ TEST_SUITE("LineBatchProcessor") {
 
             LineReadInput input;
             input.file_path = gz_path;
-            input.idx_path = idx_path;
+            input.index_path = index_path;
 
             auto results = batch.process(input).get();
 
@@ -217,11 +217,11 @@ TEST_SUITE("LineBatchProcessor") {
         SUBCASE("Process line range from compressed file") {
             TestEnvironment env(20);
             std::string gz_path = env.create_test_gzip_file();
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = gz_path + ".idx";
 
             // Create index
             auto indexer =
-                IndexerFactory::create(gz_path, idx_path, 1024, true);
+                IndexerFactory::create(gz_path, index_path, 1024, true);
             REQUIRE(indexer != nullptr);
             indexer->build();
 
@@ -236,7 +236,7 @@ TEST_SUITE("LineBatchProcessor") {
 
             LineReadInput input;
             input.file_path = gz_path;
-            input.idx_path = idx_path;
+            input.index_path = index_path;
             input.start_line = 10;
             input.end_line = 15;
 
diff --git a/tests/utilities/fileio/lines/test_streaming_line_reader.cpp b/tests/utilities/fileio/lines/test_streaming_line_reader.cpp
index 486b199f..a00446b7 100644
--- a/tests/utilities/fileio/lines/test_streaming_line_reader.cpp
+++ b/tests/utilities/fileio/lines/test_streaming_line_reader.cpp
@@ -1,5 +1,6 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/fileio/lines/streaming_line_reader.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 #include <doctest/doctest.h>
@@ -15,14 +16,10 @@ using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dft_utils_test;
 
 TEST_SUITE("StreamingLineReader") {
-    fs::path test_file =
-        make_unique_test_path("test_streaming_line_reader.txt");
-    fs::path gz_file = make_unique_test_path("test_streaming_line_reader.gz");
-    fs::path tar_gz_file = make_unique_test_path("test_archive.tar.gz");
-    fs::path tgz_file = make_unique_test_path("test_archive.tgz");
-
     TEST_CASE("StreamingLineReader - Basic Plain File Reading") {
         SUBCASE("Read entire plain text file") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Line 1\n";
@@ -50,6 +47,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Read plain file with line range") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 for (int i = 1; i <= 10; ++i) {
@@ -78,6 +77,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Read empty file") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);  // Create empty file
             }
@@ -93,6 +94,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Direct read_plain method") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Direct line 1\n";
@@ -119,6 +122,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Direct read_plain with line range") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 for (int i = 1; i <= 5; ++i) {
@@ -146,6 +151,11 @@ TEST_SUITE("StreamingLineReader") {
 
     TEST_CASE("StreamingLineReader - Format Detection") {
         SUBCASE("Detect .gz extension without index") {
+            fs::path test_dir =
+                make_unique_test_path("test_streaming_line_reader_gz_dir");
+            fs::create_directories(test_dir);
+            fs::path gz_file = test_dir / "test_streaming_line_reader.gz";
+
             // Create a file with .gz extension (not actually compressed)
             {
                 std::ofstream ofs(gz_file);
@@ -153,9 +163,10 @@ TEST_SUITE("StreamingLineReader") {
             }
 
             // Ensure no index file exists
-            std::string idx_path = gz_file.string() + ".idx";
-            if (fs::exists(idx_path)) {
-                fs::remove(idx_path);
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(gz_file.string(), "");
+            if (fs::exists(index_path)) {
+                fs::remove_all(index_path);
             }
 
             auto config =
@@ -170,15 +181,26 @@ TEST_SUITE("StreamingLineReader") {
                 CHECK(std::string(line.content) == "Fake gz content");
             }
 
-            fs::remove(gz_file);
+            fs::remove_all(test_dir);
         }
 
         SUBCASE("Detect .tar.gz extension") {
+            fs::path test_dir =
+                make_unique_test_path("test_streaming_line_reader_targz_dir");
+            fs::create_directories(test_dir);
+            fs::path tar_gz_file = test_dir / "test_archive.tar.gz";
+
             {
                 std::ofstream ofs(tar_gz_file);
                 ofs << "Fake tar.gz content\n";
             }
 
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(tar_gz_file.string(), "");
+            if (fs::exists(index_path)) {
+                fs::remove_all(index_path);
+            }
+
             auto config =
                 StreamingLineReaderConfig().with_file(tar_gz_file.string());
 
@@ -191,15 +213,26 @@ TEST_SUITE("StreamingLineReader") {
                 CHECK(std::string(line.content) == "Fake tar.gz content");
             }
 
-            fs::remove(tar_gz_file);
+            fs::remove_all(test_dir);
         }
 
         SUBCASE("Detect .tgz extension") {
+            fs::path test_dir =
+                make_unique_test_path("test_streaming_line_reader_tgz_dir");
+            fs::create_directories(test_dir);
+            fs::path tgz_file = test_dir / "test_archive.tgz";
+
             {
                 std::ofstream ofs(tgz_file);
                 ofs << "Fake tgz content\n";
             }
 
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(tgz_file.string(), "");
+            if (fs::exists(index_path)) {
+                fs::remove_all(index_path);
+            }
+
             auto config =
                 StreamingLineReaderConfig().with_file(tgz_file.string());
 
@@ -211,7 +244,7 @@ TEST_SUITE("StreamingLineReader") {
                 CHECK(std::string(line.content) == "Fake tgz content");
             }
 
-            fs::remove(tgz_file);
+            fs::remove_all(test_dir);
         }
 
         SUBCASE("Auto-detect index file with real compressed file") {
@@ -224,10 +257,11 @@ TEST_SUITE("StreamingLineReader") {
             REQUIRE(indexer != nullptr);
             indexer->build();  // Actually build the index
 
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(gz_path, "");
 
             // Verify index file was created
-            CHECK(fs::exists(idx_path));
+            CHECK(fs::exists(index_path));
 
             auto config = StreamingLineReaderConfig().with_file(gz_path);
 
@@ -262,14 +296,16 @@ TEST_SUITE("StreamingLineReader") {
             REQUIRE(indexer != nullptr);
             indexer->build();
 
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(gz_path, "");
+            CHECK(fs::exists(index_path));
 
             // Test with explicit index path
             auto config =
                 StreamingLineReaderConfig().with_file(gz_path).with_index(
-                    idx_path);
+                    index_path);
 
-            CHECK(config.index_path() == idx_path);
+            CHECK(config.index_path() == index_path);
 
             auto range = StreamingLineReader::read(config);
 
@@ -284,6 +320,8 @@ TEST_SUITE("StreamingLineReader") {
 
     TEST_CASE("StreamingLineReader - Configuration API") {
         SUBCASE("Fluent configuration API") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 for (int i = 1; i <= 10; ++i) {
@@ -340,6 +378,8 @@ TEST_SUITE("StreamingLineReader") {
 
     TEST_CASE("StreamingLineReader - Special Cases") {
         SUBCASE("File with no trailing newline") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Line 1\n";
@@ -365,6 +405,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("File with empty lines") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Line 1\n";
@@ -396,6 +438,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Very long lines") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 std::string long_line(10000, 'A');
@@ -437,6 +481,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Line range beyond file") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Line 1\n";
@@ -464,6 +510,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Line range starting beyond file") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Line 1\n";
@@ -484,6 +532,8 @@ TEST_SUITE("StreamingLineReader") {
 
     TEST_CASE("StreamingLineReader - Large Files") {
         SUBCASE("Many lines") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 for (int i = 1; i <= 1000; ++i) {
@@ -517,6 +567,8 @@ TEST_SUITE("StreamingLineReader") {
 
     TEST_CASE("StreamingLineReader - Real World Scenarios") {
         SUBCASE("CSV file processing") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "Name,Age,City\n";
@@ -545,6 +597,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("Log file processing") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << "2024-01-01 INFO: Application started\n";
@@ -576,6 +630,8 @@ TEST_SUITE("StreamingLineReader") {
         }
 
         SUBCASE("JSONL file processing") {
+            fs::path test_file =
+                make_unique_test_path("test_streaming_line_reader.txt");
             {
                 std::ofstream ofs(test_file);
                 ofs << R"({"id": 1, "name": "Item 1"})" << "\n";
@@ -614,7 +670,9 @@ TEST_SUITE("StreamingLineReader") {
             REQUIRE(indexer != nullptr);
             indexer->build();
 
-            std::string idx_path = gz_path + ".idx";
+            std::string index_path = dftracer::utils::utilities::composites::
+                dft::internal::determine_index_path(gz_path, "");
+            CHECK(fs::exists(index_path));
 
             auto config =
                 StreamingLineReaderConfig().with_file(gz_path).with_line_range(
diff --git a/tests/utilities/indexer/test_index_builder.cpp b/tests/utilities/indexer/test_index_builder.cpp
index 4a066748..ce0f4432 100644
--- a/tests/utilities/indexer/test_index_builder.cpp
+++ b/tests/utilities/indexer/test_index_builder.cpp
@@ -53,7 +53,7 @@ TEST_SUITE("IndexBuilder") {
 
         CHECK(result.success);
         CHECK_FALSE(result.was_skipped);
-        CHECK(fs::exists(result.idx_path));
+        CHECK(fs::exists(result.index_path));
     }
 
     TEST_CASE("BloomVisitor direct test") {
@@ -105,9 +105,9 @@ TEST_SUITE("IndexBuilder") {
         });
 
         REQUIRE(result.success);
-        REQUIRE(fs::exists(result.idx_path));
+        REQUIRE(fs::exists(result.index_path));
 
-        IndexDatabase db(result.idx_path);
+        IndexDatabase db(result.index_path);
         int fid =
             db.get_file_info_id(internal::get_logical_path(result.file_path));
         REQUIRE(fid >= 0);
@@ -134,9 +134,9 @@ TEST_SUITE("IndexBuilder") {
         });
 
         REQUIRE(result.success);
-        REQUIRE(fs::exists(result.idx_path));
+        REQUIRE(fs::exists(result.index_path));
 
-        IndexDatabase db(result.idx_path);
+        IndexDatabase db(result.index_path);
         int fid =
             db.get_file_info_id(internal::get_logical_path(result.file_path));
         REQUIRE(fid >= 0);
@@ -163,9 +163,9 @@ TEST_SUITE("IndexBuilder") {
         });
 
         REQUIRE(result.success);
-        REQUIRE(fs::exists(result.idx_path));
+        REQUIRE(fs::exists(result.index_path));
 
-        IndexDatabase db(result.idx_path);
+        IndexDatabase db(result.index_path);
         int fid =
             db.get_file_info_id(internal::get_logical_path(result.file_path));
         REQUIRE(fid >= 0);
@@ -297,7 +297,7 @@ TEST_SUITE("IndexBuilder") {
 
         // Verify no bloom data yet
         {
-            IndexDatabase db(r1.idx_path);
+            IndexDatabase db(r1.index_path);
             int fid = db.get_file_info_id(internal::get_logical_path(gz_file));
             CHECK(fid >= 0);
             CHECK_FALSE(db.has_bloom_data(fid));
@@ -323,7 +323,7 @@ TEST_SUITE("IndexBuilder") {
 
         // Verify bloom data now exists
         {
-            IndexDatabase db(r2.idx_path);
+            IndexDatabase db(r2.index_path);
             int fid = db.get_file_info_id(internal::get_logical_path(gz_file));
             CHECK(fid >= 0);
             CHECK(db.has_bloom_data(fid));
@@ -352,7 +352,7 @@ TEST_SUITE("IndexBuilder") {
         REQUIRE(r1.success);
 
         {
-            IndexDatabase db(r1.idx_path);
+            IndexDatabase db(r1.index_path);
             int fid = db.get_file_info_id(internal::get_logical_path(gz_file));
             CHECK(db.has_bloom_data(fid));
             CHECK_FALSE(db.has_manifest_data(fid));
@@ -378,7 +378,7 @@ TEST_SUITE("IndexBuilder") {
 
         // Verify both bloom and manifest exist
         {
-            IndexDatabase db(r2.idx_path);
+            IndexDatabase db(r2.index_path);
             int fid = db.get_file_info_id(internal::get_logical_path(gz_file));
             CHECK(db.has_bloom_data(fid));
             CHECK(db.has_manifest_data(fid));
diff --git a/tests/utilities/indexer/test_index_database.cpp b/tests/utilities/indexer/test_index_database.cpp
index 914ce5f7..b8345791 100644
--- a/tests/utilities/indexer/test_index_database.cpp
+++ b/tests/utilities/indexer/test_index_database.cpp
@@ -1,620 +1,100 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
-#include <dftracer/utils/utilities/composites/dft/indexing/chunk_statistics.h>
+#include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/utilities/indexer/index_database.h>
 #include <doctest/doctest.h>
-#include <sqlite3.h>
 #include <testing_utilities.h>
 
 #include <cstdint>
-#include <filesystem>
 #include <span>
 #include <string>
-#include <string_view>
-#include <utility>
 #include <vector>
 
 namespace fs = std::filesystem;
 using dftracer::utils::utilities::indexer::IndexDatabase;
 
-namespace {
-
-bool table_exists(sqlite3* db, const char* name) {
-    sqlite3_stmt* stmt = nullptr;
-    sqlite3_prepare_v2(
-        db, "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?;", -1,
-        &stmt, nullptr);
-    sqlite3_bind_text(stmt, 1, name, -1, SQLITE_STATIC);
-    bool found = sqlite3_step(stmt) == SQLITE_ROW;
-    sqlite3_finalize(stmt);
-    return found;
-}
-
-int row_count(sqlite3* db, const char* table) {
-    std::string sql = std::string("SELECT count(*) FROM ") + table + ";";
-    sqlite3_stmt* stmt = nullptr;
-    sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, nullptr);
-    int count = 0;
-    if (sqlite3_step(stmt) == SQLITE_ROW) count = sqlite3_column_int(stmt, 0);
-    sqlite3_finalize(stmt);
-    return count;
-}
-
-}  // namespace
-
 TEST_SUITE("IndexDatabase") {
-    TEST_CASE("Create and open database") {
-        auto path = dft_utils_test::make_unique_test_path("idx_create");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            CHECK(db.db() != nullptr);
-        }
-
-        CHECK(fs::exists(db_path));
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("init_base_schema creates tables") {
-        auto path = dft_utils_test::make_unique_test_path("idx_base");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            CHECK_NOTHROW(db.init_base_schema());
-
-            CHECK(table_exists(db.db(), "files"));
-            CHECK(table_exists(db.db(), "checkpoints"));
-            CHECK(table_exists(db.db(), "metadata"));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("init_bloom_schema creates tables") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            CHECK_NOTHROW(db.init_bloom_schema());
-
-            CHECK(table_exists(db.db(), "chunk_bloom_filters"));
-            CHECK(table_exists(db.db(), "file_bloom_filters"));
-            CHECK(table_exists(db.db(), "chunk_statistics"));
-            CHECK(table_exists(db.db(), "hash_resolutions"));
-            CHECK(table_exists(db.db(), "index_dimensions"));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("init_manifest_schema creates tables") {
-        auto path = dft_utils_test::make_unique_test_path("idx_manifest");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            CHECK_NOTHROW(db.init_manifest_schema());
-
-            CHECK(table_exists(db.db(), "checkpoint_event_ranges"));
-            CHECK(table_exists(db.db(), "checkpoint_metadata_lines"));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("Additive schema — bloom without base") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom_only");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            CHECK_NOTHROW(db.init_bloom_schema());
-            CHECK(table_exists(db.db(), "chunk_bloom_filters"));
-            CHECK(table_exists(db.db(), "file_bloom_filters"));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("get_or_create_file_info") {
-        auto path = dft_utils_test::make_unique_test_path("idx_file_info");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-
-            SUBCASE("Insert returns a positive id") {
-                int id =
-                    db.get_or_create_file_info("/trace/foo.pfw.gz", 0xDEAD);
-                CHECK(id > 0);
-            }
-
-            SUBCASE("Same path and hash returns same id") {
-                int id1 =
-                    db.get_or_create_file_info("/trace/bar.pfw.gz", 0xBEEF);
-                int id2 =
-                    db.get_or_create_file_info("/trace/bar.pfw.gz", 0xBEEF);
-                CHECK(id1 == id2);
-            }
-
-            SUBCASE("Hash mismatch re-inserts") {
-                int id1 =
-                    db.get_or_create_file_info("/trace/baz.pfw.gz", 0x1111);
-                int id2 =
-                    db.get_or_create_file_info("/trace/baz.pfw.gz", 0x2222);
-                CHECK(id2 > 0);
-                // id may or may not equal id1 — SQLite can reuse rowids
-                (void)id1;
-            }
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("get_file_info_id returns -1 for unknown path") {
-        auto path = dft_utils_test::make_unique_test_path("idx_unknown");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-
-            CHECK(db.get_file_info_id("/nonexistent/path.pfw.gz") == -1);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("get_file_info_id returns correct id after insert") {
-        auto path = dft_utils_test::make_unique_test_path("idx_lookup");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-
-            int inserted =
-                db.get_or_create_file_info("/trace/lookup.pfw.gz", 0xABCD);
-            int looked_up = db.get_file_info_id("/trace/lookup.pfw.gz");
-            CHECK(inserted == looked_up);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("has_bloom_data returns false when no data") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom_empty");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            CHECK_FALSE(db.has_bloom_data(1));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("has_bloom_data returns true after insert") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom_data");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int file_id =
-                db.get_or_create_file_info("/trace/bloom.pfw.gz", 0x1234);
-
-            const char* sql =
-                "INSERT INTO chunk_bloom_filters"
-                "(file_info_id, checkpoint_idx, dimension, bloom_data,"
-                " num_entries)"
-                " VALUES(?, 0, 'name', X'DEADBEEF', 1);";
-            sqlite3_stmt* stmt = nullptr;
-            sqlite3_prepare_v2(db.db(), sql, -1, &stmt, nullptr);
-            sqlite3_bind_int(stmt, 1, file_id);
-            REQUIRE(sqlite3_step(stmt) == SQLITE_DONE);
-            sqlite3_finalize(stmt);
-
-            CHECK(db.has_bloom_data(file_id));
-            CHECK_FALSE(db.has_bloom_data(file_id + 999));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("has_manifest_data returns false when no data") {
-        auto path = dft_utils_test::make_unique_test_path("idx_manifest_empty");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_manifest_schema();
-
-            CHECK_FALSE(db.has_manifest_data(1));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("has_manifest_data returns true after insert") {
-        auto path = dft_utils_test::make_unique_test_path("idx_manifest_data");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_manifest_schema();
-
-            int file_id =
-                db.get_or_create_file_info("/trace/manifest.pfw.gz", 0x5678);
-
-            const char* sql =
-                "INSERT INTO checkpoint_event_ranges"
-                "(file_info_id, checkpoint_idx, cat, name,"
-                " line_numbers, event_count)"
-                " VALUES(?, 0, 'cat', 'ev', X'01', 1);";
-            sqlite3_stmt* stmt = nullptr;
-            sqlite3_prepare_v2(db.db(), sql, -1, &stmt, nullptr);
-            sqlite3_bind_int(stmt, 1, file_id);
-            REQUIRE(sqlite3_step(stmt) == SQLITE_DONE);
-            sqlite3_finalize(stmt);
-
-            CHECK(db.has_manifest_data(file_id));
-            CHECK_FALSE(db.has_manifest_data(file_id + 999));
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("Transaction commit") {
-        auto path = dft_utils_test::make_unique_test_path("idx_txn");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-
-            CHECK_NOTHROW(db.begin_transaction());
-            db.get_or_create_file_info("/trace/txn_a.pfw.gz", 0xAAAA);
-            db.get_or_create_file_info("/trace/txn_b.pfw.gz", 0xBBBB);
-            CHECK_NOTHROW(db.commit_transaction());
-
-            CHECK(row_count(db.db(), "files") == 2);
-            CHECK(db.get_file_info_id("/trace/txn_a.pfw.gz") > 0);
-            CHECK(db.get_file_info_id("/trace/txn_b.pfw.gz") > 0);
-        }
-
-        fs::remove(db_path);
-    }
+    TEST_CASE("normalizes legacy .idx-style input to root-local .dftindex") {
+        auto root = dft_utils_test::make_unique_test_path("idx_root");
+        fs::create_directories(root);  // directory the index is expected in
+        auto legacy_like = (root / "trace.pfw.gz.idx").string();
 
-    TEST_CASE("Move semantics") {
-        auto path = dft_utils_test::make_unique_test_path("idx_move");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase original(db_path);
-            original.init_base_schema();
-            int id =
-                original.get_or_create_file_info("/trace/move.pfw.gz", 0x9999);
-
-            IndexDatabase moved(std::move(original));
-
-            REQUIRE(moved.db() != nullptr);
-            CHECK(moved.get_file_info_id("/trace/move.pfw.gz") == id);
-        }
-
-        fs::remove(db_path);
-    }
-}
-
-TEST_SUITE("IndexDatabase - Bloom wrapper methods") {
-    TEST_CASE("insert and query chunk bloom filter (span overload)") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/wrap.pfw.gz", 0x1234);
-
-            std::vector<unsigned char> blob = {0xDE, 0xAD, 0xBE, 0xEF};
-            db.insert_chunk_bloom_filter(fid, 0, "name", std::span(blob), 42);
-
-            auto results = db.query_chunk_bloom_filters(fid, "name");
-            REQUIRE(results.size() == 1);
-            CHECK(results[0].checkpoint_idx == 0);
-            CHECK(results[0].bloom_data == blob);
-            CHECK(results[0].num_entries == 42);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("insert and query chunk bloom filter (void* overload)") {
-        auto path = dft_utils_test::make_unique_test_path("idx_bloom_wrap_raw");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/raw.pfw.gz", 0x5678);
-
-            std::vector<unsigned char> blob = {0xCA, 0xFE};
-            db.insert_chunk_bloom_filter(fid, 1, "fhash", blob.data(),
-                                         static_cast<int>(blob.size()), 10);
-
-            auto results = db.query_chunk_bloom_filters(fid, "fhash");
-            REQUIRE(results.size() == 1);
-            CHECK(results[0].checkpoint_idx == 1);
-            CHECK(results[0].bloom_data == blob);
-            CHECK(results[0].num_entries == 10);
-        }
-
-        fs::remove(db_path);
+        IndexDatabase db(legacy_like);
+        CHECK(fs::exists(root / ".dftindex"));  // <root>/.dftindex now exists
+    }
 
-    TEST_CASE("insert and query file bloom filter") {
-        auto path =
-            dft_utils_test::make_unique_test_path("idx_file_bloom_wrap");
-        auto db_path = path.string() + ".idx";
+    TEST_CASE("file registry is shared within one .dftindex root") {
+        auto root = dft_utils_test::make_unique_test_path("idx_shared");
+        fs::create_directories(root);
 
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
+        IndexDatabase db1((root / ".dftindex").string());
+        IndexDatabase db2((root / "other-name.idx").string());  // same parent
 
-            int fid =
-                db.get_or_create_file_info("/trace/fbloom.pfw.gz", 0xABCD);
+        db1.init_base_schema();
+        db2.init_base_schema();
 
-            std::vector<unsigned char> blob = {0x11, 0x22, 0x33};
-            db.insert_file_bloom_filter(fid, "name", std::span(blob), 99);
+        int id1 = db1.get_or_create_file_info("a.pfw.gz", 0x1111);
+        int id2 = db2.get_file_info_id("a.pfw.gz");  // read via second handle
 
-            auto result = db.query_file_bloom_filter(fid, "name");
-            REQUIRE(result.has_value());
-            CHECK(result->bloom_data == blob);
-            CHECK(result->num_entries == 99);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("insert and query hash resolution") {
-        auto path = dft_utils_test::make_unique_test_path("idx_hash_res_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/hres.pfw.gz", 0xFACE);
-
-            db.insert_hash_resolution(fid, "fhash", "abc123", "/path/to/file");
-
-            auto resolved = db.query_resolved_by_hash("fhash", "abc123");
-            REQUIRE(resolved.has_value());
-            CHECK(resolved.value() == "/path/to/file");
-
-            auto not_found = db.query_resolved_by_hash("fhash", "nonexistent");
-            CHECK_FALSE(not_found.has_value());
-        }
-
-        fs::remove(db_path);
+        CHECK(id1 > 0);
+        CHECK(id1 == id2);  // both handles resolve the same id
+    }
 
-    TEST_CASE("insert and query index dimensions") {
-        auto path = dft_utils_test::make_unique_test_path("idx_dim_wrap");
-        auto db_path = path.string() + ".idx";
+    TEST_CASE("rebuild clears per-file bloom and manifest data before reuse") {
+        auto root = dft_utils_test::make_unique_test_path("idx_rebuild");
+        fs::create_directories(root);
 
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
+        IndexDatabase db((root / ".dftindex").string());
+        db.init_base_schema();
+        db.init_bloom_schema();
+        db.init_manifest_schema();
 
-            int fid = db.get_or_create_file_info("/trace/dim.pfw.gz", 0xBBBB);
+        const int file_id = db.get_or_create_file_info("trace.pfw.gz", 0xAAAA);
 
-            db.insert_index_dimension(fid, "name");
-            db.insert_index_dimension(fid, "fhash");
+        std::vector<unsigned char> blob = {0xDE, 0xAD, 0xBE, 0xEF};
+        db.insert_chunk_bloom_filter(file_id, 0, "name", std::span(blob), 4);
+        db.insert_file_bloom_filter(file_id, "name", std::span(blob), 4);
+        db.insert_index_dimension(file_id, "name");
+        db.insert_hash_resolution(file_id, "fhash", "hashA", "resolvedA");
+        db.insert_event_range(file_id, 0, "POSIX", "read",
+                              std::vector<std::uint32_t>{1, 2, 3});
+        db.insert_metadata_lines(file_id, 0, "HH",
+                                 std::vector<std::uint32_t>{0, 4});
 
-            auto dims = db.query_index_dimensions(fid);
-            CHECK(dims.size() == 2);
+        CHECK(db.has_bloom_data(file_id));  // data is present before rebuild
+        CHECK(db.has_manifest_data(file_id));
+        CHECK(db.query_file_bloom_filter(file_id, "name").has_value());
+        CHECK(db.query_resolved_by_hash("fhash", "hashA").has_value());
 
-            CHECK(db.has_index_dimension(fid, "name"));
-            CHECK(db.has_index_dimension(fid, "fhash"));
-            CHECK_FALSE(db.has_index_dimension(fid, "nonexistent"));
-        }
+        const int rebuilt_id =
+            db.get_or_create_file_info("trace.pfw.gz", 0xBBBB);
+        CHECK(rebuilt_id == file_id);  // same path keeps the same id
 
-        fs::remove(db_path);
+        CHECK_FALSE(db.has_bloom_data(file_id));  // gone after re-registering
+        CHECK_FALSE(db.has_manifest_data(file_id));
+        CHECK_FALSE(db.query_file_bloom_filter(file_id, "name").has_value());
+        CHECK(db.query_chunk_bloom_filters(file_id, "name").empty());
+        CHECK(db.query_event_ranges(file_id).empty());
+        CHECK(db.query_metadata_lines(file_id).empty());
+        CHECK_FALSE(db.query_resolved_by_hash("fhash", "hashA").has_value());
     }
 
-    TEST_CASE("insert and query chunk statistics") {
-        auto path = dft_utils_test::make_unique_test_path("idx_stats_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/stats.pfw.gz", 0xCCCC);
-
-            using ChunkStatistics = dftracer::utils::utilities::composites::
-                dft::indexing::ChunkStatistics;
-            ChunkStatistics stats;
-            stats.total_events = 100;
-            stats.min_timestamp_us = 1000;
-            stats.max_timestamp_us = 5000;
-
-            db.insert_chunk_statistics(fid, 0, stats);
-
-            auto results = db.query_chunk_statistics(fid);
-            REQUIRE(results.size() == 1);
-            CHECK(results[0].checkpoint_idx == 0);
-            CHECK(results[0].stats.total_events == 100);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("delete operations") {
-        auto path = dft_utils_test::make_unique_test_path("idx_delete_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/del.pfw.gz", 0xDDDD);
-
-            std::vector<unsigned char> blob = {0x01};
-            db.insert_chunk_bloom_filter(fid, 0, "name", std::span(blob), 1);
-            db.insert_file_bloom_filter(fid, "name", std::span(blob), 1);
-            db.insert_hash_resolution(fid, "name", "h1", "v1");
-
-            db.delete_chunk_bloom_filters(fid, "name");
-            CHECK(db.query_chunk_bloom_filters(fid, "name").empty());
-
-            db.delete_file_bloom_filter(fid, "name");
-            CHECK_FALSE(db.query_file_bloom_filter(fid, "name").has_value());
-
-            db.delete_hash_resolutions(fid);
-            CHECK_FALSE(db.query_resolved_by_hash("name", "h1").has_value());
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("string_view accepts std::string and const char*") {
-        auto path = dft_utils_test::make_unique_test_path("idx_sv_compat");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_bloom_schema();
-
-            int fid = db.get_or_create_file_info("/trace/sv.pfw.gz", 0xEEEE);
-
-            // const char*
-            std::vector<unsigned char> blob = {0x01};
-            db.insert_chunk_bloom_filter(fid, 0, "name", std::span(blob), 1);
-
-            // std::string
-            std::string dim = "fhash";
-            db.insert_chunk_bloom_filter(fid, 1, dim, std::span(blob), 2);
-
-            // std::string_view
-            std::string_view sv_dim = "hhash";
-            db.insert_chunk_bloom_filter(fid, 2, sv_dim, std::span(blob), 3);
-
-            CHECK(db.query_chunk_bloom_filters(fid, "name").size() == 1);
-            CHECK(db.query_chunk_bloom_filters(fid, dim).size() == 1);
-            CHECK(db.query_chunk_bloom_filters(fid, sv_dim).size() == 1);
-        }
-
-        fs::remove(db_path);
-    }
-}
-
-TEST_SUITE("IndexDatabase - Manifest wrapper methods") {
-    TEST_CASE("insert and query event ranges") {
-        auto path =
-            dft_utils_test::make_unique_test_path("idx_event_range_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_manifest_schema();
-
-            int fid = db.get_or_create_file_info("/trace/ev.pfw.gz", 0x1111);
-
-            std::vector<std::uint32_t> lines = {10, 20, 30};
-
-            // vector overload
-            db.insert_event_range(fid, 0, "cat1", "event1", lines);
-
-            // span overload
-            std::vector<std::uint32_t> lines2 = {40, 50};
-            db.insert_event_range(fid, 1, "cat2", "event2", std::span(lines2));
-
-            auto results = db.query_event_ranges(fid);
-            CHECK(results.size() == 2);
-
-            auto ckpt0 = db.query_event_ranges_for_checkpoint(fid, 0);
-            REQUIRE(ckpt0.size() == 1);
-            CHECK(ckpt0[0].cat == "cat1");
-            CHECK(ckpt0[0].name == "event1");
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("insert and query metadata lines") {
-        auto path =
-            dft_utils_test::make_unique_test_path("idx_meta_lines_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_manifest_schema();
-
-            int fid = db.get_or_create_file_info("/trace/meta.pfw.gz", 0x2222);
-
-            std::vector<std::uint32_t> lines = {5, 15, 25};
-            db.insert_metadata_lines(fid, 0, "traceEvents", lines);
-
-            auto results = db.query_metadata_lines(fid);
-            REQUIRE(results.size() == 1);
-            CHECK(results[0].meta_type == "traceEvents");
-
-            auto ckpt0 = db.query_metadata_lines_for_checkpoint(fid, 0);
-            CHECK(ckpt0.size() == 1);
-        }
-
-        fs::remove(db_path);
-    }
-
-    TEST_CASE("delete event ranges and metadata lines") {
-        auto path =
-            dft_utils_test::make_unique_test_path("idx_manifest_del_wrap");
-        auto db_path = path.string() + ".idx";
-
-        {
-            IndexDatabase db(db_path);
-            db.init_base_schema();
-            db.init_manifest_schema();
-
-            int fid = db.get_or_create_file_info("/trace/mdel.pfw.gz", 0x3333);
+    TEST_CASE("rollback discards transactional writes") {
+        auto root = dft_utils_test::make_unique_test_path("idx_rollback");
+        fs::create_directories(root);
 
-            std::vector<std::uint32_t> lines = {1, 2, 3};
-            db.insert_event_range(fid, 0, "cat", "name", lines);
-            db.insert_metadata_lines(fid, 0, "meta", lines);
+        IndexDatabase db((root / ".dftindex").string());
+        db.init_base_schema();
+        db.init_bloom_schema();
 
-            db.delete_event_ranges(fid);
-            CHECK(db.query_event_ranges(fid).empty());
+        const int file_id = db.get_or_create_file_info("trace.pfw.gz", 0xAAAA);
+        std::vector<unsigned char> blob = {0xAB, 0xCD};
 
-            db.delete_metadata_lines(fid);
-            CHECK(db.query_metadata_lines(fid).empty());
-        }
+        db.begin_transaction();  // writes below are rolled back
+        db.insert_file_bloom_filter(file_id, "name", std::span(blob), 2);
+        db.insert_hash_resolution(file_id, "fhash", "hashA", "resolvedA");
+        db.rollback_transaction();  // nothing above should persist
 
-        fs::remove(db_path);
+        CHECK_FALSE(db.query_file_bloom_filter(file_id, "name").has_value());
+        CHECK_FALSE(db.query_resolved_by_hash("fhash", "hashA").has_value());
     }
 }
diff --git a/tests/utilities/indexer/test_provenance_database.cpp b/tests/utilities/indexer/test_provenance_database.cpp
index d5ca015f..5686b01d 100644
--- a/tests/utilities/indexer/test_provenance_database.cpp
+++ b/tests/utilities/indexer/test_provenance_database.cpp
@@ -1,136 +1,164 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
-#include <dftracer/utils/core/sqlite/statement.h>
 #include <dftracer/utils/utilities/indexer/provenance_database.h>
 #include <doctest/doctest.h>
-#include <sqlite3.h>
 #include <testing_utilities.h>
 
-#include <string>
-
+namespace fs = std::filesystem;
 using namespace dftracer::utils::utilities::indexer;
-using dftracer::utils::sqlite::SqliteStmt;
-
-static bool table_exists(sqlite3* db, const std::string& table_name) {
-    SqliteStmt stmt(
-        db, "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?;");
-    stmt.bind_text(1, table_name);
-    return sqlite3_step(stmt) == SQLITE_ROW;
-}
 
 TEST_SUITE("ProvenanceDatabase") {
-    TEST_CASE("Create and open database") {
-        auto path =
-            dft_utils_test::make_unique_test_path("provdb_create").string() +
-            ".pidx";
-        CHECK_NOTHROW(ProvenanceDatabase db(path));
-        CHECK(fs::exists(path));
-        fs::remove(path);
-    }
+    TEST_CASE("uses the same root-local .dftindex path") {
+        auto root = dft_utils_test::make_unique_test_path("prov_root");
+        fs::create_directories(root);
 
-    TEST_CASE("init_schema creates tables") {
-        auto path =
-            dft_utils_test::make_unique_test_path("provdb_schema").string() +
-            ".pidx";
-        ProvenanceDatabase db(path);
-        CHECK_NOTHROW(db.init_schema());
-
-        sqlite3* raw = db.db().get();
-        CHECK(table_exists(raw, "file_info"));
-        CHECK(table_exists(raw, "provenance_info"));
-        CHECK(table_exists(raw, "provenance_sources"));
-        CHECK(table_exists(raw, "provenance_group"));
-        CHECK(table_exists(raw, "provenance_segments"));
-
-        fs::remove(path);
+        auto resolved =
+            determine_provenance_index_path((root / "trace.pfw.gz").string());
+        CHECK(resolved == (root / ".dftindex").string());
+        // Opening a database at the resolved path must create it on disk.
+        ProvenanceDatabase db(resolved);
+        CHECK(fs::exists(root / ".dftindex"));
     }
 
-    TEST_CASE("get_or_create_file_info") {
-        auto path =
-            dft_utils_test::make_unique_test_path("provdb_file_info").string() +
-            ".pidx";
-        ProvenanceDatabase db(path);
+    TEST_CASE("stores and queries provenance records in shared DB") {
+        auto root = dft_utils_test::make_unique_test_path("prov_records");
+        fs::create_directories(root);
+
+        ProvenanceDatabase db((root / ".dftindex").string());
         db.init_schema();
 
-        SUBCASE("insert returns valid id") {
-            int id = db.get_or_create_file_info("/data/trace.pfw.gz", 0xDEAD);
-            CHECK(id >= 1);
-        }
-
-        SUBCASE("same path and hash returns same id") {
-            int id1 = db.get_or_create_file_info("/data/trace.pfw.gz", 0xBEEF);
-            int id2 = db.get_or_create_file_info("/data/trace.pfw.gz", 0xBEEF);
-            CHECK(id1 == id2);
-        }
-
-        SUBCASE("same path different hash replaces row") {
-            int id1 = db.get_or_create_file_info("/data/other.pfw.gz", 0xAAAA);
-            int id2 = db.get_or_create_file_info("/data/other.pfw.gz", 0xBBBB);
-            CHECK(id2 >= 1);
-            (void)id1;
-        }
-
-        SUBCASE("distinct paths get distinct ids") {
-            int id1 = db.get_or_create_file_info("/data/a.pfw.gz", 0x1111);
-            int id2 = db.get_or_create_file_info("/data/b.pfw.gz", 0x2222);
-            CHECK(id1 != id2);
-        }
-
-        fs::remove(path);
+        int file_id =
+            db.get_or_create_file_info((root / "out.pfw.gz").string(), 0xCAFE);
+        CHECK(file_id > 0);
+        CHECK(db.get_file_info_id((root / "out.pfw.gz").string()) == file_id);
+        // Write one record of each provenance kind for this output file.
+        db.insert_info(file_id, "tool", "dftracer_organize");
+        db.insert_group(file_id, "group0", "cat == POSIX");
+        db.insert_source(file_id, 7, "/src/a.pfw.gz", 12, "hash7");
+        db.insert_segment(file_id, 7, 3, 100, 140, 9);
+        // The source row must come back with every field it was given.
+        auto sources = db.query_sources(file_id);
+        REQUIRE(sources.size() == 1);
+        CHECK(sources[0].source_idx == 7);
+        CHECK(sources[0].path == "/src/a.pfw.gz");
+        CHECK(sources[0].num_checkpoints == 12);
+        CHECK(sources[0].event_hash == "hash7");
+        // Query segments using the same index (7) that was inserted above.
+        auto segments = db.query_segments(file_id, 7);
+        REQUIRE(segments.size() == 1);
+        CHECK(segments[0].source_checkpoint == 3);
+        CHECK(segments[0].output_line_start == 100);
+        CHECK(segments[0].output_line_end == 140);
+        CHECK(segments[0].event_count == 9);
+        // Info and group values must read back exactly as written.
+        CHECK(db.query_info(file_id, "tool") == "dftracer_organize");
+        CHECK(db.query_group_name(file_id) == "group0");
+        CHECK(db.query_group_predicate(file_id) == "cat == POSIX");
     }
 
-    TEST_CASE("get_file_info_id returns -1 for unknown") {
-        auto path =
-            dft_utils_test::make_unique_test_path("provdb_unknown").string() +
-            ".pidx";
-        ProvenanceDatabase db(path);
-        db.init_schema();
+    TEST_CASE("keeps provenance for multiple outputs in one shared root") {
+        auto root = dft_utils_test::make_unique_test_path("prov_multi");
+        fs::create_directories(root);
 
-        CHECK(db.get_file_info_id("/nonexistent/path.pfw.gz") == -1);
+        ProvenanceDatabase db((root / ".dftindex").string());
+        db.init_schema();
 
-        fs::remove(path);
+        const auto out_a = (root / "io.pfw.gz").string();
+        const auto out_b = (root / "compute.pfw.gz").string();
+
+        const int file_a = db.get_or_create_file_info(out_a, 0xA001);
+        const int file_b = db.get_or_create_file_info(out_b, 0xB002);
+        CHECK(file_a > 0);
+        CHECK(file_b > 0);
+        CHECK(file_a != file_b);
+        // Write both outputs' provenance inside a single transaction.
+        db.begin_transaction();
+        db.insert_group(file_a, "io", R"(cat == "POSIX")");
+        db.insert_source(file_a, 0, "/src/trace0.pfw.gz", 3, "ha");
+        db.insert_segment(file_a, 0, 1, 0, 5, 3);
+
+        db.insert_group(file_b, "compute", R"(cat == "APP")");
+        db.insert_source(file_b, 1, "/src/trace1.pfw.gz", 2, "hb");
+        db.insert_segment(file_b, 1, 0, 0, 3, 1);
+        db.commit_transaction();
+        // After the commit each output must read back only its own records.
+        CHECK(db.get_file_info_id(out_a) == file_a);
+        CHECK(db.get_file_info_id(out_b) == file_b);
+
+        CHECK(db.query_group_name(file_a) == "io");
+        CHECK(db.query_group_name(file_b) == "compute");
+
+        auto segments_a = db.query_all_segments(file_a);
+        auto segments_b = db.query_all_segments(file_b);
+        REQUIRE(segments_a.size() == 1);
+        REQUIRE(segments_b.size() == 1);
+        CHECK(segments_a[0].event_count == 3);
+        CHECK(segments_b[0].event_count == 1);
     }
 
-    TEST_CASE("Transaction commit") {
-        auto path =
-            dft_utils_test::make_unique_test_path("provdb_txn").string() +
-            ".pidx";
-        ProvenanceDatabase db(path);
-        db.init_schema();
-
-        CHECK_NOTHROW(db.begin_transaction());
-        int id = db.get_or_create_file_info("/data/txn.pfw.gz", 0xCAFE);
-        CHECK_NOTHROW(db.commit_transaction());
+    TEST_CASE("rebuild-style writes overwrite provenance for the same output") {
+        auto root = dft_utils_test::make_unique_test_path("prov_rebuild");
+        fs::create_directories(root);
 
-        CHECK(id >= 1);
-        CHECK(db.get_file_info_id("/data/txn.pfw.gz") == id);
+        ProvenanceDatabase db((root / ".dftindex").string());
+        db.init_schema();
 
-        fs::remove(path);
+        const auto out = (root / "group.pfw.gz").string();
+        // First build: register the output and write its provenance.
+        const int original_id = db.get_or_create_file_info(out, 0x1111);
+        db.begin_transaction();
+        db.insert_info(original_id, "tool", "dftracer_organize");
+        db.insert_group(original_id, "io", R"(cat == "POSIX")");
+        db.insert_source(original_id, 0, "/src/trace0.pfw.gz", 4, "old");
+        db.insert_segment(original_id, 0, 0, 0, 4, 2);
+        db.commit_transaction();
+        // Same path with a new hash (0x2222) must reuse the existing id.
+        const int rebuilt_id = db.get_or_create_file_info(out, 0x2222);
+        CHECK(rebuilt_id == original_id);
+        // Rebuild: rewrite the same info key, group, source, and segment.
+        db.begin_transaction();
+        db.insert_info(rebuilt_id, "tool", "dftracer_organize_v2");
+        db.insert_group(rebuilt_id, "io", R"(cat == "MPI")");
+        db.insert_source(rebuilt_id, 0, "/src/trace0.pfw.gz", 8, "new");
+        db.insert_segment(rebuilt_id, 0, 0, 10, 18, 5);
+        db.commit_transaction();
+        // After the rebuild only the second set of values may be visible.
+        CHECK(db.query_info(rebuilt_id, "tool") == "dftracer_organize_v2");
+        CHECK(db.query_group_predicate(rebuilt_id) == R"(cat == "MPI")");
+
+        auto sources = db.query_sources(rebuilt_id);
+        REQUIRE(sources.size() == 1);
+        CHECK(sources[0].num_checkpoints == 8);
+        CHECK(sources[0].event_hash == "new");
+
+        auto segments = db.query_segments(rebuilt_id, 0);
+        REQUIRE(segments.size() == 1);
+        CHECK(segments[0].output_line_start == 10);
+        CHECK(segments[0].output_line_end == 18);
+        CHECK(segments[0].event_count == 5);
     }
 
-    TEST_CASE("determine_provenance_index_path - empty index_dir") {
-        SUBCASE("plain path gets .pidx suffix") {
-            auto result = determine_provenance_index_path("/data/trace.pfw.gz");
-            CHECK(result == "/data/trace.pfw.gz.pidx");
-        }
+    TEST_CASE("rollback discards provenance writes") {
+        auto root = dft_utils_test::make_unique_test_path("prov_rollback");
+        fs::create_directories(root);
 
-        SUBCASE("path without extension gets .pidx suffix") {
-            auto result = determine_provenance_index_path("/data/trace");
-            CHECK(result == "/data/trace.pidx");
-        }
-    }
+        ProvenanceDatabase db((root / ".dftindex").string());
+        db.init_schema();
 
-    TEST_CASE("determine_provenance_index_path - with index_dir") {
-        SUBCASE("places filename.pidx under index_dir") {
-            auto result =
-                determine_provenance_index_path("/data/trace.pfw.gz", "/idx");
-            CHECK(result == "/idx/trace.pfw.gz.pidx");
-        }
-
-        SUBCASE("nested source path uses only filename") {
-            auto result = determine_provenance_index_path(
-                "/deep/nested/dir/run.pfw.gz", "/scratch/indices");
-            CHECK(result == "/scratch/indices/run.pfw.gz.pidx");
-        }
+        const int file_id =
+            db.get_or_create_file_info((root / "out.pfw.gz").string(), 0xCAFE);
+        // Stage one record of each kind, then abandon the transaction.
+        db.begin_transaction();
+        db.insert_info(file_id, "tool", "dftracer_organize");
+        db.insert_group(file_id, "group0", "cat == POSIX");
+        db.insert_source(file_id, 7, "/src/a.pfw.gz", 12, "hash7");
+        db.insert_segment(file_id, 7, 3, 100, 140, 9);
+        db.rollback_transaction();
+        // None of the staged writes may be observable after the rollback.
+        CHECK(db.query_info(file_id, "tool").empty());
+        CHECK(db.query_group_name(file_id).empty());
+        CHECK(db.query_group_predicate(file_id).empty());
+        CHECK(db.query_sources(file_id).empty());
+        CHECK(db.query_segments(file_id, 7).empty());
     }
 }
diff --git a/tests/utilities/indexer/test_rocksdb_storage.cpp b/tests/utilities/indexer/test_rocksdb_storage.cpp
new file mode 100644
index 00000000..48dfeb97
--- /dev/null
+++ b/tests/utilities/indexer/test_rocksdb_storage.cpp
@@ -0,0 +1,224 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include <dftracer/utils/core/common/filesystem.h>
+#include <dftracer/utils/core/rocksdb/database.h>
+#include <dftracer/utils/core/rocksdb/db_manager.h>
+#include <dftracer/utils/core/rocksdb/filesystem.h>
+#include <dftracer/utils/core/rocksdb/key_codec.h>
+#include <doctest/doctest.h>
+#include <rocksdb/file_system.h>
+#include <testing_utilities.h>
+
+#include <array>
+#include <cstring>
+#include <memory>
+
+namespace fs = std::filesystem;
+using dftracer::utils::rocksdb::KeyBuilder;
+using dftracer::utils::rocksdb::KeyCodec;
+using dftracer::utils::rocksdb::RocksDatabase;
+using dftracer::utils::rocksdb::RocksDBManager;
+
+TEST_SUITE("RocksDBStorage") {
+    TEST_CASE("key codec round-trips big-endian integers") {
+        // decode(encode(x)) must give back x for both integer widths.
+        const std::uint32_t word = 0x10203040U;
+        const std::uint64_t quad = 0x0102030405060708ULL;
+        CHECK(KeyCodec::decode_be32(KeyCodec::encode_be32(word)) == word);
+        CHECK(KeyCodec::decode_be64(KeyCodec::encode_be64(quad)) == quad);
+
+        // Expected size: tag (2) + be32 (4) + separator (1) + be64 (8).
+        KeyBuilder key;
+        key.append_tag("f|").append_be32(17).append_separator().append_be64(9);
+        auto encoded = key.build();
+        CHECK(encoded.size() == 2 + 4 + 1 + 8);
+    }
+
+    TEST_CASE("basic put/get works across column families") {
+        auto dir = dft_utils_test::make_unique_test_path("rocksdb_put_get");
+        fs::create_directories(dir);
+        auto db_path = (dir / ".dftindex").string();
+        RocksDatabase store(db_path);
+        CHECK(store.is_open());
+
+        // One write with no third argument, one that names "provenance".
+        CHECK(store.put("hello", "world").ok());
+        CHECK(store.put("k1", "v1", "provenance").ok());
+
+        std::string fetched;
+        CHECK(store.get("hello", &fetched).ok());
+        CHECK(fetched == "world");
+        CHECK(store.get("k1", &fetched, "provenance").ok());
+        CHECK(fetched == "v1");
+    }
+
+    TEST_CASE("manager reuses one live instance per db path") {
+        using Mode = RocksDatabase::OpenMode;
+        auto dir = dft_utils_test::make_unique_test_path("rocksdb_manager");
+        fs::create_directories(dir);
+        auto db_path = (dir / ".dftindex").string();
+        auto& registry = RocksDBManager::instance();
+
+        auto writer = registry.get_or_open(db_path, Mode::ReadWrite);
+        REQUIRE(writer != nullptr);
+        REQUIRE(writer->is_open());
+        CHECK_FALSE(writer->is_read_only());
+
+        // Asking again in the same mode must hand back the same instance.
+        auto writer_again = registry.get_or_open(db_path, Mode::ReadWrite);
+        CHECK(writer_again == writer);
+        // A read-only request must be served by the live writable instance.
+        auto reader = registry.get_or_open(db_path, Mode::ReadOnly);
+        CHECK(reader == writer);
+        CHECK_FALSE(reader->is_read_only());
+    }
+
+    TEST_CASE("manager reset drops the cached instance for one path") {
+        auto root =
+            dft_utils_test::make_unique_test_path("rocksdb_manager_reset");
+        fs::create_directories(root);
+        auto path = (root / ".dftindex").string();
+        auto& manager = RocksDBManager::instance();
+        auto first =
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(first != nullptr);
+        // Watch the instance without owning it: a saved raw address could
+        // be handed out again by the allocator once the instance is freed.
+        const std::weak_ptr first_observer{first};
+
+        manager.reset(path);
+        first.reset();
+
+        auto second =
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(second != nullptr);
+        CHECK(first_observer.expired());
+    }
+
+    TEST_CASE("manager shutdown clears cached instances") {
+        auto root =
+            dft_utils_test::make_unique_test_path("rocksdb_manager_shutdown");
+        fs::create_directories(root);
+        auto path = (root / ".dftindex").string();
+        auto& manager = RocksDBManager::instance();
+        auto first =
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(first != nullptr);
+        // Watch the instance without owning it: a saved raw address could
+        // be handed out again by the allocator once the instance is freed.
+        const std::weak_ptr first_observer{first};
+
+        manager.shutdown();
+        first.reset();
+
+        auto second =
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(second != nullptr);
+        CHECK(first_observer.expired());
+    }
+
+    TEST_CASE("manager rejects read-only upgrade while handle is alive") {
+        auto root =
+            dft_utils_test::make_unique_test_path("rocksdb_manager_upgrade");
+        fs::create_directories(root);
+        // seed is a direct read-write handle kept open for the whole test.
+        auto path = (root / ".dftindex").string();
+        RocksDatabase seed(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(seed.is_open());
+        // With ro still held, a read-write request must throw.
+        auto& manager = RocksDBManager::instance();
+        auto ro = manager.get_or_open(path, RocksDatabase::OpenMode::ReadOnly);
+        REQUIRE(ro != nullptr);
+        CHECK(ro->is_read_only());
+        CHECK_THROWS_WITH_AS(
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite),
+            doctest::Contains("still in use"), std::runtime_error);
+    }
+
+    TEST_CASE("manager rejects read-only upgrade while handle is shared") {
+        auto root = dft_utils_test::make_unique_test_path(
+            "rocksdb_manager_upgrade_shared");
+        fs::create_directories(root);
+        // seed is a direct read-write handle kept open for the whole test.
+        auto path = (root / ".dftindex").string();
+        RocksDatabase seed(path, RocksDatabase::OpenMode::ReadWrite);
+        REQUIRE(seed.is_open());
+
+        auto& manager = RocksDBManager::instance();
+        auto ro = manager.get_or_open(path, RocksDatabase::OpenMode::ReadOnly);
+        REQUIRE(ro != nullptr);
+        CHECK(ro->is_read_only());
+        // A second copy of the read-only handle; the upgrade must throw.
+        auto ro_shared = ro;
+        CHECK_THROWS_WITH_AS(
+            manager.get_or_open(path, RocksDatabase::OpenMode::ReadWrite),
+            doctest::Contains("still in use"), std::runtime_error);
+    }
+
+    TEST_CASE("custom filesystem supports async read polling") {
+        auto root = dft_utils_test::make_unique_test_path("rocksdb_async_read");
+        fs::create_directories(root);
+
+        auto file_system =
+            dftracer::utils::rocksdb::make_dftracer_file_system();
+        auto test_file = (root / "async-read.bin").string();
+        // Write a 16-byte payload through the custom file system.
+        {
+            std::unique_ptr<::rocksdb::FSWritableFile> writable;
+            REQUIRE(file_system
+                        ->NewWritableFile(test_file, ::rocksdb::FileOptions(),
+                                          &writable, nullptr)
+                        .ok());
+            const std::string payload = "abcdefghijklmnop";
+            REQUIRE(writable
+                        ->Append(::rocksdb::Slice(payload),
+                                 ::rocksdb::IOOptions(), nullptr)
+                        .ok());
+            REQUIRE(writable->Close(::rocksdb::IOOptions(), nullptr).ok());
+        }
+        // Reopen the same file for random-access reads.
+        std::unique_ptr<::rocksdb::FSRandomAccessFile> random;
+        REQUIRE(file_system
+                    ->NewRandomAccessFile(test_file, ::rocksdb::FileOptions(),
+                                          &random, nullptr)
+                    .ok());
+        // The file system must advertise the kAsyncIO capability bit.
+        int64_t supported_ops = 0;
+        file_system->SupportedOps(supported_ops);
+        CHECK((supported_ops & (1LL << ::rocksdb::FSSupportedOps::kAsyncIO)) !=
+              0);
+        // Request 4 bytes at offset 2, i.e. "cdef" of the payload.
+        std::array<char, 5> scratch{};
+        ::rocksdb::FSReadRequest request;
+        request.offset = 2;
+        request.len = 4;
+        request.scratch = scratch.data();
+        // State filled in by the completion callback.
+        bool callback_called = false;
+        bool callback_status_ok = false;
+        std::string callback_result;
+        void* io_handle = nullptr;
+        ::rocksdb::IOHandleDeleter deleter;
+        // Submit the read; it must hand back a non-null io_handle.
+        REQUIRE(
+            random
+                ->ReadAsync(
+                    request, ::rocksdb::IOOptions(),
+                    [&callback_called, &callback_status_ok, &callback_result](
+                        ::rocksdb::FSReadRequest& completed, void*) {
+                        callback_called = true;
+                        callback_status_ok = completed.status.ok();
+                        callback_result = completed.result.ToString();
+                    },
+                    nullptr, &io_handle, &deleter, nullptr)
+                .ok());
+        REQUIRE(io_handle != nullptr);
+        // Polling the handle must run the callback with the read result.
+        std::vector<void*> io_handles{io_handle};
+        REQUIRE(file_system->Poll(io_handles, 1).ok());
+        CHECK(callback_called);
+        CHECK(callback_status_ok);
+        CHECK(callback_result == "cdef");
+        // Release the handle with the deleter that was passed to ReadAsync.
+        deleter(io_handle);
+    }
+}
diff --git a/tests/utilities/indexer/test_scan_prefix.cpp b/tests/utilities/indexer/test_scan_prefix.cpp
new file mode 100644
index 00000000..c5ebf575
--- /dev/null
+++ b/tests/utilities/indexer/test_scan_prefix.cpp
@@ -0,0 +1,123 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include <dftracer/utils/utilities/indexer/internal/error.h>
+#include <dftracer/utils/utilities/indexer/internal/scan_prefix.h>
+#include <doctest/doctest.h>
+#include <rocksdb/iterator.h>
+#include <rocksdb/slice.h>
+#include <rocksdb/status.h>
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+using dftracer::utils::utilities::indexer::internal::IndexerError;
+using dftracer::utils::utilities::indexer::internal::scan_prefix_iterator;
+
+namespace {
+
+// Test double for ::rocksdb::Iterator; entries must be sorted by key.
+class FakeIterator final : public ::rocksdb::Iterator {
+   public:
+    FakeIterator(std::vector<std::pair<std::string, std::string>> entries,
+                 ::rocksdb::Status status = ::rocksdb::Status::OK())
+        : entries_(std::move(entries)), status_(std::move(status)) {}
+    // Invalid past the end, and always invalid when status_ is an error.
+    bool Valid() const override {
+        return index_ < entries_.size() && status_.ok();
+    }
+
+    void SeekToFirst() override {
+        index_ = entries_.empty() ? entries_.size() : 0;
+    }
+
+    void SeekToLast() override {
+        index_ = entries_.empty() ? entries_.size() : entries_.size() - 1;
+    }
+    // Positions at the first entry whose key is >= target.
+    void Seek(const ::rocksdb::Slice& target) override {
+        const auto key = target.ToString();
+        index_ = 0;
+        while (index_ < entries_.size() && entries_[index_].first < key) {
+            ++index_;
+        }
+    }
+    // Positions at the last entry whose key is <= target, if there is one.
+    void SeekForPrev(const ::rocksdb::Slice& target) override {
+        const auto key = target.ToString();
+        // count = number of leading entries whose key is <= target.
+        std::size_t count = entries_.size();
+        while (count > 0 && entries_[count - 1].first > key) {
+            --count;
+        }
+        // No such entry: become invalid instead of landing on entry 0.
+        index_ = count > 0 ? count - 1 : entries_.size();
+    }
+
+    void Next() override {
+        if (index_ < entries_.size()) {
+            ++index_;
+        }
+    }
+    // Stepping back from the first entry leaves the iterator invalid.
+    void Prev() override {
+        if (index_ == 0 || entries_.empty()) {
+            index_ = entries_.size();
+            return;
+        }
+        --index_;
+    }
+
+    ::rocksdb::Slice key() const override { return entries_[index_].first; }
+    ::rocksdb::Slice value() const override { return entries_[index_].second; }
+
+    ::rocksdb::Status status() const override { return status_; }
+
+   private:
+    std::vector<std::pair<std::string, std::string>> entries_;
+    std::size_t index_ = 0;
+    ::rocksdb::Status status_;
+};
+
+}  // namespace
+
+TEST_SUITE("ScanPrefix") {
+    TEST_CASE("iterates matching prefix entries and stops at the first miss") {
+        using Entries = std::vector<std::pair<std::string, std::string>>;
+        // Sorted keys: one before the "ab|" prefix, two in it, one after.
+        const auto open_iterator = [] {
+            return std::make_unique<FakeIterator>(Entries{
+                {"aa|0", "skip"},
+                {"ab|0", "v0"},
+                {"ab|1", "v1"},
+                {"ac|0", "stop"},
+            });
+        };
+        std::vector<std::string> visited;
+        const auto record_key = [&visited](::rocksdb::Iterator& cursor) {
+            visited.push_back(cursor.key().ToString());
+        };
+        scan_prefix_iterator("scan failed", "ab|", open_iterator, record_key);
+
+        REQUIRE(visited.size() == 2);
+        CHECK(visited[0] == "ab|0");
+        CHECK(visited[1] == "ab|1");
+    }
+
+    TEST_CASE("throws IndexerError when iterator status is non-ok") {
+        using Entries = std::vector<std::pair<std::string, std::string>>;
+        // The iterator has a matching entry but reports an I/O error status.
+        const auto open_failing_iterator = [] {
+            return std::make_unique<FakeIterator>(
+                Entries{{"ab|0", "v0"}},
+                ::rocksdb::Status::IOError("synthetic iterator failure"));
+        };
+        const auto ignore_entry = [](::rocksdb::Iterator&) {};
+
+        CHECK_THROWS_AS(
+            scan_prefix_iterator("scan failed", "ab|", open_failing_iterator,
+                                 ignore_entry),
+            IndexerError);
+    }
+}
diff --git a/tests/utilities/reader/test_trace_reader.cpp b/tests/utilities/reader/test_trace_reader.cpp
index 90e9f58b..e1a1ed0d 100644
--- a/tests/utilities/reader/test_trace_reader.cpp
+++ b/tests/utilities/reader/test_trace_reader.cpp
@@ -1,6 +1,7 @@
 #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
 #include <dftracer/utils/core/common/filesystem.h>
 #include <dftracer/utils/core/coro/task.h>
+#include <dftracer/utils/utilities/composites/dft/internal/utils.h>
 #include <dftracer/utils/utilities/indexer/index_builder_utility.h>
 #include <dftracer/utils/utilities/indexer/internal/indexer_factory.h>
 #include <dftracer/utils/utilities/reader/trace_reader.h>
@@ -16,6 +17,7 @@
 using namespace dftracer::utils::utilities::reader;
 using namespace dftracer::utils::utilities::indexer::internal;
 using namespace dftracer::utils::coro;
+using namespace dftracer::utils::utilities::composites::dft::internal;
 using namespace dft_utils_test;
 
 namespace {
@@ -88,13 +90,13 @@ TEST_SUITE("TraceReader") {
         TestEnvironment env(100);
         std::string gz_file = env.create_dft_test_gzip_file(100);
         std::string index_dir = env.get_dir();
-        std::string idx_path = env.get_index_path(gz_file);
+        std::string index_path = env.get_index_path(gz_file);
 
-        auto indexer =
-            IndexerFactory::create(gz_file, idx_path, 32 * 1024 * 1024, false);
+        auto indexer = IndexerFactory::create(gz_file, index_path,
+                                              32 * 1024 * 1024, false);
         REQUIRE(indexer != nullptr);
         indexer->build();
-        REQUIRE(fs::exists(idx_path));
+        REQUIRE(fs::exists(determine_index_path(gz_file, index_dir)));
 
         TraceReader reader({.file_path = gz_file, .index_dir = index_dir});
 
@@ -106,7 +108,7 @@ TEST_SUITE("TraceReader") {
 
         SUBCASE("Indexed and unindexed counts match") {
             // Remove the index and re-read to compare.
-            fs::remove(idx_path);
+            fs::remove_all(determine_index_path(gz_file, index_dir));
             TraceReader plain_reader({.file_path = gz_file});
             CHECK_FALSE(plain_reader.has_index());
             auto n_plain = count_lines(plain_reader.read_lines()).get();
@@ -208,13 +210,13 @@ TEST_SUITE("TraceReader") {
         TestEnvironment env(100);
         std::string gz_file = env.create_dft_test_gzip_file(100);
         std::string index_dir = env.get_dir();
-        std::string idx_path = env.get_index_path(gz_file);
+        std::string index_path = env.get_index_path(gz_file);
 
-        auto indexer =
-            IndexerFactory::create(gz_file, idx_path, 32 * 1024 * 1024, false);
+        auto indexer = IndexerFactory::create(gz_file, index_path,
+                                              32 * 1024 * 1024, false);
         REQUIRE(indexer != nullptr);
         indexer->build();
-        REQUIRE(fs::exists(idx_path));
+        REQUIRE(fs::exists(determine_index_path(gz_file, index_dir)));
 
         TraceReader reader({.file_path = gz_file, .index_dir = index_dir});
         CHECK(reader.has_index());
@@ -289,7 +291,7 @@ TEST_SUITE("TraceReader") {
         TestEnvironment env(100);
         std::string gz_file = env.create_dft_test_gzip_file(100);
         std::string index_dir = env.get_dir();
-        std::string idx_path = env.get_index_path(gz_file);
+        std::string index_path = env.get_index_path(gz_file);
 
         TraceReader plain_reader({.file_path = gz_file});
         CHECK_FALSE(plain_reader.has_index());
@@ -299,8 +301,8 @@ TEST_SUITE("TraceReader") {
         auto plain_chunks =
             count_raw_chunks(plain_reader.read_raw(single_line)).get();
 
-        auto indexer =
-            IndexerFactory::create(gz_file, idx_path, 32 * 1024 * 1024, false);
+        auto indexer = IndexerFactory::create(gz_file, index_path,
+                                              32 * 1024 * 1024, false);
         REQUIRE(indexer != nullptr);
         indexer->build();
 
@@ -438,13 +440,13 @@ TEST_SUITE("TraceReader") {
         TestEnvironment env(100);
         std::string gz_file = env.create_dft_test_gzip_file(100);
         std::string index_dir = env.get_dir();
-        std::string idx_path = env.get_index_path(gz_file);
+        std::string index_path = env.get_index_path(gz_file);
 
-        auto indexer =
-            IndexerFactory::create(gz_file, idx_path, 32 * 1024 * 1024, false);
+        auto indexer = IndexerFactory::create(gz_file, index_path,
+                                              32 * 1024 * 1024, false);
         REQUIRE(indexer != nullptr);
         indexer->build();
-        REQUIRE(fs::exists(idx_path));
+        REQUIRE(fs::exists(determine_index_path(gz_file, index_dir)));
 
         TraceReader reader({.file_path = gz_file, .index_dir = index_dir});
         CHECK(reader.has_index());