Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Changelog

## Unreleased
- Embedded section handles in `BitVectorData` and added `BitVectorDataMeta` with
`Serializable` support for both `BitVectorData` and `BitVector`, enabling
zero-copy reconstruction from arena metadata.
- Introduced a `Serializable` trait for metadata-based reconstruction and
implemented it for `CompactVector`, `DacsByte`, and `WaveletMatrix`.
- Audited `DacsByte` and `WaveletMatrix` to leverage `SectionHandle::view`
Expand Down
1 change: 0 additions & 1 deletion INVENTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
manual offset math in complex `from_bytes` implementations like `DacsByte`.
- Investigate slimming `DacsByte` per-level metadata to avoid storing unused
flag handles for the last level.

## Discovered Issues
- `katex.html` performs manual string replacements; consider DOM-based manipulation.
- Revisit zero-copy storage strategy: avoid extra copies when storing serialized bytes in structures.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ RUSTDOCFLAGS="--html-in-header katex.html" cargo doc --no-deps
is backed by `anybytes::View`. Metadata describing a stored sequence includes
[`SectionHandle`](anybytes::area::SectionHandle)s so the raw
`Bytes` returned by `ByteArea::freeze` can be handed to
`BitVectorData::from_bytes` for zero‑copy reconstruction.
`BitVectorData::from_bytes` with its `BitVectorDataMeta` for zero‑copy reconstruction.

Types following this pattern implement the [`Serializable`](src/serialization.rs) trait,
which exposes a `metadata` accessor and a `from_bytes` constructor.
Expand Down
107 changes: 97 additions & 10 deletions src/bit_vector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ pub trait Select {
/// Bit width of one machine word (`u64`), the storage unit of the bit vector.
pub const WORD_LEN: usize = u64::BITS as usize;

use crate::serialization::Serializable;
use anybytes::{area::SectionHandle, ByteArea, Bytes, Section, SectionWriter, View};
use anyhow::{anyhow, Result};

Expand Down Expand Up @@ -296,11 +297,13 @@ impl<'a> BitVectorBuilder<'a> {
}

/// Freezes the builder's word section and wraps it as [`BitVectorData`],
/// recording the section handle alongside the words.
///
/// # Panics
///
/// Panics if the section cannot be frozen or the frozen bytes cannot be
/// viewed as `[u64]`.
fn into_data(self) -> BitVectorData {
    // Capture the handle before the section is frozen.
    let section_handle = self.words.handle();
    let frozen = self.words.freeze().expect("freeze section");
    BitVectorData {
        words: frozen.view::<[u64]>().unwrap(),
        len: self.len,
        handle: Some(section_handle),
    }
}

Expand All @@ -318,19 +321,39 @@ impl<'a> BitVectorBuilder<'a> {
}

/// Immutable bit vector data without auxiliary indexes.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub struct BitVectorData {
/// Underlying machine words storing bit data.
pub words: View<[u64]>,
/// Number of valid bits in `words`.
pub len: usize,
/// Handle to the backing words section, if available.
pub handle: Option<SectionHandle<u64>>,
}

impl PartialEq for BitVectorData {
    /// Two values are equal when their words and bit length match.
    ///
    /// The optional section `handle` is not compared.
    fn eq(&self, other: &Self) -> bool {
        if self.words != other.words {
            return false;
        }
        self.len == other.len
    }
}

impl Eq for BitVectorData {}

/// Metadata describing a [`BitVectorData`] stored in a [`ByteArea`].
///
/// Passed together with the backing [`Bytes`] to
/// [`BitVectorData::from_bytes`] to rebuild the data.
#[derive(Debug, Clone, Copy)]
pub struct BitVectorDataMeta {
/// Number of bits stored.
pub len: usize,
/// Handle to the raw `u64` words backing the vector.
pub handle: SectionHandle<u64>,
}

impl Default for BitVectorData {
    /// Creates data with zero bits, a view over empty bytes, and no section
    /// handle.
    fn default() -> Self {
        let words = Bytes::empty().view::<[u64]>().unwrap();
        Self {
            words,
            len: 0,
            handle: None,
        }
    }
}
Expand All @@ -348,10 +371,23 @@ impl BitVectorData {
builder.into_data()
}

/// Reconstructs the data from zero-copy [`Bytes`].
pub fn from_bytes(len: usize, bytes: Bytes) -> Result<Self> {
let words = bytes.view::<[u64]>().map_err(|e| anyhow::anyhow!(e))?;
Ok(Self { words, len })
/// Returns metadata describing this data.
///
/// The metadata carries the bit length and the section handle of the
/// backing words; no bytes are produced by this call.
///
/// # Panics
///
/// Panics if no section handle is recorded (`handle` is `None`), as is the
/// case for `BitVectorData::default()`.
pub fn metadata(&self) -> BitVectorDataMeta {
BitVectorDataMeta {
len: self.len,
// Data without a recorded section handle cannot be described.
handle: self.handle.expect("missing handle"),
}
}

/// Reconstructs the data from zero-copy [`Bytes`] and its metadata.
pub fn from_bytes(meta: BitVectorDataMeta, bytes: Bytes) -> Result<Self> {
let words = meta.handle.view(&bytes).map_err(|e| anyhow::anyhow!(e))?;
Ok(Self {
words,
len: meta.len,
handle: Some(meta.handle),
})
}

/// Returns the number of bits stored.
Expand Down Expand Up @@ -393,6 +429,18 @@ impl BitVectorData {
}
}

impl Serializable for BitVectorData {
    type Meta = BitVectorDataMeta;

    /// Forwards to the inherent `BitVectorData::metadata`.
    fn metadata(&self) -> Self::Meta {
        // Inherent associated functions take precedence over trait ones in
        // path resolution, so this does not recurse.
        Self::metadata(self)
    }

    /// Forwards to the inherent `BitVectorData::from_bytes`.
    fn from_bytes(meta: Self::Meta, bytes: Bytes) -> Result<Self> {
        Self::from_bytes(meta, bytes)
    }
}

impl From<BitVectorData> for BitVector<NoIndex> {
fn from(data: BitVectorData) -> Self {
BitVector::new(data, NoIndex)
Expand Down Expand Up @@ -514,14 +562,22 @@ impl BitVectorIndex for NoIndex {
}

/// Immutable bit vector data combined with an auxiliary index.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone)]
pub struct BitVector<I> {
/// Raw data bits.
pub data: BitVectorData,
/// Associated index.
pub index: I,
}

impl<I: PartialEq> PartialEq for BitVector<I> {
    /// Compares the raw data first, then the attached index.
    fn eq(&self, other: &Self) -> bool {
        let Self { data, index } = self;
        *data == other.data && *index == other.index
    }
}

impl<I: Eq> Eq for BitVector<I> {}

/// Iterator over bits in a [`BitVector`].
pub struct Iter<'a, I> {
bv: &'a BitVector<I>,
Expand Down Expand Up @@ -578,6 +634,11 @@ impl<I> BitVector<I> {
pub fn to_vec(&self) -> Vec<bool> {
self.iter().collect()
}

/// Returns the handle to the backing words section, if available.
pub fn handle(&self) -> Option<SectionHandle<u64>> {
self.data.handle
}
}

impl<I: BitVectorIndex> NumBits for BitVector<I> {
Expand Down Expand Up @@ -616,6 +677,33 @@ impl<I: BitVectorIndex> Select for BitVector<I> {
}
}

impl<I: BitVectorIndex> BitVector<I> {
/// Returns metadata describing this vector's data.
///
/// Only the underlying [`BitVectorData`] is described; the index is not
/// part of the metadata.
///
/// # Panics
///
/// Panics if the underlying data has no section handle.
pub fn metadata(&self) -> BitVectorDataMeta {
<Self as Serializable>::metadata(self)
}

/// Reconstructs the vector from zero-copy [`Bytes`] and its metadata.
///
/// The index is rebuilt from the reconstructed data rather than read from
/// `bytes`.
///
/// # Errors
///
/// Returns an error if the data cannot be reconstructed from `bytes`.
pub fn from_bytes(meta: BitVectorDataMeta, bytes: Bytes) -> Result<Self> {
<Self as Serializable>::from_bytes(meta, bytes)
}
}

impl<I: BitVectorIndex> Serializable for BitVector<I> {
    type Meta = BitVectorDataMeta;

    /// Returns the metadata of the underlying data; the index adds nothing.
    fn metadata(&self) -> Self::Meta {
        BitVectorData::metadata(&self.data)
    }

    /// Rebuilds the data from `bytes`, then recomputes the index from it.
    fn from_bytes(meta: Self::Meta, bytes: Bytes) -> Result<Self> {
        BitVectorData::from_bytes(meta, bytes).map(|data| {
            let index = I::build(&data);
            BitVector::new(data, index)
        })
    }
}

pub use rank9sel::Rank9SelIndex;

#[cfg(test)]
Expand Down Expand Up @@ -659,10 +747,9 @@ mod tests {
let expected: BitVector<NoIndex> =
BitVectorData::from_bits([true, false, true, true, false]).into();
let bv: BitVector<NoIndex> = builder.freeze::<NoIndex>();
let len = bv.data.len;
let bytes = bv.data.words.clone().bytes();
let data = BitVectorData::from_bytes(len, bytes).unwrap();
let other: BitVector<NoIndex> = data.into();
let meta = bv.metadata();
let bytes = area.freeze().unwrap();
let other: BitVector<NoIndex> = BitVector::from_bytes(meta, bytes).unwrap();
assert_eq!(expected, other);
}

Expand Down
15 changes: 9 additions & 6 deletions src/char_sequences/wavelet_matrix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use anybytes::{area::SectionHandle, ByteArea, Bytes, Section, SectionWriter};
use anyhow::{anyhow, Result};

use crate::bit_vector::{
Access, BitVector, BitVectorBuilder, BitVectorData, BitVectorIndex, NumBits, Rank, Select,
Access, BitVector, BitVectorBuilder, BitVectorData, BitVectorDataMeta, BitVectorIndex, NumBits,
Rank, Select,
};
use crate::serialization::Serializable;
use crate::utils;
Expand Down Expand Up @@ -806,11 +807,13 @@ impl<I: BitVectorIndex> Serializable for WaveletMatrix<I> {
let handles_view = meta.layers.view(&bytes).map_err(anyhow::Error::from)?;
let mut layers = Vec::with_capacity(meta.alph_width);
for h in handles_view.as_ref() {
let words = h.view(&bytes).map_err(anyhow::Error::from)?;
let data = BitVectorData {
words,
len: meta.len,
};
let data = BitVectorData::from_bytes(
BitVectorDataMeta {
len: meta.len,
handle: *h,
},
bytes.clone(),
)?;
let index = I::build(&data);
layers.push(BitVector::new(data, index));
}
Expand Down
29 changes: 10 additions & 19 deletions src/int_vectors/compact_vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use anyhow::{anyhow, Result};
use num_traits::ToPrimitive;
use std::iter::ExactSizeIterator;

use crate::bit_vector::BitVectorBuilder;
use crate::bit_vector::{BitVector, BitVectorData, NoIndex};
use crate::bit_vector::{BitVector, BitVectorBuilder, BitVectorData, BitVectorDataMeta, NoIndex};
use crate::int_vectors::prelude::*;
use crate::serialization::Serializable;
use crate::utils;
Expand Down Expand Up @@ -136,13 +135,11 @@ impl<'a> CompactVectorBuilder<'a> {
/// # }
/// ```
pub fn freeze(self) -> CompactVector {
let handle = self.chunks.handle();
let chunks: BitVector<NoIndex> = self.chunks.freeze::<NoIndex>();
CompactVector {
chunks,
len: self.len,
width: self.width,
handle,
}
}
}
Expand Down Expand Up @@ -178,16 +175,11 @@ pub struct CompactVector {
chunks: BitVector<NoIndex>,
len: usize,
width: usize,
handle: SectionHandle<u64>,
}

impl PartialEq for CompactVector {
fn eq(&self, other: &Self) -> bool {
self.chunks == other.chunks
&& self.len == other.len
&& self.width == other.width
&& self.handle.offset == other.handle.offset
&& self.handle.len == other.handle.len
self.chunks == other.chunks && self.len == other.len && self.width == other.width
}
}

Expand All @@ -198,13 +190,11 @@ impl Default for CompactVector {
let mut area = ByteArea::new().expect("byte area");
let mut sections = area.sections();
let builder = BitVectorBuilder::with_capacity(0, &mut sections).unwrap();
let handle = builder.handle();
let chunks = builder.freeze::<NoIndex>();
Self {
chunks,
len: 0,
width: 0,
handle,
}
}
}
Expand Down Expand Up @@ -455,23 +445,24 @@ impl Serializable for CompactVector {
CompactVectorMeta {
len: self.len,
width: self.width,
handle: self.handle,
handle: self.chunks.handle().expect("missing handle"),
}
}

fn from_bytes(meta: Self::Meta, bytes: Bytes) -> Result<Self> {
let data_len = meta.len * meta.width;
let words_view = meta.handle.view(&bytes).map_err(|e| anyhow!(e))?;
let data = BitVectorData {
words: words_view,
len: data_len,
};
let data = BitVectorData::from_bytes(
BitVectorDataMeta {
len: data_len,
handle: meta.handle,
},
bytes,
)?;
let chunks = BitVector::new(data, NoIndex);
Ok(Self {
chunks,
len: meta.len,
width: meta.width,
handle: meta.handle,
})
}
}
Expand Down
17 changes: 8 additions & 9 deletions src/int_vectors/dacs_byte.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,15 @@ impl<I: BitVectorIndex> Serializable for DacsByte<I> {
let mut data = Vec::with_capacity(meta.num_levels);
for (idx, info) in infos.as_ref().iter().enumerate() {
if idx + 1 < meta.num_levels {
let words = info.flag.view(&bytes).map_err(anyhow::Error::from)?;
let bv_data = bit_vector::BitVectorData {
words,
len: info.flag_bits,
};
let bv_data = bit_vector::BitVectorData::from_bytes(
bit_vector::BitVectorDataMeta {
len: info.flag_bits,
handle: info.flag,
},
bytes.clone(),
)?;
let index = I::build(&bv_data);
flags.push(bit_vector::BitVector {
data: bv_data,
index,
});
flags.push(bit_vector::BitVector::new(bv_data, index));
}
let lvl_view = info.level.view(&bytes).map_err(anyhow::Error::from)?;
data.push(lvl_view);
Expand Down