From 9ba92e1100bf448f924dcb438ef044be71d892f5 Mon Sep 17 00:00:00 2001 From: Bhekani Khumalo Date: Wed, 10 Jun 2026 20:13:31 +0100 Subject: [PATCH] refactor: consolidate the parse+schema+build query idiom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several daemon search sites repeated the same three-step dance: parse_query(str) -> MxrSchema::build() -> QueryBuilder::new(&schema).build(&ast). If the query-compilation path ever changed, every caller had to change with it. Add `mxr_search::CompiledQuery` (parses once, holds the schema, builds a fresh Tantivy query per call — paging loops consume the query each page) plus a `build_query(&str)` convenience for single-use sites. Switch the batch-mutation address loop to CompiledQuery (preserving its parse-once / schema-once behaviour) and archive_ask to build_query. A golden test asserts the helper produces byte-identical query output to the manual path, so this is a pure consolidation. --- crates/daemon/src/handler/archive_ask.rs | 4 +- crates/daemon/src/handler/mutations.rs | 5 +- crates/search/src/lib.rs | 2 +- crates/search/src/query_builder.rs | 64 +++++++++++++++++++++++- 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/crates/daemon/src/handler/archive_ask.rs b/crates/daemon/src/handler/archive_ask.rs index b5393ead..697f5cf4 100644 --- a/crates/daemon/src/handler/archive_ask.rs +++ b/crates/daemon/src/handler/archive_ask.rs @@ -258,9 +258,7 @@ async fn retrieve_candidates( let lexical_ids: Vec = if want_lexical { let query = lexical_query_with_filters(question, filters); let page = if has_structured_filters(filters) { - let ast = mxr_search::parse_query(&query).map_err(|e| e.to_string())?; - let schema = mxr_search::MxrSchema::build(); - let query = mxr_search::QueryBuilder::new(&schema).build(&ast); + let query = mxr_search::build_query(&query).map_err(|e| e.to_string())?; state .search .search_ast(query, pool_size, 0, SortOrder::Relevance) diff --git a/crates/daemon/src/handler/mutations.rs b/crates/daemon/src/handler/mutations.rs index 36d7f60d..4c617ce2 100644 --- a/crates/daemon/src/handler/mutations.rs +++ b/crates/daemon/src/handler/mutations.rs @@ -2444,10 +2444,9 @@ async fn select_sender_footprint( let mut envelopes = Vec::new(); let mut offset = 0usize; const PAGE_SIZE: usize = 500; - let ast = mxr_search::parse_query(&query).map_err(|e| e.to_string())?; - let schema = mxr_search::MxrSchema::build(); + let compiled = mxr_search::CompiledQuery::parse(&query).map_err(|e| e.to_string())?; loop { - let query_ast = mxr_search::QueryBuilder::new(&schema).build(&ast); + let query_ast = compiled.build(); let page = state .search .search_ast( diff --git a/crates/search/src/lib.rs b/crates/search/src/lib.rs index cd6205db..5932062c 100644 --- a/crates/search/src/lib.rs +++ b/crates/search/src/lib.rs @@ -25,7 +25,7 @@ pub use mail_query::{ }; pub use index::{SearchIndex, SearchPage, SearchResult}; -pub use query_builder::QueryBuilder; +pub use query_builder::{build_query, CompiledQuery, QueryBuilder}; pub use schema::MxrSchema; pub use service::{SearchIndexEntry, SearchServiceHandle, SearchUpdateBatch}; diff --git a/crates/search/src/query_builder.rs b/crates/search/src/query_builder.rs index db8a0ccc..a02a8772 100644 --- a/crates/search/src/query_builder.rs +++ b/crates/search/src/query_builder.rs @@ -1,7 +1,7 @@ use crate::schema::MxrSchema; use crate::{ - DateBound, DateValue, FilterKind, QueryField, QueryNode, RelativeUnit, SizeOp, - FILTER_OWED_REPLY, FILTER_REPLY_LATER, + parse_query, DateBound, DateValue, FilterKind, ParseError, QueryField, QueryNode, RelativeUnit, + SizeOp, FILTER_OWED_REPLY, FILTER_REPLY_LATER, }; use chrono::{Datelike, Local, NaiveDate}; use std::ops::Bound; @@ -589,6 +589,38 @@ fn normalize_message_id(value: &str) -> String { .to_ascii_lowercase() } +/// A parsed query plus the schema needed to compile it, so callers +/// don't repeat the `parse_query` + `MxrSchema::build` + `QueryBuilder` +/// dance at every search site. Holds the schema so it isn't rebuilt per +/// call in a paging loop — Tantivy queries are consumed by `search_ast`, +/// so each page needs a fresh `build()`. +pub struct CompiledQuery { + schema: MxrSchema, + ast: QueryNode, +} + +impl CompiledQuery { + /// Parse a query string and capture the schema once. + pub fn parse(query: &str) -> Result { + Ok(Self { + schema: MxrSchema::build(), + ast: parse_query(query)?, + }) + } + + /// Produce a fresh Tantivy query for one search call. + pub fn build(&self) -> Box { + QueryBuilder::new(&self.schema).build(&self.ast) + } +} + +/// Parse and compile a query string into a Tantivy query in one call. +/// For a paging loop, use [`CompiledQuery::parse`] once and `build()` +/// per page instead so the schema isn't rebuilt each iteration. +pub fn build_query(query: &str) -> Result, ParseError> { + Ok(CompiledQuery::parse(query)?.build()) +} + #[cfg(test)] mod tests { #![expect( @@ -601,6 +633,34 @@ mod tests { use crate::test_fixtures::TestEnvelopeBuilder; use crate::{parse_query, ParseError}; use mxr_core::types::*; + + #[test] + fn build_query_matches_the_manual_parse_schema_build_path() { + for query in [ + "from:alice@example.com", + "is:unread subject:report", + "has:link-heavy", + ] { + let schema = MxrSchema::build(); + let manual = QueryBuilder::new(&schema).build(&parse_query(query).unwrap()); + let helper = build_query(query).unwrap(); + assert_eq!( + format!("{manual:?}"), + format!("{helper:?}"), + "build_query must match the manual path for {query:?}" + ); + // CompiledQuery builds an equivalent query and can do so + // repeatedly (each call yields a fresh boxed query). + let compiled = CompiledQuery::parse(query).unwrap(); + assert_eq!(format!("{:?}", compiled.build()), format!("{manual:?}")); + assert_eq!(format!("{:?}", compiled.build()), format!("{manual:?}")); + } + } + + #[test] + fn build_query_propagates_parse_errors() { + assert!(build_query("subject:(unbalanced").is_err()); + } use proptest::prelude::*; fn make_test_envelope(