From 3222f50d46214f6d515c678fc264d2511bdf767c Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 18 Jul 2025 20:44:16 +0100 Subject: [PATCH 1/8] Extract CST builder --- src/parser/cst_builder.rs | 442 ++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 432 +------------------------------------ 2 files changed, 448 insertions(+), 426 deletions(-) create mode 100644 src/parser/cst_builder.rs diff --git a/src/parser/cst_builder.rs b/src/parser/cst_builder.rs new file mode 100644 index 00000000..95e597f3 --- /dev/null +++ b/src/parser/cst_builder.rs @@ -0,0 +1,442 @@ +//! CST construction utilities. +//! +//! This module builds a `rowan::GreenNode` from a sequence of tokens and +//! statement spans. It validates span ordering and provides the [`Parsed`] and +//! [`ParsedSpans`] types used by the parser entry point. + +use chumsky::error::Simple; +use log::warn; +use rowan::{GreenNode, GreenNodeBuilder, Language}; + +use crate::{DdlogLanguage, Span, SyntaxKind}; + +/// Result of a parse operation. +#[derive(Debug)] +pub struct Parsed { + green: GreenNode, + root: super::ast::Root, + errors: Vec>, +} + +impl Parsed { + pub(super) fn new( + green: GreenNode, + root: super::ast::Root, + errors: Vec>, + ) -> Self { + Self { + green, + root, + errors, + } + } + + /// Access the `rowan` green tree. + #[must_use] + pub fn green(&self) -> &GreenNode { + &self.green + } + + /// Access the typed AST root. + #[must_use] + pub fn root(&self) -> &super::ast::Root { + &self.root + } + + /// Access parser errors collected during recovery. + #[must_use] + pub fn errors(&self) -> &[Simple] { + &self.errors + } +} + +/// Spans for each parsed statement category. +/// +/// Instances are constructed via [`ParsedSpans::new`] to ensure span lists are +/// sorted and non-overlapping in debug builds. +#[non_exhaustive] +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ParsedSpans { + /// `import` statement spans. + imports: Vec, + /// `typedef` statement spans. + typedefs: Vec, + /// `relation` declaration spans. + relations: Vec, + /// `index` declaration spans. + indexes: Vec, + /// `function` definition spans. + functions: Vec, + /// `transformer` declaration spans. + transformers: Vec, + /// Rule spans. + rules: Vec, +} + +impl ParsedSpans { + /// Construct a new [`ParsedSpans`]. + /// + /// The caller must provide span lists that are sorted and free from + /// overlaps. In debug builds every list is validated and the function will + /// panic if any ordering violation is detected. + #[must_use] + pub fn new( + imports: Vec, + typedefs: Vec, + relations: Vec, + indexes: Vec, + functions: Vec, + transformers: Vec, + rules: Vec, + ) -> Self { + if cfg!(debug_assertions) { + ensure_span_lists_sorted(&[ + ("imports", &imports), + ("typedefs", &typedefs), + ("relations", &relations), + ("indexes", &indexes), + ("functions", &functions), + ("transformers", &transformers), + ("rules", &rules), + ]); + } + + Self { + imports, + typedefs, + relations, + indexes, + functions, + transformers, + rules, + } + } + + /// Access `import` statement spans. + #[must_use] + pub fn imports(&self) -> &[Span] { + &self.imports + } + + /// Access `typedef` statement spans. + #[must_use] + pub fn typedefs(&self) -> &[Span] { + &self.typedefs + } + + /// Access `relation` declaration spans. + #[must_use] + pub fn relations(&self) -> &[Span] { + &self.relations + } + + /// Access `index` declaration spans. + #[must_use] + pub fn indexes(&self) -> &[Span] { + &self.indexes + } + + /// Access `function` definition spans. + #[must_use] + pub fn functions(&self) -> &[Span] { + &self.functions + } + + /// Access `transformer` declaration spans. + #[must_use] + pub fn transformers(&self) -> &[Span] { + &self.transformers + } + + /// Access rule spans. + #[must_use] + pub fn rules(&self) -> &[Span] { + &self.rules + } +} + +/// Construct the CST from the token stream and recorded statement spans. +/// +/// Span lists must be sorted and non-overlapping so that tokens are wrapped +/// into well-formed nodes. Validation occurs in debug builds when +/// [`ParsedSpans`] is created. +pub(super) fn build_green_tree( + tokens: &[(SyntaxKind, Span)], + src: &str, + spans: &ParsedSpans, +) -> GreenNode { + let mut builder = GreenNodeBuilder::new(); + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); + + let mut import_iter = spans.imports().iter().peekable(); + let mut typedef_iter = spans.typedefs().iter().peekable(); + let mut relation_iter = spans.relations().iter().peekable(); + let mut index_iter = spans.indexes().iter().peekable(); + let mut function_iter = spans.functions().iter().peekable(); + let mut transformer_iter = spans.transformers().iter().peekable(); + let mut rule_iter = spans.rules().iter().peekable(); + + for &(kind, ref span) in tokens { + advance_span_iter(&mut import_iter, span.start); + advance_span_iter(&mut typedef_iter, span.start); + advance_span_iter(&mut relation_iter, span.start); + advance_span_iter(&mut index_iter, span.start); + advance_span_iter(&mut function_iter, span.start); + advance_span_iter(&mut transformer_iter, span.start); + advance_span_iter(&mut rule_iter, span.start); + + start_nodes( + &mut builder, + &mut [ + (&mut import_iter, SyntaxKind::N_IMPORT_STMT), + (&mut typedef_iter, SyntaxKind::N_TYPE_DEF), + (&mut relation_iter, SyntaxKind::N_RELATION_DECL), + (&mut index_iter, SyntaxKind::N_INDEX), + (&mut function_iter, SyntaxKind::N_FUNCTION), + (&mut transformer_iter, SyntaxKind::N_TRANSFORMER), + (&mut rule_iter, SyntaxKind::N_RULE), + ], + span.start, + ); + + push_token(&mut builder, kind, span, src); + + finish_nodes( + &mut builder, + &mut [ + &mut import_iter, + &mut typedef_iter, + &mut relation_iter, + &mut index_iter, + &mut function_iter, + &mut transformer_iter, + &mut rule_iter, + ], + span.end, + ); + } + + builder.finish_node(); + builder.finish() +} + +fn advance_span_iter(iter: &mut std::iter::Peekable>, pos: usize) { + while let Some(next) = iter.peek() { + if pos >= next.end { + iter.next(); + } else { + break; + } + } +} + +fn maybe_start( + builder: &mut GreenNodeBuilder, + iter: &mut std::iter::Peekable>, + pos: usize, + kind: SyntaxKind, +) { + if iter.peek().is_some_and(|current| pos == current.start) { + builder.start_node(DdlogLanguage::kind_to_raw(kind)); + } +} + +fn maybe_finish( + builder: &mut GreenNodeBuilder, + iter: &mut std::iter::Peekable>, + pos: usize, +) { + if iter.peek().is_some_and(|current| pos >= current.end) { + builder.finish_node(); + iter.next(); + } +} + +type SpanIter<'a> = std::iter::Peekable>; + +fn start_nodes( + builder: &mut GreenNodeBuilder, + pairs: &mut [(&mut SpanIter<'_>, SyntaxKind)], + pos: usize, +) { + for (iter, kind) in pairs.iter_mut() { + maybe_start(builder, iter, pos, *kind); + } +} + +fn finish_nodes(builder: &mut GreenNodeBuilder, iters: &mut [&mut SpanIter<'_>], pos: usize) { + for iter in iters.iter_mut() { + maybe_finish(builder, iter, pos); + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct SpanOrderError { + prev: Span, + next: Span, +} + +impl std::fmt::Display for SpanOrderError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "spans overlap or are unsorted: {:?} then {:?}", + self.prev, self.next + ) + } +} + +impl std::error::Error for SpanOrderError {} + +fn validate_spans_sorted(spans: &[Span]) -> Result<(), SpanOrderError> { + for pair in spans.windows(2) { + let [first, second] = pair else { continue }; + if first.end > second.start { + return Err(SpanOrderError { + prev: first.clone(), + next: second.clone(), + }); + } + } + Ok(()) +} + +fn ensure_span_lists_sorted(lists: &[(&str, &[Span])]) { + let mut errors = Vec::new(); + for (name, spans) in lists { + if let Err(e) = validate_spans_sorted(spans) { + errors.push(format!("{name} not sorted: {e}")); + } + } + assert!(errors.is_empty(), "{}", errors.join("\n")); +} + +fn push_token(builder: &mut GreenNodeBuilder, kind: SyntaxKind, span: &Span, src: &str) { + let text = src.get(span.clone()).map_or_else( + || { + warn!( + "token span {:?} out of bounds for source of length {}", + span, + src.len() + ); + "" + }, + |t| t, + ); + + let raw = DdlogLanguage::kind_to_raw(kind); + if kind == SyntaxKind::N_ERROR { + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); + } + builder.token(raw, text); + if kind == SyntaxKind::N_ERROR { + builder.finish_node(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tokenize; + use rstest::rstest; + + #[test] + fn validate_spans_sorted_err_on_overlap() { + let spans = vec![0..5, 4..8]; + let result = validate_spans_sorted(&spans); + assert!(result.is_err()); + } + + #[test] + fn validate_spans_sorted_err_on_unsorted() { + let spans = vec![5..10, 0..2]; + let result = validate_spans_sorted(&spans); + assert!(result.is_err()); + } + + #[test] + fn validate_spans_sorted_ok_on_empty() { + let spans: Vec = Vec::new(); + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn validate_spans_sorted_ok_on_single() { + let spans: Vec = vec![std::ops::Range { start: 0, end: 3 }]; + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn validate_spans_sorted_ok_on_sorted() { + let spans = vec![0..2, 3..5, 5..8]; + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn build_green_tree_panics_on_misordered_spans() { + let unsorted = vec![1..2, 0..1]; + let result = std::panic::catch_unwind(|| { + let _ = ParsedSpans::new( + unsorted, + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + ); + }); + let Err(msg) = result else { + panic!("expected panic") + }; + let text = msg.downcast_ref::().map_or_else( + || { + msg.downcast_ref::<&str>() + .map_or(String::new(), |s| (*s).to_string()) + }, + Clone::clone, + ); + assert!(text.contains("imports not sorted")); + assert!(text.contains("0..1")); + } + + #[test] + fn build_green_tree_reports_all_errors() { + let imports = vec![1..2, 0..1]; + let typedefs = vec![4..5, 3..4]; + let result = std::panic::catch_unwind(|| { + let _ = ParsedSpans::new( + imports, + typedefs, + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + Vec::new(), + ); + }); + let Err(msg) = result else { + panic!("expected panic") + }; + let text = msg.downcast_ref::().map_or_else( + || { + msg.downcast_ref::<&str>() + .map_or(String::new(), |s| (*s).to_string()) + }, + Clone::clone, + ); + assert!(text.contains("imports not sorted")); + assert!(text.contains("typedefs not sorted")); + } + + #[rstest] + fn build_green_tree_round_trip() { + let src = "import foo::bar;"; + let tokens = tokenize(src); + let (spans, errors) = super::super::span_scanner::parse_tokens(&tokens, src); + assert!(errors.is_empty()); + let green = build_green_tree(&tokens, src, &spans); + let root = super::super::ast::Root::from_green(green); + assert_eq!(root.text(), src); + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7c14ab90..7031cf56 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6,11 +6,7 @@ //! and rules. It lays down the framework for integrating `chumsky` combinators //! and error recovery in later stages. -use chumsky::prelude::*; -use log::warn; -use rowan::{GreenNode, GreenNodeBuilder, Language}; - -use crate::{DdlogLanguage, Span, SyntaxKind, tokenize}; +use crate::tokenize; #[macro_use] mod lexer_helpers; @@ -21,143 +17,9 @@ mod span_collector; mod span_scanner; use span_scanner::parse_tokens; - -/// Result of a parse operation. -#[derive(Debug)] -pub struct Parsed { - green: GreenNode, - root: ast::Root, - errors: Vec>, -} - -/// Spans for each parsed statement category. -/// -/// Instances are constructed via [`ParsedSpans::new`] to guarantee the -/// internal lists remain sorted and non-overlapping. -#[non_exhaustive] -#[derive(Debug, Default, Clone, PartialEq)] -pub struct ParsedSpans { - /// `import` statement spans. - imports: Vec, - /// `typedef` statement spans. - typedefs: Vec, - /// `relation` declaration spans. - relations: Vec, - /// `index` declaration spans. - indexes: Vec, - /// `function` definition spans. - functions: Vec, - /// `transformer` declaration spans. - transformers: Vec, - /// Rule spans. - rules: Vec, -} - -impl ParsedSpans { - /// Construct a new `ParsedSpans`. - /// - /// The caller must ensure each span list is sorted and does not overlap - /// with itself. - /// - /// # Panics - /// - /// Panics in debug builds if any span list is unsorted or contains - /// overlapping spans. - #[must_use] - pub fn new( - imports: Vec, - typedefs: Vec, - relations: Vec, - indexes: Vec, - functions: Vec, - transformers: Vec, - rules: Vec, - ) -> Self { - if cfg!(debug_assertions) { - ensure_span_lists_sorted(&[ - ("imports", &imports), - ("typedefs", &typedefs), - ("relations", &relations), - ("indexes", &indexes), - ("functions", &functions), - ("transformers", &transformers), - ("rules", &rules), - ]); - } - - Self { - imports, - typedefs, - relations, - indexes, - functions, - transformers, - rules, - } - } - - /// Access `import` statement spans. - #[must_use] - pub fn imports(&self) -> &[Span] { - &self.imports - } - - /// Access `typedef` statement spans. - #[must_use] - pub fn typedefs(&self) -> &[Span] { - &self.typedefs - } - - /// Access `relation` declaration spans. - #[must_use] - pub fn relations(&self) -> &[Span] { - &self.relations - } - - /// Access `index` declaration spans. - #[must_use] - pub fn indexes(&self) -> &[Span] { - &self.indexes - } - - /// Access `function` definition spans. - #[must_use] - pub fn functions(&self) -> &[Span] { - &self.functions - } - - /// Access `transformer` declaration spans. - #[must_use] - pub fn transformers(&self) -> &[Span] { - &self.transformers - } - - /// Access rule spans. - #[must_use] - pub fn rules(&self) -> &[Span] { - &self.rules - } -} - -impl Parsed { - /// Access the `rowan` green tree. - #[must_use] - pub fn green(&self) -> &GreenNode { - &self.green - } - - /// Access the typed AST root. - #[must_use] - pub fn root(&self) -> &ast::Root { - &self.root - } - - /// Access parser errors collected during recovery. - #[must_use] - pub fn errors(&self) -> &[Simple] { - &self.errors - } -} +mod cst_builder; +use cst_builder::build_green_tree; +pub use cst_builder::{Parsed, ParsedSpans}; /// Parse the provided source string. /// @@ -182,199 +44,7 @@ pub fn parse(src: &str) -> Parsed { let green = build_green_tree(&tokens, src, &spans); let root = ast::Root::from_green(green.clone()); - Parsed { - green, - root, - errors, - } -} - -/// Construct the CST from the token stream and recorded statement spans. -/// -/// `spans.imports()` and `spans.typedefs()` must be sorted and non-overlapping so -/// that tokens are wrapped into well-formed nodes during tree construction. -/// Spans are validated in debug builds when [`ParsedSpans`] is constructed. -fn build_green_tree(tokens: &[(SyntaxKind, Span)], src: &str, spans: &ParsedSpans) -> GreenNode { - let mut builder = GreenNodeBuilder::new(); - builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); - - let mut import_iter = spans.imports().iter().peekable(); - let mut typedef_iter = spans.typedefs().iter().peekable(); - let mut relation_iter = spans.relations().iter().peekable(); - let mut index_iter = spans.indexes().iter().peekable(); - let mut function_iter = spans.functions().iter().peekable(); - let mut transformer_iter = spans.transformers().iter().peekable(); - let mut rule_iter = spans.rules().iter().peekable(); - - for &(kind, ref span) in tokens { - advance_span_iter(&mut import_iter, span.start); - advance_span_iter(&mut typedef_iter, span.start); - advance_span_iter(&mut relation_iter, span.start); - advance_span_iter(&mut index_iter, span.start); - advance_span_iter(&mut function_iter, span.start); - advance_span_iter(&mut transformer_iter, span.start); - advance_span_iter(&mut rule_iter, span.start); - - start_nodes( - &mut builder, - &mut [ - (&mut import_iter, SyntaxKind::N_IMPORT_STMT), - (&mut typedef_iter, SyntaxKind::N_TYPE_DEF), - (&mut relation_iter, SyntaxKind::N_RELATION_DECL), - (&mut index_iter, SyntaxKind::N_INDEX), - (&mut function_iter, SyntaxKind::N_FUNCTION), - (&mut transformer_iter, SyntaxKind::N_TRANSFORMER), - (&mut rule_iter, SyntaxKind::N_RULE), - ], - span.start, - ); - - push_token(&mut builder, kind, span, src); - - finish_nodes( - &mut builder, - &mut [ - &mut import_iter, - &mut typedef_iter, - &mut relation_iter, - &mut index_iter, - &mut function_iter, - &mut transformer_iter, - &mut rule_iter, - ], - span.end, - ); - } - - builder.finish_node(); - builder.finish() -} - -/// Move the iterator forward past any spans that end before `pos`. -/// -/// This keeps the peeked span aligned with the current token position. -fn advance_span_iter(iter: &mut std::iter::Peekable>, pos: usize) { - while let Some(next) = iter.peek() { - if pos >= next.end { - iter.next(); - } else { - break; - } - } -} - -/// Start a new syntax node if the current position matches the start of a span. -fn maybe_start( - builder: &mut GreenNodeBuilder, - iter: &mut std::iter::Peekable>, - pos: usize, - kind: SyntaxKind, -) { - if iter.peek().is_some_and(|current| pos == current.start) { - builder.start_node(DdlogLanguage::kind_to_raw(kind)); - } -} - -/// Finish the active syntax node when the current position reaches its end. -fn maybe_finish( - builder: &mut GreenNodeBuilder, - iter: &mut std::iter::Peekable>, - pos: usize, -) { - if iter.peek().is_some_and(|current| pos >= current.end) { - builder.finish_node(); - iter.next(); - } -} - -type SpanIter<'a> = std::iter::Peekable>; - -fn start_nodes( - builder: &mut GreenNodeBuilder, - pairs: &mut [(&mut SpanIter<'_>, SyntaxKind)], - pos: usize, -) { - for (iter, kind) in pairs.iter_mut() { - maybe_start(builder, iter, pos, *kind); - } -} - -fn finish_nodes(builder: &mut GreenNodeBuilder, iters: &mut [&mut SpanIter<'_>], pos: usize) { - for iter in iters.iter_mut() { - maybe_finish(builder, iter, pos); - } -} - -/// Validate that spans are sorted and non-overlapping. -/// -/// Returns an error describing the offending pair if any two consecutive spans -/// overlap or are out of order. This helps callers diagnose invalid span lists -/// before corrupting the CST. -#[derive(Debug, Clone, PartialEq, Eq)] -struct SpanOrderError { - prev: Span, - next: Span, -} - -impl std::fmt::Display for SpanOrderError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "spans overlap or are unsorted: {:?} then {:?}", - self.prev, self.next - ) - } -} - -impl std::error::Error for SpanOrderError {} - -fn validate_spans_sorted(spans: &[Span]) -> Result<(), SpanOrderError> { - for pair in spans.windows(2) { - let [first, second] = pair else { continue }; - if first.end > second.start { - return Err(SpanOrderError { - prev: first.clone(), - next: second.clone(), - }); - } - } - Ok(()) -} - -/// Panics if any span list is misordered, aggregating all violations. -fn ensure_span_lists_sorted(lists: &[(&str, &[Span])]) { - let mut errors = Vec::new(); - for (name, spans) in lists { - if let Err(e) = validate_spans_sorted(spans) { - errors.push(format!("{name} not sorted: {e}")); - } - } - assert!(errors.is_empty(), "{}", errors.join("\n")); -} - -/// Push a token to the tree, wrapping `N_ERROR` tokens in an error node. -fn push_token(builder: &mut GreenNodeBuilder, kind: SyntaxKind, span: &Span, src: &str) { - // `Span` is cloned because `str::get` takes the range by value. - let text = src.get(span.clone()).map_or_else( - || { - warn!( - "token span {:?} out of bounds for source of length {}", - span, - src.len() - ); - "" - }, - |t| t, - ); - - let raw = DdlogLanguage::kind_to_raw(kind); - if kind == SyntaxKind::N_ERROR { - builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); - } - builder.token(raw, text); - if kind == SyntaxKind::N_ERROR { - builder.finish_node(); - } + Parsed::new(green, root, errors) } pub mod ast { @@ -1349,8 +1019,7 @@ pub mod ast { mod tests { mod parser; use super::token_stream::TokenStream; - use super::*; - use crate::tokenize; + use crate::{SyntaxKind, tokenize}; use rstest::rstest; /// Tests that `skip_until` advances the token stream cursor past the specified span end. @@ -1446,93 +1115,4 @@ mod tests { let start = tokens.len(); assert_eq!(stream.line_end(start), src.len()); } - - #[test] - fn validate_spans_sorted_err_on_overlap() { - let spans = vec![0..5, 4..8]; - let result = super::validate_spans_sorted(&spans); - assert!(result.is_err()); - } - - #[test] - fn validate_spans_sorted_err_on_unsorted() { - let spans = vec![5..10, 0..2]; - let result = super::validate_spans_sorted(&spans); - assert!(result.is_err()); - } - - #[test] - fn validate_spans_sorted_ok_on_empty() { - let spans: Vec = Vec::new(); - assert!(super::validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn validate_spans_sorted_ok_on_single() { - let spans: Vec = vec![std::ops::Range { start: 0, end: 3 }]; - assert!(super::validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn validate_spans_sorted_ok_on_sorted() { - let spans = vec![0..2, 3..5, 5..8]; - assert!(super::validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn build_green_tree_panics_on_misordered_spans() { - let unsorted = vec![1..2, 0..1]; - let result = std::panic::catch_unwind(|| { - let _ = super::ParsedSpans::new( - unsorted, - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - ); - }); - let Err(msg) = result else { - panic!("expected panic") - }; - let text = msg.downcast_ref::().map_or_else( - || { - msg.downcast_ref::<&str>() - .map_or(String::new(), |s| (*s).to_string()) - }, - Clone::clone, - ); - assert!(text.contains("imports not sorted")); - assert!(text.contains("0..1")); - } - - #[test] - fn build_green_tree_reports_all_errors() { - let imports = vec![1..2, 0..1]; - let typedefs = vec![4..5, 3..4]; - let result = std::panic::catch_unwind(|| { - let _ = super::ParsedSpans::new( - imports, - typedefs, - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - ); - }); - let Err(msg) = result else { - panic!("expected panic") - }; - let text = msg.downcast_ref::().map_or_else( - || { - msg.downcast_ref::<&str>() - .map_or(String::new(), |s| (*s).to_string()) - }, - Clone::clone, - ); - assert!(text.contains("imports not sorted")); - assert!(text.contains("typedefs not sorted")); - } } From f47eb3668488c08fdad667476716dd81ae415ecf Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 18 Jul 2025 21:13:26 +0100 Subject: [PATCH 2/8] Add span builder and deduplicate tests --- src/parser/cst_builder.rs | 200 +++++++++++++++++++++++++++---------- src/parser/span_scanner.rs | 18 ++-- 2 files changed, 155 insertions(+), 63 deletions(-) diff --git a/src/parser/cst_builder.rs b/src/parser/cst_builder.rs index 95e597f3..88fda7b8 100644 --- a/src/parser/cst_builder.rs +++ b/src/parser/cst_builder.rs @@ -52,8 +52,8 @@ impl Parsed { /// Spans for each parsed statement category. /// -/// Instances are constructed via [`ParsedSpans::new`] to ensure span lists are -/// sorted and non-overlapping in debug builds. +/// Instances are constructed via [`ParsedSpans::builder`] to ensure span lists +/// are sorted and non-overlapping in debug builds. #[non_exhaustive] #[derive(Debug, Default, Clone, PartialEq)] pub struct ParsedSpans { @@ -73,14 +73,100 @@ pub struct ParsedSpans { rules: Vec, } +/// Builder for [`ParsedSpans`]. +#[derive(Default)] +pub struct ParsedSpansBuilder { + imports: Vec, + typedefs: Vec, + relations: Vec, + indexes: Vec, + functions: Vec, + transformers: Vec, + rules: Vec, +} + +impl ParsedSpansBuilder { + /// Set the `import` statement spans. + #[must_use] + pub fn imports(mut self, spans: Vec) -> Self { + self.imports = spans; + self + } + + /// Set the `typedef` statement spans. + #[must_use] + pub fn typedefs(mut self, spans: Vec) -> Self { + self.typedefs = spans; + self + } + + /// Set the `relation` declaration spans. + #[must_use] + pub fn relations(mut self, spans: Vec) -> Self { + self.relations = spans; + self + } + + /// Set the `index` declaration spans. + #[must_use] + pub fn indexes(mut self, spans: Vec) -> Self { + self.indexes = spans; + self + } + + /// Set the `function` definition spans. + #[must_use] + pub fn functions(mut self, spans: Vec) -> Self { + self.functions = spans; + self + } + + /// Set the `transformer` declaration spans. + #[must_use] + pub fn transformers(mut self, spans: Vec) -> Self { + self.transformers = spans; + self + } + + /// Set the rule spans. + #[must_use] + pub fn rules(mut self, spans: Vec) -> Self { + self.rules = spans; + self + } + + /// Build the [`ParsedSpans`]. + #[must_use] + pub fn build(self) -> ParsedSpans { + ParsedSpans::new( + self.imports, + self.typedefs, + self.relations, + self.indexes, + self.functions, + self.transformers, + self.rules, + ) + } +} + impl ParsedSpans { - /// Construct a new [`ParsedSpans`]. + /// Start building a [`ParsedSpans`] instance. + /// + /// # Examples /// - /// The caller must provide span lists that are sorted and free from - /// overlaps. In debug builds every list is validated and the function will - /// panic if any ordering violation is detected. + /// ```no_run + /// use ddlint::parser::cst_builder::ParsedSpans; + /// + /// let spans = ParsedSpans::builder().build(); + /// assert!(spans.imports().is_empty()); + /// ``` #[must_use] - pub fn new( + pub fn builder() -> ParsedSpansBuilder { + ParsedSpansBuilder::default() + } + + fn new( imports: Vec, typedefs: Vec, relations: Vec, @@ -230,15 +316,34 @@ fn advance_span_iter(iter: &mut std::iter::Peekable>, } } +fn maybe_execute_on_span( + builder: &mut GreenNodeBuilder, + iter: &mut std::iter::Peekable>, + pos: usize, + condition: C, + mut action: F, +) where + C: Fn(usize, &Span) -> bool, + F: FnMut(&mut GreenNodeBuilder, &mut std::iter::Peekable>), +{ + if iter.peek().is_some_and(|current| condition(pos, current)) { + action(builder, iter); + } +} + fn maybe_start( builder: &mut GreenNodeBuilder, iter: &mut std::iter::Peekable>, pos: usize, kind: SyntaxKind, ) { - if iter.peek().is_some_and(|current| pos == current.start) { - builder.start_node(DdlogLanguage::kind_to_raw(kind)); - } + maybe_execute_on_span( + builder, + iter, + pos, + |p, current| p == current.start, + |b, _| b.start_node(DdlogLanguage::kind_to_raw(kind)), + ); } fn maybe_finish( @@ -246,10 +351,16 @@ fn maybe_finish( iter: &mut std::iter::Peekable>, pos: usize, ) { - if iter.peek().is_some_and(|current| pos >= current.end) { - builder.finish_node(); - iter.next(); - } + maybe_execute_on_span( + builder, + iter, + pos, + |p, current| p >= current.end, + |b, it| { + b.finish_node(); + it.next(); + }, + ); } type SpanIter<'a> = std::iter::Peekable>; @@ -340,6 +451,20 @@ mod tests { use crate::tokenize; use rstest::rstest; + fn assert_panic_with_message(f: F) -> String + where + F: FnOnce() + std::panic::UnwindSafe, + { + let result = std::panic::catch_unwind(f); + let Err(err) = result else { + panic!("expected panic") + }; + err.downcast_ref::() + .cloned() + .or_else(|| err.downcast_ref::<&str>().map(|s| (*s).to_string())) + .unwrap_or_default() + } + #[test] fn validate_spans_sorted_err_on_overlap() { let spans = vec![0..5, 4..8]; @@ -375,27 +500,9 @@ mod tests { #[test] fn build_green_tree_panics_on_misordered_spans() { let unsorted = vec![1..2, 0..1]; - let result = std::panic::catch_unwind(|| { - let _ = ParsedSpans::new( - unsorted, - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - ); + let text = assert_panic_with_message(|| { + let _ = ParsedSpans::builder().imports(unsorted).build(); }); - let Err(msg) = result else { - panic!("expected panic") - }; - let text = msg.downcast_ref::().map_or_else( - || { - msg.downcast_ref::<&str>() - .map_or(String::new(), |s| (*s).to_string()) - }, - Clone::clone, - ); assert!(text.contains("imports not sorted")); assert!(text.contains("0..1")); } @@ -404,27 +511,12 @@ mod tests { fn build_green_tree_reports_all_errors() { let imports = vec![1..2, 0..1]; let typedefs = vec![4..5, 3..4]; - let result = std::panic::catch_unwind(|| { - let _ = ParsedSpans::new( - imports, - typedefs, - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - Vec::new(), - ); + let text = assert_panic_with_message(|| { + let _ = ParsedSpans::builder() + .imports(imports) + .typedefs(typedefs) + .build(); }); - let Err(msg) = result else { - panic!("expected panic") - }; - let text = msg.downcast_ref::().map_or_else( - || { - msg.downcast_ref::<&str>() - .map_or(String::new(), |s| (*s).to_string()) - }, - Clone::clone, - ); assert!(text.contains("imports not sorted")); assert!(text.contains("typedefs not sorted")); } diff --git a/src/parser/span_scanner.rs b/src/parser/span_scanner.rs index b62798ce..9734e0a3 100644 --- a/src/parser/span_scanner.rs +++ b/src/parser/span_scanner.rs @@ -36,15 +36,15 @@ pub(super) fn parse_tokens( all_errors.extend(rule_errors); ( - ParsedSpans::new( - import_spans, - typedef_spans, - relation_spans, - index_spans, - function_spans, - transformer_spans, - rule_spans, - ), + ParsedSpans::builder() + .imports(import_spans) + .typedefs(typedef_spans) + .relations(relation_spans) + .indexes(index_spans) + .functions(function_spans) + .transformers(transformer_spans) + .rules(rule_spans) + .build(), all_errors, ) } From 4307277e8f74b0e26b6a191fa7a2c0eb2ee7f3cb Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 18 Jul 2025 22:35:19 +0100 Subject: [PATCH 3/8] Split CST builder into modules --- src/parser/cst_builder.rs | 534 -------------------------------- src/parser/cst_builder/mod.rs | 54 ++++ src/parser/cst_builder/spans.rs | 301 ++++++++++++++++++ src/parser/cst_builder/tree.rs | 118 +++++++ 4 files changed, 473 insertions(+), 534 deletions(-) delete mode 100644 src/parser/cst_builder.rs create mode 100644 src/parser/cst_builder/mod.rs create mode 100644 src/parser/cst_builder/spans.rs create mode 100644 src/parser/cst_builder/tree.rs diff --git a/src/parser/cst_builder.rs b/src/parser/cst_builder.rs deleted file mode 100644 index 88fda7b8..00000000 --- a/src/parser/cst_builder.rs +++ /dev/null @@ -1,534 +0,0 @@ -//! CST construction utilities. -//! -//! This module builds a `rowan::GreenNode` from a sequence of tokens and -//! statement spans. It validates span ordering and provides the [`Parsed`] and -//! [`ParsedSpans`] types used by the parser entry point. - -use chumsky::error::Simple; -use log::warn; -use rowan::{GreenNode, GreenNodeBuilder, Language}; - -use crate::{DdlogLanguage, Span, SyntaxKind}; - -/// Result of a parse operation. -#[derive(Debug)] -pub struct Parsed { - green: GreenNode, - root: super::ast::Root, - errors: Vec>, -} - -impl Parsed { - pub(super) fn new( - green: GreenNode, - root: super::ast::Root, - errors: Vec>, - ) -> Self { - Self { - green, - root, - errors, - } - } - - /// Access the `rowan` green tree. - #[must_use] - pub fn green(&self) -> &GreenNode { - &self.green - } - - /// Access the typed AST root. - #[must_use] - pub fn root(&self) -> &super::ast::Root { - &self.root - } - - /// Access parser errors collected during recovery. - #[must_use] - pub fn errors(&self) -> &[Simple] { - &self.errors - } -} - -/// Spans for each parsed statement category. -/// -/// Instances are constructed via [`ParsedSpans::builder`] to ensure span lists -/// are sorted and non-overlapping in debug builds. -#[non_exhaustive] -#[derive(Debug, Default, Clone, PartialEq)] -pub struct ParsedSpans { - /// `import` statement spans. - imports: Vec, - /// `typedef` statement spans. - typedefs: Vec, - /// `relation` declaration spans. - relations: Vec, - /// `index` declaration spans. - indexes: Vec, - /// `function` definition spans. - functions: Vec, - /// `transformer` declaration spans. - transformers: Vec, - /// Rule spans. - rules: Vec, -} - -/// Builder for [`ParsedSpans`]. -#[derive(Default)] -pub struct ParsedSpansBuilder { - imports: Vec, - typedefs: Vec, - relations: Vec, - indexes: Vec, - functions: Vec, - transformers: Vec, - rules: Vec, -} - -impl ParsedSpansBuilder { - /// Set the `import` statement spans. - #[must_use] - pub fn imports(mut self, spans: Vec) -> Self { - self.imports = spans; - self - } - - /// Set the `typedef` statement spans. - #[must_use] - pub fn typedefs(mut self, spans: Vec) -> Self { - self.typedefs = spans; - self - } - - /// Set the `relation` declaration spans. - #[must_use] - pub fn relations(mut self, spans: Vec) -> Self { - self.relations = spans; - self - } - - /// Set the `index` declaration spans. - #[must_use] - pub fn indexes(mut self, spans: Vec) -> Self { - self.indexes = spans; - self - } - - /// Set the `function` definition spans. - #[must_use] - pub fn functions(mut self, spans: Vec) -> Self { - self.functions = spans; - self - } - - /// Set the `transformer` declaration spans. - #[must_use] - pub fn transformers(mut self, spans: Vec) -> Self { - self.transformers = spans; - self - } - - /// Set the rule spans. - #[must_use] - pub fn rules(mut self, spans: Vec) -> Self { - self.rules = spans; - self - } - - /// Build the [`ParsedSpans`]. - #[must_use] - pub fn build(self) -> ParsedSpans { - ParsedSpans::new( - self.imports, - self.typedefs, - self.relations, - self.indexes, - self.functions, - self.transformers, - self.rules, - ) - } -} - -impl ParsedSpans { - /// Start building a [`ParsedSpans`] instance. - /// - /// # Examples - /// - /// ```no_run - /// use ddlint::parser::cst_builder::ParsedSpans; - /// - /// let spans = ParsedSpans::builder().build(); - /// assert!(spans.imports().is_empty()); - /// ``` - #[must_use] - pub fn builder() -> ParsedSpansBuilder { - ParsedSpansBuilder::default() - } - - fn new( - imports: Vec, - typedefs: Vec, - relations: Vec, - indexes: Vec, - functions: Vec, - transformers: Vec, - rules: Vec, - ) -> Self { - if cfg!(debug_assertions) { - ensure_span_lists_sorted(&[ - ("imports", &imports), - ("typedefs", &typedefs), - ("relations", &relations), - ("indexes", &indexes), - ("functions", &functions), - ("transformers", &transformers), - ("rules", &rules), - ]); - } - - Self { - imports, - typedefs, - relations, - indexes, - functions, - transformers, - rules, - } - } - - /// Access `import` statement spans. - #[must_use] - pub fn imports(&self) -> &[Span] { - &self.imports - } - - /// Access `typedef` statement spans. - #[must_use] - pub fn typedefs(&self) -> &[Span] { - &self.typedefs - } - - /// Access `relation` declaration spans. - #[must_use] - pub fn relations(&self) -> &[Span] { - &self.relations - } - - /// Access `index` declaration spans. - #[must_use] - pub fn indexes(&self) -> &[Span] { - &self.indexes - } - - /// Access `function` definition spans. - #[must_use] - pub fn functions(&self) -> &[Span] { - &self.functions - } - - /// Access `transformer` declaration spans. - #[must_use] - pub fn transformers(&self) -> &[Span] { - &self.transformers - } - - /// Access rule spans. - #[must_use] - pub fn rules(&self) -> &[Span] { - &self.rules - } -} - -/// Construct the CST from the token stream and recorded statement spans. -/// -/// Span lists must be sorted and non-overlapping so that tokens are wrapped -/// into well-formed nodes. Validation occurs in debug builds when -/// [`ParsedSpans`] is created. -pub(super) fn build_green_tree( - tokens: &[(SyntaxKind, Span)], - src: &str, - spans: &ParsedSpans, -) -> GreenNode { - let mut builder = GreenNodeBuilder::new(); - builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); - - let mut import_iter = spans.imports().iter().peekable(); - let mut typedef_iter = spans.typedefs().iter().peekable(); - let mut relation_iter = spans.relations().iter().peekable(); - let mut index_iter = spans.indexes().iter().peekable(); - let mut function_iter = spans.functions().iter().peekable(); - let mut transformer_iter = spans.transformers().iter().peekable(); - let mut rule_iter = spans.rules().iter().peekable(); - - for &(kind, ref span) in tokens { - advance_span_iter(&mut import_iter, span.start); - advance_span_iter(&mut typedef_iter, span.start); - advance_span_iter(&mut relation_iter, span.start); - advance_span_iter(&mut index_iter, span.start); - advance_span_iter(&mut function_iter, span.start); - advance_span_iter(&mut transformer_iter, span.start); - advance_span_iter(&mut rule_iter, span.start); - - start_nodes( - &mut builder, - &mut [ - (&mut import_iter, SyntaxKind::N_IMPORT_STMT), - (&mut typedef_iter, SyntaxKind::N_TYPE_DEF), - (&mut relation_iter, SyntaxKind::N_RELATION_DECL), - (&mut index_iter, SyntaxKind::N_INDEX), - (&mut function_iter, SyntaxKind::N_FUNCTION), - (&mut transformer_iter, SyntaxKind::N_TRANSFORMER), - (&mut rule_iter, SyntaxKind::N_RULE), - ], - span.start, - ); - - push_token(&mut builder, kind, span, src); - - finish_nodes( - &mut builder, - &mut [ - &mut import_iter, - &mut typedef_iter, - &mut relation_iter, - &mut index_iter, - &mut function_iter, - &mut transformer_iter, - &mut rule_iter, - ], - span.end, - ); - } - - builder.finish_node(); - builder.finish() -} - -fn advance_span_iter(iter: &mut std::iter::Peekable>, pos: usize) { - while let Some(next) = iter.peek() { - if pos >= next.end { - iter.next(); - } else { - break; - } - } -} - -fn maybe_execute_on_span( - builder: &mut GreenNodeBuilder, - iter: &mut std::iter::Peekable>, - pos: usize, - condition: C, - mut action: F, -) where - C: Fn(usize, &Span) -> bool, - F: FnMut(&mut GreenNodeBuilder, &mut std::iter::Peekable>), -{ - if iter.peek().is_some_and(|current| condition(pos, current)) { - action(builder, iter); - } -} - -fn maybe_start( - builder: &mut GreenNodeBuilder, - iter: &mut std::iter::Peekable>, - pos: usize, - kind: SyntaxKind, -) { - maybe_execute_on_span( - builder, - iter, - pos, - |p, current| p == current.start, - |b, _| b.start_node(DdlogLanguage::kind_to_raw(kind)), - ); -} - -fn maybe_finish( - builder: &mut GreenNodeBuilder, - iter: &mut std::iter::Peekable>, - pos: usize, -) { - maybe_execute_on_span( - builder, - iter, - pos, - |p, current| p >= current.end, - |b, it| { - b.finish_node(); - it.next(); - }, - ); -} - -type SpanIter<'a> = std::iter::Peekable>; - -fn start_nodes( - builder: &mut GreenNodeBuilder, - pairs: &mut [(&mut SpanIter<'_>, SyntaxKind)], - pos: usize, -) { - for (iter, kind) in pairs.iter_mut() { - maybe_start(builder, iter, pos, *kind); - } -} - -fn finish_nodes(builder: &mut GreenNodeBuilder, iters: &mut [&mut SpanIter<'_>], pos: usize) { - for iter in iters.iter_mut() { - maybe_finish(builder, iter, pos); - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct SpanOrderError { - prev: Span, - next: Span, -} - -impl std::fmt::Display for SpanOrderError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "spans overlap or are unsorted: {:?} then {:?}", - self.prev, self.next - ) - } -} - -impl std::error::Error for SpanOrderError {} - -fn validate_spans_sorted(spans: &[Span]) -> Result<(), SpanOrderError> { - for pair in spans.windows(2) { - let [first, second] = pair else { continue }; - if first.end > second.start { - return Err(SpanOrderError { - prev: first.clone(), - next: second.clone(), - }); - } - } - Ok(()) -} - -fn ensure_span_lists_sorted(lists: &[(&str, &[Span])]) { - let mut errors = Vec::new(); - for (name, spans) in lists { - if let Err(e) = validate_spans_sorted(spans) { - errors.push(format!("{name} not sorted: {e}")); - } - } - assert!(errors.is_empty(), "{}", errors.join("\n")); -} - -fn push_token(builder: &mut GreenNodeBuilder, kind: SyntaxKind, span: &Span, src: &str) { - let text = src.get(span.clone()).map_or_else( - || { - warn!( - "token span {:?} out of bounds for source of length {}", - span, - src.len() - ); - "" - }, - |t| t, - ); - - let raw = DdlogLanguage::kind_to_raw(kind); - if kind == SyntaxKind::N_ERROR { - builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); - } - builder.token(raw, text); - if kind == SyntaxKind::N_ERROR { - builder.finish_node(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::tokenize; - use rstest::rstest; - - fn assert_panic_with_message(f: F) -> String - where - F: FnOnce() + std::panic::UnwindSafe, - { - let result = std::panic::catch_unwind(f); - let Err(err) = result else { - panic!("expected panic") - }; - err.downcast_ref::() - .cloned() - .or_else(|| err.downcast_ref::<&str>().map(|s| (*s).to_string())) - .unwrap_or_default() - } - - #[test] - fn validate_spans_sorted_err_on_overlap() { - let spans = vec![0..5, 4..8]; - let result = validate_spans_sorted(&spans); - assert!(result.is_err()); - } - - #[test] - fn validate_spans_sorted_err_on_unsorted() { - let spans = vec![5..10, 0..2]; - let result = validate_spans_sorted(&spans); - assert!(result.is_err()); - } - - #[test] - fn validate_spans_sorted_ok_on_empty() { - let spans: Vec = Vec::new(); - assert!(validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn validate_spans_sorted_ok_on_single() { - let spans: Vec = vec![std::ops::Range { start: 0, end: 3 }]; - assert!(validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn validate_spans_sorted_ok_on_sorted() { - let spans = vec![0..2, 3..5, 5..8]; - assert!(validate_spans_sorted(&spans).is_ok()); - } - - #[test] - fn build_green_tree_panics_on_misordered_spans() { - let unsorted = vec![1..2, 0..1]; - let text = assert_panic_with_message(|| { - let _ = ParsedSpans::builder().imports(unsorted).build(); - }); - assert!(text.contains("imports not sorted")); - assert!(text.contains("0..1")); - } - - #[test] - fn build_green_tree_reports_all_errors() { - let imports = vec![1..2, 0..1]; - let typedefs = vec![4..5, 3..4]; - let text = assert_panic_with_message(|| { - let _ = ParsedSpans::builder() - .imports(imports) - .typedefs(typedefs) - .build(); - }); - assert!(text.contains("imports not sorted")); - assert!(text.contains("typedefs not sorted")); - } - - #[rstest] - fn build_green_tree_round_trip() { - let src = "import foo::bar;"; - let tokens = tokenize(src); - let (spans, errors) = super::super::span_scanner::parse_tokens(&tokens, src); - assert!(errors.is_empty()); - let green = build_green_tree(&tokens, src, &spans); - let root = super::super::ast::Root::from_green(green); - assert_eq!(root.text(), src); - } -} diff --git a/src/parser/cst_builder/mod.rs b/src/parser/cst_builder/mod.rs new file mode 100644 index 00000000..e2ca945c --- /dev/null +++ b/src/parser/cst_builder/mod.rs @@ -0,0 +1,54 @@ +//! CST construction utilities. +//! +//! Provides [`Parsed`], [`ParsedSpans`] and [`build_green_tree`]. + +use chumsky::error::Simple; +use rowan::GreenNode; + +use crate::SyntaxKind; + +mod spans; +mod tree; + +pub use self::spans::ParsedSpans; +pub(crate) use self::tree::build_green_tree; + +/// Result of a parse operation. +#[derive(Debug)] +pub struct Parsed { + green: GreenNode, + root: super::ast::Root, + errors: Vec>, +} + +impl Parsed { + pub(super) fn new( + green: GreenNode, + root: super::ast::Root, + errors: Vec>, + ) -> Self { + Self { + green, + root, + errors, + } + } + + /// Access the `rowan` green tree. + #[must_use] + pub fn green(&self) -> &GreenNode { + &self.green + } + + /// Access the typed AST root. + #[must_use] + pub fn root(&self) -> &super::ast::Root { + &self.root + } + + /// Access parser errors collected during recovery. + #[must_use] + pub fn errors(&self) -> &[Simple] { + &self.errors + } +} diff --git a/src/parser/cst_builder/spans.rs b/src/parser/cst_builder/spans.rs new file mode 100644 index 00000000..37f2fab7 --- /dev/null +++ b/src/parser/cst_builder/spans.rs @@ -0,0 +1,301 @@ +//! Span storage and validation helpers. + +use crate::Span; + +/// Spans for each parsed statement category. +/// +/// Instances are constructed via [`ParsedSpans::builder`] to ensure span lists +/// are sorted and non-overlapping in debug builds. +#[non_exhaustive] +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ParsedSpans { + /// `import` statement spans. + imports: Vec, + /// `typedef` statement spans. + typedefs: Vec, + /// `relation` declaration spans. + relations: Vec, + /// `index` declaration spans. + indexes: Vec, + /// `function` definition spans. + functions: Vec, + /// `transformer` declaration spans. + transformers: Vec, + /// Rule spans. + rules: Vec, +} + +/// Builder for [`ParsedSpans`]. +#[derive(Default)] +pub struct ParsedSpansBuilder { + imports: Vec, + typedefs: Vec, + relations: Vec, + indexes: Vec, + functions: Vec, + transformers: Vec, + rules: Vec, +} + +impl ParsedSpansBuilder { + /// Set the `import` statement spans. + #[must_use] + pub fn imports(mut self, spans: Vec) -> Self { + self.imports = spans; + self + } + + /// Set the `typedef` statement spans. + #[must_use] + pub fn typedefs(mut self, spans: Vec) -> Self { + self.typedefs = spans; + self + } + + /// Set the `relation` declaration spans. + #[must_use] + pub fn relations(mut self, spans: Vec) -> Self { + self.relations = spans; + self + } + + /// Set the `index` declaration spans. + #[must_use] + pub fn indexes(mut self, spans: Vec) -> Self { + self.indexes = spans; + self + } + + /// Set the `function` definition spans. + #[must_use] + pub fn functions(mut self, spans: Vec) -> Self { + self.functions = spans; + self + } + + /// Set the `transformer` declaration spans. + #[must_use] + pub fn transformers(mut self, spans: Vec) -> Self { + self.transformers = spans; + self + } + + /// Set the rule spans. + #[must_use] + pub fn rules(mut self, spans: Vec) -> Self { + self.rules = spans; + self + } + + /// Build the [`ParsedSpans`]. + #[must_use] + pub fn build(self) -> ParsedSpans { + ParsedSpans::new( + self.imports, + self.typedefs, + self.relations, + self.indexes, + self.functions, + self.transformers, + self.rules, + ) + } +} + +impl ParsedSpans { + /// Start building a [`ParsedSpans`] instance. + #[must_use] + pub fn builder() -> ParsedSpansBuilder { + ParsedSpansBuilder::default() + } + + pub(super) fn new( + imports: Vec, + typedefs: Vec, + relations: Vec, + indexes: Vec, + functions: Vec, + transformers: Vec, + rules: Vec, + ) -> Self { + let result = validate_span_lists_sorted(&[ + ("imports", &imports), + ("typedefs", &typedefs), + ("relations", &relations), + ("indexes", &indexes), + ("functions", &functions), + ("transformers", &transformers), + ("rules", &rules), + ]); + debug_assert!(result.is_ok(), "{}", result.err().unwrap_or_default()); + + Self { + imports, + typedefs, + relations, + indexes, + functions, + transformers, + rules, + } + } + + /// Access `import` statement spans. + #[must_use] + pub fn imports(&self) -> &[Span] { + &self.imports + } + + /// Access `typedef` statement spans. + #[must_use] + pub fn typedefs(&self) -> &[Span] { + &self.typedefs + } + + /// Access `relation` declaration spans. + #[must_use] + pub fn relations(&self) -> &[Span] { + &self.relations + } + + /// Access `index` declaration spans. + #[must_use] + pub fn indexes(&self) -> &[Span] { + &self.indexes + } + + /// Access `function` definition spans. + #[must_use] + pub fn functions(&self) -> &[Span] { + &self.functions + } + + /// Access `transformer` declaration spans. + #[must_use] + pub fn transformers(&self) -> &[Span] { + &self.transformers + } + + /// Access rule spans. + #[must_use] + pub fn rules(&self) -> &[Span] { + &self.rules + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct SpanOrderError { + prev: Span, + next: Span, +} + +impl std::fmt::Display for SpanOrderError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "spans overlap or are unsorted: {:?} then {:?}", + self.prev, self.next + ) + } +} + +impl std::error::Error for SpanOrderError {} + +fn validate_spans_sorted(spans: &[Span]) -> Result<(), SpanOrderError> { + for pair in spans.windows(2) { + let [first, second] = pair else { continue }; + if first.end > second.start { + return Err(SpanOrderError { + prev: first.clone(), + next: second.clone(), + }); + } + } + Ok(()) +} + +fn validate_span_lists_sorted(lists: &[(&str, &[Span])]) -> Result<(), String> { + let mut errors = Vec::new(); + for (name, spans) in lists { + if let Err(e) = validate_spans_sorted(spans) { + errors.push(format!("{name} not sorted: {e}")); + } + } + if errors.is_empty() { + Ok(()) + } else { + Err(errors.join("\n")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_panic_with_message(f: F) -> String + where + F: FnOnce() + std::panic::UnwindSafe, + { + let result = std::panic::catch_unwind(f); + let Err(err) = result else { + panic!("expected panic") + }; + err.downcast_ref::() + .cloned() + .or_else(|| err.downcast_ref::<&str>().map(|s| (*s).to_string())) + .unwrap_or_default() + } + + #[test] + fn validate_spans_sorted_err_on_overlap() { + let spans = vec![0..5, 4..8]; + assert!(validate_spans_sorted(&spans).is_err()); + } + + #[test] + fn validate_spans_sorted_err_on_unsorted() { + let spans = vec![5..10, 0..2]; + assert!(validate_spans_sorted(&spans).is_err()); + } + + #[test] + fn validate_spans_sorted_ok_on_empty() { + let spans: Vec = Vec::new(); + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn validate_spans_sorted_ok_on_single() { + let spans: Vec = std::iter::once(0..3).collect(); + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn validate_spans_sorted_ok_on_sorted() { + let spans = vec![0..2, 3..5, 5..8]; + assert!(validate_spans_sorted(&spans).is_ok()); + } + + #[test] + fn builder_panics_on_unsorted() { + let unsorted = vec![1..2, 0..1]; + let text = assert_panic_with_message(|| { + let _ = ParsedSpans::builder().imports(unsorted).build(); + }); + assert!(text.contains("imports not sorted")); + } + + #[test] + fn builder_reports_all_errors() { + let imports = vec![1..2, 0..1]; + let typedefs = vec![4..5, 3..4]; + let text = assert_panic_with_message(|| { + let _ = ParsedSpans::builder() + .imports(imports) + .typedefs(typedefs) + .build(); + }); + assert!(text.contains("imports not sorted")); + assert!(text.contains("typedefs not sorted")); + } +} diff --git a/src/parser/cst_builder/tree.rs b/src/parser/cst_builder/tree.rs new file mode 100644 index 00000000..a05aaec2 --- /dev/null +++ b/src/parser/cst_builder/tree.rs @@ -0,0 +1,118 @@ +//! Build a `rowan` green tree from tokens and spans. + +use log::warn; +use rowan::{GreenNode, GreenNodeBuilder, Language}; + +use crate::{DdlogLanguage, Span, SyntaxKind}; + +use super::spans::ParsedSpans; + +struct SpanCursor<'a> { + iter: std::iter::Peekable>, + kind: SyntaxKind, +} + +impl<'a> SpanCursor<'a> { + fn new(spans: &'a [Span], kind: SyntaxKind) -> Self { + Self { + iter: spans.iter().peekable(), + kind, + } + } + + fn advance_to(&mut self, pos: usize) { + while matches!(self.iter.peek(), Some(s) if pos >= s.end) { + self.iter.next(); + } + } + + fn start_if(&mut self, builder: &mut GreenNodeBuilder, pos: usize) { + if matches!(self.iter.peek(), Some(s) if pos == s.start) { + builder.start_node(DdlogLanguage::kind_to_raw(self.kind)); + } + } + + fn finish_if(&mut self, builder: &mut GreenNodeBuilder, pos: usize) { + if matches!(self.iter.peek(), Some(s) if pos >= s.end) { + builder.finish_node(); + self.iter.next(); + } + } +} + +/// Construct the CST from the token stream and recorded statement spans. +pub(crate) fn build_green_tree( + tokens: &[(SyntaxKind, Span)], + src: &str, + spans: &ParsedSpans, +) -> GreenNode { + let mut builder = GreenNodeBuilder::new(); + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); + + let mut cursors = [ + SpanCursor::new(spans.imports(), SyntaxKind::N_IMPORT_STMT), + SpanCursor::new(spans.typedefs(), SyntaxKind::N_TYPE_DEF), + SpanCursor::new(spans.relations(), SyntaxKind::N_RELATION_DECL), + SpanCursor::new(spans.indexes(), SyntaxKind::N_INDEX), + SpanCursor::new(spans.functions(), SyntaxKind::N_FUNCTION), + SpanCursor::new(spans.transformers(), SyntaxKind::N_TRANSFORMER), + SpanCursor::new(spans.rules(), SyntaxKind::N_RULE), + ]; + + for &(kind, ref span) in tokens { + for cur in &mut cursors { + cur.advance_to(span.start); + cur.start_if(&mut builder, span.start); + } + + push_token(&mut builder, kind, span.clone(), src); + + for cur in &mut cursors { + cur.finish_if(&mut builder, span.end); + } + } + + builder.finish_node(); + builder.finish() +} + +fn push_token(builder: &mut GreenNodeBuilder, kind: SyntaxKind, span: Span, src: &str) { + let text = src.get(span.clone()).map_or_else( + || { + warn!( + "token span {:?} out of bounds for source of length {}", + span, + src.len() + ); + "" + }, + |t| t, + ); + + let raw = DdlogLanguage::kind_to_raw(kind); + if kind == SyntaxKind::N_ERROR { + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); + } + builder.token(raw, text); + if kind == SyntaxKind::N_ERROR { + builder.finish_node(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::span_scanner::parse_tokens; + use crate::tokenize; + + #[test] + fn build_green_tree_round_trip() { + let src = "import foo::bar;"; + let tokens = tokenize(src); + let (spans, errors) = parse_tokens(&tokens, src); + assert!(errors.is_empty()); + let green = build_green_tree(&tokens, src, &spans); + let root = crate::parser::ast::Root::from_green(green); + assert_eq!(root.text(), src); + } +} From b80cc3bac611be244381b66108ea0b2af6bb8731 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 18 Jul 2025 22:59:46 +0100 Subject: [PATCH 4/8] Refactor CST builder helpers --- src/parser/cst_builder/spans.rs | 30 ++++++++----------- src/parser/cst_builder/tree.rs | 52 +++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/src/parser/cst_builder/spans.rs b/src/parser/cst_builder/spans.rs index 37f2fab7..fc9177cf 100644 --- a/src/parser/cst_builder/spans.rs +++ b/src/parser/cst_builder/spans.rs @@ -90,15 +90,7 @@ impl ParsedSpansBuilder { /// Build the [`ParsedSpans`]. #[must_use] pub fn build(self) -> ParsedSpans { - ParsedSpans::new( - self.imports, - self.typedefs, - self.relations, - self.indexes, - self.functions, - self.transformers, - self.rules, - ) + ParsedSpans::new(self) } } @@ -109,15 +101,17 @@ impl ParsedSpans { ParsedSpansBuilder::default() } - pub(super) fn new( - imports: Vec, - typedefs: Vec, - relations: Vec, - indexes: Vec, - functions: Vec, - transformers: Vec, - rules: Vec, - ) -> Self { + pub(super) fn new(builder: ParsedSpansBuilder) -> Self { + let ParsedSpansBuilder { + imports, + typedefs, + relations, + indexes, + functions, + transformers, + rules, + } = builder; + let result = validate_span_lists_sorted(&[ ("imports", &imports), ("typedefs", &typedefs), diff --git a/src/parser/cst_builder/tree.rs b/src/parser/cst_builder/tree.rs index a05aaec2..13648985 100644 --- a/src/parser/cst_builder/tree.rs +++ b/src/parser/cst_builder/tree.rs @@ -40,6 +40,39 @@ impl<'a> SpanCursor<'a> { } } +struct SpanCursors<'a> { + cursors: [SpanCursor<'a>; 7], +} + +impl<'a> SpanCursors<'a> { + fn new(spans: &'a ParsedSpans) -> Self { + Self { + cursors: [ + SpanCursor::new(spans.imports(), SyntaxKind::N_IMPORT_STMT), + SpanCursor::new(spans.typedefs(), SyntaxKind::N_TYPE_DEF), + SpanCursor::new(spans.relations(), SyntaxKind::N_RELATION_DECL), + SpanCursor::new(spans.indexes(), SyntaxKind::N_INDEX), + SpanCursor::new(spans.functions(), SyntaxKind::N_FUNCTION), + SpanCursor::new(spans.transformers(), SyntaxKind::N_TRANSFORMER), + SpanCursor::new(spans.rules(), SyntaxKind::N_RULE), + ], + } + } + + fn advance_and_start(&mut self, builder: &mut GreenNodeBuilder, pos: usize) { + for cur in &mut self.cursors { + cur.advance_to(pos); + cur.start_if(builder, pos); + } + } + + fn finish(&mut self, builder: &mut GreenNodeBuilder, pos: usize) { + for cur in &mut self.cursors { + cur.finish_if(builder, pos); + } + } +} + /// Construct the CST from the token stream and recorded statement spans. pub(crate) fn build_green_tree( tokens: &[(SyntaxKind, Span)], @@ -49,27 +82,14 @@ pub(crate) fn build_green_tree( let mut builder = GreenNodeBuilder::new(); builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); - let mut cursors = [ - SpanCursor::new(spans.imports(), SyntaxKind::N_IMPORT_STMT), - SpanCursor::new(spans.typedefs(), SyntaxKind::N_TYPE_DEF), - SpanCursor::new(spans.relations(), SyntaxKind::N_RELATION_DECL), - SpanCursor::new(spans.indexes(), SyntaxKind::N_INDEX), - SpanCursor::new(spans.functions(), SyntaxKind::N_FUNCTION), - SpanCursor::new(spans.transformers(), SyntaxKind::N_TRANSFORMER), - SpanCursor::new(spans.rules(), SyntaxKind::N_RULE), - ]; + let mut cursors = SpanCursors::new(spans); for &(kind, ref span) in tokens { - for cur in &mut cursors { - cur.advance_to(span.start); - cur.start_if(&mut builder, span.start); - } + cursors.advance_and_start(&mut builder, span.start); push_token(&mut builder, kind, span.clone(), src); - for cur in &mut cursors { - cur.finish_if(&mut builder, span.end); - } + cursors.finish(&mut builder, span.end); } builder.finish_node(); From 566c8d717c089e2f87df78e424abd2e002711666 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 18 Jul 2025 23:43:24 +0100 Subject: [PATCH 5/8] Remove ParsedSpans::new --- src/parser/cst_builder/spans.rs | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/parser/cst_builder/spans.rs b/src/parser/cst_builder/spans.rs index fc9177cf..c45987bb 100644 --- a/src/parser/cst_builder/spans.rs +++ b/src/parser/cst_builder/spans.rs @@ -90,19 +90,7 @@ impl ParsedSpansBuilder { /// Build the [`ParsedSpans`]. #[must_use] pub fn build(self) -> ParsedSpans { - ParsedSpans::new(self) - } -} - -impl ParsedSpans { - /// Start building a [`ParsedSpans`] instance. - #[must_use] - pub fn builder() -> ParsedSpansBuilder { - ParsedSpansBuilder::default() - } - - pub(super) fn new(builder: ParsedSpansBuilder) -> Self { - let ParsedSpansBuilder { + let Self { imports, typedefs, relations, @@ -110,7 +98,7 @@ impl ParsedSpans { functions, transformers, rules, - } = builder; + } = self; let result = validate_span_lists_sorted(&[ ("imports", &imports), @@ -123,7 +111,7 @@ impl ParsedSpans { ]); debug_assert!(result.is_ok(), "{}", result.err().unwrap_or_default()); - Self { + ParsedSpans { imports, typedefs, relations, @@ -133,6 +121,16 @@ impl ParsedSpans { rules, } } +} + +impl ParsedSpans { + /// Start building a [`ParsedSpans`] instance. + #[must_use] + pub fn builder() -> ParsedSpansBuilder { + ParsedSpansBuilder::default() + } + + // constructor removed; instances are built via the builder /// Access `import` statement spans. #[must_use] From 671e71e08fa33bf9f64e2f0279a177cef393d4f4 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 19 Jul 2025 01:28:48 +0100 Subject: [PATCH 6/8] Refine green tree builder --- src/parser/cst_builder/tree.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/parser/cst_builder/tree.rs b/src/parser/cst_builder/tree.rs index 13648985..f6d50964 100644 --- a/src/parser/cst_builder/tree.rs +++ b/src/parser/cst_builder/tree.rs @@ -7,6 +7,9 @@ use crate::{DdlogLanguage, Span, SyntaxKind}; use super::spans::ParsedSpans; +/// Number of span cursor categories handled when building the CST. +const CURSOR_COUNT: usize = 7; + struct SpanCursor<'a> { iter: std::iter::Peekable>, kind: SyntaxKind, @@ -41,7 +44,7 @@ impl<'a> SpanCursor<'a> { } struct SpanCursors<'a> { - cursors: [SpanCursor<'a>; 7], + cursors: [SpanCursor<'a>; CURSOR_COUNT], } impl<'a> SpanCursors<'a> { @@ -74,6 +77,21 @@ impl<'a> SpanCursors<'a> { } /// Construct the CST from the token stream and recorded statement spans. +/// +/// # Examples +/// +/// ``` +/// use ddlint::tokenize; +/// use ddlint::parser::{cst_builder::{build_green_tree, ParsedSpans}, span_scanner::parse_tokens, ast::Root}; +/// +/// let src = "import foo::bar;"; +/// let tokens = tokenize(src); +/// let (spans, errors) = parse_tokens(&tokens, src); +/// assert!(errors.is_empty()); +/// let green = build_green_tree(&tokens, src, &spans); +/// let root = Root::from_green(green); +/// assert_eq!(root.text(), src); +/// ``` pub(crate) fn build_green_tree( tokens: &[(SyntaxKind, Span)], src: &str, @@ -111,12 +129,16 @@ fn push_token(builder: &mut GreenNodeBuilder, kind: SyntaxKind, span: Span, src: let raw = DdlogLanguage::kind_to_raw(kind); if kind == SyntaxKind::N_ERROR { - builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); + push_error_wrapped(builder, raw, text); + } else { + builder.token(raw, text); } +} + +fn push_error_wrapped(builder: &mut GreenNodeBuilder, raw: rowan::SyntaxKind, text: &str) { + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_ERROR)); builder.token(raw, text); - if kind == SyntaxKind::N_ERROR { - builder.finish_node(); - } + builder.finish_node(); } #[cfg(test)] From 098e012995d01049b0b8bdc2209365c77d288f43 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 19 Jul 2025 01:44:10 +0100 Subject: [PATCH 7/8] Expand span module documentation --- src/parser/cst_builder/spans.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser/cst_builder/spans.rs b/src/parser/cst_builder/spans.rs index c45987bb..85cc683a 100644 --- a/src/parser/cst_builder/spans.rs +++ b/src/parser/cst_builder/spans.rs @@ -1,4 +1,10 @@ -//! Span storage and validation helpers. +//! Span storage and validation helpers used when building the CST. +//! +//! `ParsedSpans` groups the byte ranges for each statement category after +//! scanning the token stream. During [`build_green_tree`](super::tree::build_green_tree) +//! these spans determine where nodes start and end so the resulting tree +//! mirrors the source layout. The builder enforces that every span list is +//! sorted and free from overlaps in debug builds, catching mistakes early. use crate::Span; From c25ab5be3733ad71d5c51453df6a61384ee9b9d8 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 19 Jul 2025 03:03:05 +0100 Subject: [PATCH 8/8] Name span cursor count constant --- src/parser/cst_builder/tree.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser/cst_builder/tree.rs b/src/parser/cst_builder/tree.rs index f6d50964..f9d90cec 100644 --- a/src/parser/cst_builder/tree.rs +++ b/src/parser/cst_builder/tree.rs @@ -7,8 +7,8 @@ use crate::{DdlogLanguage, Span, SyntaxKind}; use super::spans::ParsedSpans; -/// Number of span cursor categories handled when building the CST. -const CURSOR_COUNT: usize = 7; +/// Number of cursor categories managed during CST construction. +const SPAN_CURSOR_COUNT: usize = 7; struct SpanCursor<'a> { iter: std::iter::Peekable>, @@ -44,7 +44,7 @@ impl<'a> SpanCursor<'a> { } struct SpanCursors<'a> { - cursors: [SpanCursor<'a>; CURSOR_COUNT], + cursors: [SpanCursor<'a>; SPAN_CURSOR_COUNT], } impl<'a> SpanCursors<'a> {