From e60d64107408c0cbc900c0404da22b58a5dc51ef Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 26 Jun 2025 21:40:18 +0100 Subject: [PATCH] Implement typedef parser Parse typedef and extern type declarations, build N_TYPE_DEF nodes, and add typed AST accessors. Extend tests for type definitions. --- src/parser/mod.rs | 200 +++++++++++++++++++++++++++++++++++++++++++--- tests/parser.rs | 40 ++++++++++ 2 files changed, 229 insertions(+), 11 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8b083558..b716a0a9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -21,6 +21,12 @@ pub struct Parsed { errors: Vec>, } +#[derive(Default)] +struct ItemSpans { + imports: Vec, + typedefs: Vec, +} + impl Parsed { /// Access the `rowan` green tree. #[must_use] @@ -49,9 +55,9 @@ impl Parsed { #[must_use] pub fn parse(src: &str) -> Parsed { let tokens = tokenize(src); - let (import_spans, errors) = parse_tokens(&tokens, src.len()); + let (items, errors) = parse_tokens(&tokens, src.len()); - let green = build_green_tree(tokens, src, &import_spans); + let green = build_green_tree(tokens, src, &items); let root = ast::Root::from_green(green.clone()); Parsed { @@ -61,7 +67,13 @@ pub fn parse(src: &str) -> Parsed { } } -fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (Vec, Vec>) { +fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (ItemSpans, Vec>) { + #[derive(Clone)] + enum Item { + Import(Span), + Type(Span), + } + let stream = Stream::from_iter(0..len, tokens.iter().cloned()); let ws = filter(|kind: &SyntaxKind| { @@ -89,20 +101,77 @@ fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (Vec, Vec items.imports.push(s), + Item::Type(s) => items.typedefs.push(s), + } + } + } + (items, errors) } -fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span]) -> GreenNode { +fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, items: &ItemSpans) -> GreenNode { let mut builder = GreenNodeBuilder::new(); builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM)); - // Iterator over the spans recorded for each `import` statement. Each span - // covers the entire statement so we can nest tokens inside an - // `N_IMPORT_STMT` node while building the CST. - let mut import_iter = imports.iter().peekable(); + // Iterators over the spans for `import` and `typedef` statements. Each span + // covers the entire statement so we can nest tokens inside the appropriate + // node while building the CST. + let mut import_iter = items.imports.iter().peekable(); + let mut type_iter = items.typedefs.iter().peekable(); for (kind, span) in tokens { // Advance to the next import span if this token lies after the end of // the current one. Multiple tokens can share the same span, so we need @@ -114,6 +183,13 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span] break; } } + while let Some(next) = type_iter.peek() { + if span.start >= next.end { + type_iter.next(); + } else { + break; + } + } // Begin an `N_IMPORT_STMT` node when this token marks the start of an // import span. Tokens emitted by the lexer appear in order, so equality // is sufficient here. @@ -122,6 +198,11 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span] .is_some_and(|current| span.start == current.start) { builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_IMPORT_STMT)); + } else if type_iter + .peek() + .is_some_and(|current| span.start == current.start) + { + builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_TYPE_DEF)); } let text = src.get(span.clone()).map_or_else( || { @@ -151,6 +232,13 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span] builder.finish_node(); import_iter.next(); } + if type_iter + .peek() + .is_some_and(|current| span.end >= current.end) + { + builder.finish_node(); + type_iter.next(); + } } builder.finish_node(); builder.finish() @@ -218,6 +306,16 @@ pub mod ast { .map(|syntax| Import { syntax }) .collect() } + + /// Collect all `typedef` items under this root. + #[must_use] + pub fn type_defs(&self) -> Vec { + self.syntax + .children() + .filter(|n| n.kind() == SyntaxKind::N_TYPE_DEF) + .map(|syntax| TypeDef { syntax }) + .collect() + } } /// Typed wrapper for an `import` statement. @@ -268,4 +366,84 @@ pub mod ast { }) } } + + /// Typed wrapper for a `typedef` declaration. + #[derive(Debug, Clone)] + pub struct TypeDef { + pub(crate) syntax: SyntaxNode, + } + + impl TypeDef { + /// Access the underlying syntax node. + #[must_use] + pub fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + + /// The declared type name. + #[must_use] + pub fn name(&self) -> Option { + let mut iter = self.syntax.children_with_tokens(); + for el in iter.by_ref() { + match el.kind() { + SyntaxKind::K_TYPEDEF => break, + SyntaxKind::K_EXTERN => { + // skip 'extern type' + for tok in iter.by_ref() { + if tok.kind() == SyntaxKind::K_TYPE { + break; + } + } + break; + } + _ => {} + } + } + iter.find_map(|e| match e { + rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => { + Some(t.text().to_string()) + } + _ => None, + }) + } + + /// Whether this is an `extern type` declaration. + #[must_use] + pub fn is_extern(&self) -> bool { + self.syntax + .children_with_tokens() + .any(|e| e.kind() == SyntaxKind::K_EXTERN) + } + + /// The type definition text for aliases. + #[must_use] + pub fn definition(&self) -> Option { + if self.is_extern() { + return None; + } + let mut found_eq = false; + let mut out = String::new(); + for e in self.syntax.children_with_tokens() { + match e { + rowan::NodeOrToken::Token(t) => { + if found_eq { + out.push_str(t.text()); + } else if t.kind() == SyntaxKind::T_EQ { + found_eq = true; + } + } + rowan::NodeOrToken::Node(n) => { + if found_eq { + out.push_str(&n.text().to_string()); + } + } + } + } + if found_eq { + Some(out.trim().to_string()) + } else { + None + } + } + } } diff --git a/tests/parser.rs b/tests/parser.rs index 986921dd..25d04422 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -4,6 +4,8 @@ //! property holds for simple inputs. Grammar-specific assertions will be added //! once the parser rules are implemented. +#![expect(clippy::expect_used, reason = "tests assert exact behaviour")] + use ddlint::{SyntaxKind, ast::Import, parse}; use rstest::{fixture, rstest}; @@ -155,3 +157,41 @@ fn import_multiple_statements() { let paths: Vec<_> = imports.iter().map(|i| (i.path(), i.alias())).collect(); assert_eq!(paths, [("a".into(), None), ("b".into(), Some("c".into()))]); } + +#[rstest] +fn typedef_standard_case() { + let src = "typedef Uuid = string"; + let parsed = parse(src); + assert!(parsed.errors().is_empty()); + let defs = parsed.root().type_defs(); + let def = defs.first().expect("expected typedef"); + assert_eq!(def.name().as_deref(), Some("Uuid")); + assert_eq!(def.definition(), Some("string".into())); + assert!(!def.is_extern()); +} + +#[rstest] +fn typedef_complex_case() { + let src = "typedef UserRecord = (name: string, age: u64, active: bool)"; + let parsed = parse(src); + assert!(parsed.errors().is_empty()); + let defs = parsed.root().type_defs(); + let def = defs.first().expect("expected typedef"); + assert_eq!(def.name().as_deref(), Some("UserRecord")); + assert_eq!( + def.definition().as_deref(), + Some("(name: string, age: u64, active: bool)") + ); +} + +#[rstest] +fn extern_type_case() { + let src = "extern type FfiHandle"; + let parsed = parse(src); + assert!(parsed.errors().is_empty()); + let defs = parsed.root().type_defs(); + let def = defs.first().expect("expected typedef"); + assert_eq!(def.name().as_deref(), Some("FfiHandle")); + assert!(def.definition().is_none()); + assert!(def.is_extern()); +}