From 63d215261139c7b4ac1f106bae9e2bad6bfb33f3 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 26 Jun 2025 00:55:19 +0100 Subject: [PATCH] Add basic import parsing --- src/language.rs | 2 +- src/parser/mod.rs | 106 +++++++++++++++++++++++++++++++++++++--------- tests/parser.rs | 14 ++++++ 3 files changed, 102 insertions(+), 20 deletions(-) diff --git a/src/language.rs b/src/language.rs index 9aec0bf9..eb71dd1e 100644 --- a/src/language.rs +++ b/src/language.rs @@ -162,7 +162,7 @@ pub enum SyntaxKind { N_HO_FIELD, N_TRANSFORMER, N_APPLY, - N_IMPORT, + N_IMPORT_STMT, N_DATALOG_PROGRAM, // Special N_ERROR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 44ac5bc9..11c31b95 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -19,6 +19,7 @@ pub struct Parsed { green: GreenNode, root: ast::Root, errors: Vec>, + items: Vec, } impl Parsed { @@ -39,6 +40,12 @@ impl Parsed { pub fn errors(&self) -> &[Simple] { &self.errors } + + /// Access parsed items such as imports. + #[must_use] + pub fn items(&self) -> &[ast::Item] { + &self.items + } } /// Parse the provided source string. @@ -49,12 +56,7 @@ impl Parsed { #[must_use] pub fn parse(src: &str) -> Parsed { let tokens = tokenize(src); - let (parsed_kinds, errors) = parse_tokens(&tokens, src.len()); - debug_assert_eq!( - parsed_kinds.len(), - tokens.len(), - "parser output token count differs from lexer", - ); + let (items, errors) = parse_tokens(&tokens, src.len(), src); let green = build_green_tree(tokens, src); let root = ast::Root::from_green(green.clone()); @@ -63,27 +65,23 @@ pub fn parse(src: &str) -> Parsed { green, root, errors, + items, } } fn parse_tokens( tokens: &[(SyntaxKind, Span)], len: usize, -) -> (Vec, Vec>) { + src: &str, +) -> (Vec, Vec>) { let stream = Stream::from_iter(0..len, tokens.iter().cloned()); - - let parser = any::>() + let parser = decl(src) .repeated() - .then_ignore(end()); - let (parsed_kinds, errors) = parser.parse_recovery(stream); - - let result = parsed_kinds.unwrap_or_default(); - debug_assert_eq!( - result.len(), - tokens.len(), - "parser combinator output differs from input token count", - ); - (result, errors) + .then_ignore(end()) + .map(|items| items.into_iter().flatten().collect()); + let (items, errors) = parser.parse_recovery(stream); + + (items.unwrap_or_default(), errors) } fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str) -> GreenNode { @@ -113,6 +111,32 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str) -> GreenNode { builder.finish() } +fn parse_import( + src: &str, +) -> impl Parser> + Clone + '_ { + just(SyntaxKind::K_IMPORT) + .then_ignore(select! { SyntaxKind::T_WHITESPACE => () }.repeated()) + .ignore_then(select!(|span| SyntaxKind::T_IDENT => span)) + .then_ignore(just(SyntaxKind::T_SEMI)) + .map(move |span: Span| { + let text = src.get(span.clone()).unwrap_or(""); + ast::Import { + module: text.to_string(), + } + }) + .boxed() +} + +fn decl( + src: &str, +) -> impl Parser, Error = Simple> + Clone + '_ { + parse_import(src) + .map(ast::Item::Import) + .map(Some) + .or(any::>().map(|_| None)) + .boxed() +} + pub mod ast { //! Minimal typed AST wrappers used by the parser. //! @@ -166,4 +190,48 @@ pub mod ast { self.syntax.text().to_string() } } + + /// An import declaration. + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct Import { + /// The imported module path as text. + pub module: String, + } + + /// Top-level items recognised by the parser. + #[derive(Debug, Clone, PartialEq, Eq)] + pub enum Item { + /// An import statement. + Import(Import), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chumsky::Parser; + + #[test] + fn import_parses() { + let src = "import foo;"; + let tokens = crate::tokenize(src); + let stream = Stream::from_iter(0..src.len(), tokens.clone().into_iter()); + let (out, _errs) = parse_import(src).parse_recovery(stream); + assert_eq!( + out, + Some(ast::Import { + module: "foo".to_string(), + }) + ); + } + + #[test] + fn import_missing_semicolon_errors() { + let src = "import foo"; + let tokens = crate::tokenize(src); + let stream = Stream::from_iter(0..src.len(), tokens.clone().into_iter()); + let (out, errs) = parse_import(src).parse_recovery(stream); + assert!(out.is_none()); + assert!(!errs.is_empty()); + } } diff --git a/tests/parser.rs b/tests/parser.rs index a20b52c5..6d2ee46c 100644 --- a/tests/parser.rs +++ b/tests/parser.rs @@ -73,3 +73,17 @@ fn error_token_produces_error_node() { .any(|node| node.kind() == SyntaxKind::N_ERROR); assert!(has_error); } + +#[fixture] +fn import_prog() -> &'static str { + "import foo;" +} + +#[rstest] +fn import_item_parsed(import_prog: &str) { + let parsed = parse(import_prog); + assert!(matches!( + parsed.items().first(), + Some(ddlint::parser::ast::Item::Import(i)) if i.module == "foo" + )); +}