Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ pub enum SyntaxKind {
N_HO_FIELD,
N_TRANSFORMER,
N_APPLY,
N_IMPORT,
N_IMPORT_STMT,
N_DATALOG_PROGRAM,
// Special
N_ERROR,
Expand Down
106 changes: 87 additions & 19 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub struct Parsed {
green: GreenNode,
root: ast::Root,
errors: Vec<Simple<SyntaxKind>>,
items: Vec<ast::Item>,
}

impl Parsed {
Expand All @@ -39,6 +40,12 @@ impl Parsed {
pub fn errors(&self) -> &[Simple<SyntaxKind>] {
&self.errors
}

/// Access parsed items such as imports.
#[must_use]
pub fn items(&self) -> &[ast::Item] {
&self.items
}
}

/// Parse the provided source string.
Expand All @@ -49,12 +56,7 @@ impl Parsed {
#[must_use]
pub fn parse(src: &str) -> Parsed {
let tokens = tokenize(src);
let (parsed_kinds, errors) = parse_tokens(&tokens, src.len());
debug_assert_eq!(
parsed_kinds.len(),
tokens.len(),
"parser output token count differs from lexer",
);
let (items, errors) = parse_tokens(&tokens, src.len(), src);

let green = build_green_tree(tokens, src);
let root = ast::Root::from_green(green.clone());
Expand All @@ -63,27 +65,23 @@ pub fn parse(src: &str) -> Parsed {
green,
root,
errors,
items,
}
}

fn parse_tokens(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (complexity): Consider simplifying import extraction by walking the AST after parsing instead of using nested parser combinators.

Here’s a simpler approach that drops the nested Parser combinators entirely and just walks your AST to find imports. You can

  1. Revert parse_tokens back to only return (Vec<SyntaxKind>, Vec<Simple<SyntaxKind>>).
  2. Remove your parse_import/decl helpers.
  3. In parse(...), after you build root, do a one‐pass walk over its syntax tree to collect imports.
pub fn parse(src: &str) -> Parsed {
    let tokens = tokenize(src);
    let (parsed_kinds, errors) = parse_tokens(&tokens, src.len());
    debug_assert_eq!(
        parsed_kinds.len(),
        tokens.len(),
        "parser output token count differs from lexer"
    );

    let green = build_green_tree(tokens, src);
    let root = ast::Root::from_green(green.clone());

    // --- new: extract imports in one pass over the AST ---
    let items = root
        .syntax()
        .descendants()
        .filter_map(ast::Import::cast)       // see helper below
        .map(ast::Item::Import)
        .collect();

    Parsed { green, root, errors, items }
}

Then in your ast module add a tiny helper to turn an import node into ast::Import:

impl Import {
    pub fn cast(node: SyntaxNode) -> Option<Import> {
        if node.kind() != SyntaxKind::N_IMPORT {
            return None;
        }
        // e.g. input: AST node for `import foo;`
        // skip the `import` token and the trailing `;`
        let text = node.text();
        let module = text
            .trim_start_matches("import")
            .trim_end_matches(';')
            .trim();
        Some(Import { module: module.to_string() })
    }
}

And your enum stays the same:

pub enum Item {
    Import(Import),
    // … future items
}

This removes all of the map/or/boxed boilerplate and delegates import-finding to a single AST walk, while preserving full functionality and error recovery.

tokens: &[(SyntaxKind, Span)],
len: usize,
) -> (Vec<SyntaxKind>, Vec<Simple<SyntaxKind>>) {
src: &str,
) -> (Vec<ast::Item>, Vec<Simple<SyntaxKind>>) {
let stream = Stream::from_iter(0..len, tokens.iter().cloned());

let parser = any::<SyntaxKind, Simple<SyntaxKind>>()
let parser = decl(src)
.repeated()
.then_ignore(end());
let (parsed_kinds, errors) = parser.parse_recovery(stream);

let result = parsed_kinds.unwrap_or_default();
debug_assert_eq!(
result.len(),
tokens.len(),
"parser combinator output differs from input token count",
);
(result, errors)
.then_ignore(end())
.map(|items| items.into_iter().flatten().collect());
let (items, errors) = parser.parse_recovery(stream);

(items.unwrap_or_default(), errors)
}

fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str) -> GreenNode {
Expand Down Expand Up @@ -113,6 +111,32 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str) -> GreenNode {
builder.finish()
}

fn parse_import(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: parse_import assumes module names are single identifiers.

To handle complex import paths (e.g., with dots or slashes), the parser will need to be updated. Currently, it only supports single identifier module names.

src: &str,
) -> impl Parser<SyntaxKind, ast::Import, Error = Simple<SyntaxKind>> + Clone + '_ {
just(SyntaxKind::K_IMPORT)
.then_ignore(select! { SyntaxKind::T_WHITESPACE => () }.repeated())
.ignore_then(select!(|span| SyntaxKind::T_IDENT => span))
.then_ignore(just(SyntaxKind::T_SEMI))
.map(move |span: Span| {
let text = src.get(span.clone()).unwrap_or("");
ast::Import {
module: text.to_string(),
}
})
.boxed()
}
Comment on lines +114 to +128
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Consider improving error handling for invalid spans.

The import parser implementation is well-structured, but the unwrap_or("") on line 122 could silently produce empty module names if the span is invalid. Consider logging a warning or handling this case more explicitly.

 .map(move |span: Span| {
-    let text = src.get(span.clone()).unwrap_or("");
+    let text = src.get(span.clone()).unwrap_or_else(|| {
+        warn!("Invalid span {:?} for import module name", span);
+        ""
+    });
     ast::Import {
         module: text.to_string(),
     }
 })

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/parser/mod.rs around lines 114 to 128, the parse_import function uses
unwrap_or("") on a span that might be invalid, which can silently produce empty
module names. To fix this, replace unwrap_or with explicit error handling by
checking if the span is valid before extracting the text. If invalid, log a
warning or return a parse error instead of defaulting to an empty string,
ensuring invalid spans are handled explicitly and do not produce empty module
names silently.


fn decl(
src: &str,
) -> impl Parser<SyntaxKind, Option<ast::Item>, Error = Simple<SyntaxKind>> + Clone + '_ {
parse_import(src)
.map(ast::Item::Import)
.map(Some)
.or(any::<SyntaxKind, Simple<SyntaxKind>>().map(|_| None))
.boxed()
}

pub mod ast {
//! Minimal typed AST wrappers used by the parser.
//!
Expand Down Expand Up @@ -166,4 +190,48 @@ pub mod ast {
self.syntax.text().to_string()
}
}

/// An import declaration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Import {
/// The imported module path as text.
pub module: String,
}

/// Top-level items recognised by the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Item {
/// An import statement.
Import(Import),
}
}

#[cfg(test)]
mod tests {
use super::*;
use chumsky::Parser;

#[test]
fn import_parses() {
let src = "import foo;";
let tokens = crate::tokenize(src);
let stream = Stream::from_iter(0..src.len(), tokens.clone().into_iter());
let (out, _errs) = parse_import(src).parse_recovery(stream);
assert_eq!(
out,
Some(ast::Import {
module: "foo".to_string(),
})
);
}

#[test]
fn import_missing_semicolon_errors() {
let src = "import foo";
let tokens = crate::tokenize(src);
let stream = Stream::from_iter(0..src.len(), tokens.clone().into_iter());
let (out, errs) = parse_import(src).parse_recovery(stream);
assert!(out.is_none());
assert!(!errs.is_empty());
}
}
14 changes: 14 additions & 0 deletions tests/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,17 @@ fn error_token_produces_error_node() {
.any(|node| node.kind() == SyntaxKind::N_ERROR);
assert!(has_error);
}

#[fixture]
fn import_prog() -> &'static str {
"import foo;"
}

#[rstest]
fn import_item_parsed(import_prog: &str) {
let parsed = parse(import_prog);
assert!(matches!(
parsed.items().first(),
Some(ddlint::parser::ast::Item::Import(i)) if i.module == "foo"
));
}