Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 189 additions & 11 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ pub struct Parsed {
errors: Vec<Simple<SyntaxKind>>,
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Duplicated logic for advancing import_iter and type_iter could be refactored.

Consider extracting the shared logic into a helper function to minimize duplication and reduce the risk of inconsistencies.

Suggested implementation:

    let mut import_iter = items.imports.iter().peekable();
    let mut type_iter = items.typedefs.iter().peekable();

    fn advance_iter_to_span<'a, I>(iter: &mut std::iter::Peekable<I>, span_start: usize)
    where
        I: Iterator<Item = &'a _>,
        &'a _: std::ops::Deref<Target = crate::Span>, // Adjust as needed for your Span type
    {
        while let Some(next) = iter.peek() {
            if span_start >= next.end {
                iter.next();
            } else {
                break;
            }
        }
    }

    for (kind, span) in tokens {
        // Advance to the next import span if this token lies after the end of
        // the current one.  Multiple tokens can share the same span, so we need
        advance_iter_to_span(&mut import_iter, span.start);
        advance_iter_to_span(&mut type_iter, span.start);
  • You may need to adjust the type bounds and dereferencing in the advance_iter_to_span function depending on the actual type of your span objects (e.g., if they are not directly accessible via .end).
  • Remove the old duplicated iterator-advancing code for both import_iter and type_iter.


#[derive(Default)]
struct ItemSpans {
imports: Vec<Span>,
typedefs: Vec<Span>,
}

impl Parsed {
/// Access the `rowan` green tree.
#[must_use]
Expand Down Expand Up @@ -49,9 +55,9 @@ impl Parsed {
#[must_use]
pub fn parse(src: &str) -> Parsed {
let tokens = tokenize(src);
let (import_spans, errors) = parse_tokens(&tokens, src.len());
let (items, errors) = parse_tokens(&tokens, src.len());

let green = build_green_tree(tokens, src, &import_spans);
let green = build_green_tree(tokens, src, &items);
let root = ast::Root::from_green(green.clone());

Parsed {
Expand All @@ -61,7 +67,13 @@ pub fn parse(src: &str) -> Parsed {
}
}

fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (Vec<Span>, Vec<Simple<SyntaxKind>>) {
fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (ItemSpans, Vec<Simple<SyntaxKind>>) {
#[derive(Clone)]
enum Item {
Import(Span),
Type(Span),
}

let stream = Stream::from_iter(0..len, tokens.iter().cloned());

let ws = filter(|kind: &SyntaxKind| {
Expand Down Expand Up @@ -89,20 +101,77 @@ fn parse_tokens(tokens: &[(SyntaxKind, Span)], len: usize) -> (Vec<Span>, Vec<Si
.ignore_then(module_path)
.then(alias.or_not())
.padded_by(ws.repeated())
.map_with_span(|_, span| span);
.map_with_span(|((), _), span| Item::Import(span));

let primitive = filter(|kind: &SyntaxKind| {
matches!(
kind,
SyntaxKind::T_IDENT
| SyntaxKind::K_BOOL
| SyntaxKind::K_BIT
| SyntaxKind::K_SIGNED
| SyntaxKind::K_DOUBLE
| SyntaxKind::K_FLOAT
| SyntaxKind::K_BIGINT
)
})
.ignored()
.padded_by(ws.repeated());

let ty = recursive(|ty| {
let field = ident
.then_ignore(just(SyntaxKind::T_COLON).padded_by(ws.repeated()))
.then(ty.clone())
.ignored();

let tuple = field
.separated_by(just(SyntaxKind::T_COMMA).padded_by(ws.repeated()))
.allow_trailing()
.delimited_by(just(SyntaxKind::T_LPAREN), just(SyntaxKind::T_RPAREN))
.ignored();

primitive.or(tuple)
});

let typedef_alias = just(SyntaxKind::K_TYPEDEF)
.padded_by(ws.repeated())
.ignore_then(ident)
.then_ignore(just(SyntaxKind::T_EQ).padded_by(ws.repeated()))
.then(ty)
.padded_by(ws.repeated())
.map_with_span(|((), ()), span| Item::Type(span));

let parser = imprt.repeated().then_ignore(end());
let extern_type = just(SyntaxKind::K_EXTERN)
.padded_by(ws.repeated())
.ignore_then(just(SyntaxKind::K_TYPE))
.padded_by(ws.repeated())
.ignore_then(ident)
.padded_by(ws.repeated())
.map_with_span(|(), span| Item::Type(span));

let decl = choice((imprt, typedef_alias, extern_type));
let parser = decl.repeated().then_ignore(end());
let (res, errors) = parser.parse_recovery(stream);
(res.unwrap_or_default(), errors)
let mut items = ItemSpans::default();
if let Some(list) = res {
for item in list {
match item {
Item::Import(s) => items.imports.push(s),
Item::Type(s) => items.typedefs.push(s),
}
}
}
(items, errors)
}

fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span]) -> GreenNode {
fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, items: &ItemSpans) -> GreenNode {
let mut builder = GreenNodeBuilder::new();
builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_DATALOG_PROGRAM));
// Iterator over the spans recorded for each `import` statement. Each span
// covers the entire statement so we can nest tokens inside an
// `N_IMPORT_STMT` node while building the CST.
let mut import_iter = imports.iter().peekable();
// Iterators over the spans for `import` and `typedef` statements. Each span
// covers the entire statement so we can nest tokens inside the appropriate
// node while building the CST.
let mut import_iter = items.imports.iter().peekable();
let mut type_iter = items.typedefs.iter().peekable();
for (kind, span) in tokens {
// Advance to the next import span if this token lies after the end of
// the current one. Multiple tokens can share the same span, so we need
Expand All @@ -114,6 +183,13 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span]
break;
}
}
while let Some(next) = type_iter.peek() {
if span.start >= next.end {
type_iter.next();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question (bug_risk): Finishing nodes for import and typedef may overlap if spans are not disjoint.

If overlapping spans are possible, both finish_node calls could be executed for the same token. If this can't happen, please add an assertion or comment to clarify this invariant.

} else {
break;
}
}
// Begin an `N_IMPORT_STMT` node when this token marks the start of an
// import span. Tokens emitted by the lexer appear in order, so equality
// is sufficient here.
Expand All @@ -122,6 +198,11 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span]
.is_some_and(|current| span.start == current.start)
{
builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_IMPORT_STMT));
} else if type_iter
.peek()
.is_some_and(|current| span.start == current.start)
{
builder.start_node(DdlogLanguage::kind_to_raw(SyntaxKind::N_TYPE_DEF));
}
let text = src.get(span.clone()).map_or_else(
|| {
Expand Down Expand Up @@ -151,6 +232,13 @@ fn build_green_tree(tokens: Vec<(SyntaxKind, Span)>, src: &str, imports: &[Span]
builder.finish_node();
import_iter.next();
}
if type_iter
.peek()
.is_some_and(|current| span.end >= current.end)
{
builder.finish_node();
type_iter.next();
}
}
builder.finish_node();
builder.finish()
Expand Down Expand Up @@ -218,6 +306,16 @@ pub mod ast {
.map(|syntax| Import { syntax })
.collect()
}

/// Collect all `typedef` items under this root.
#[must_use]
pub fn type_defs(&self) -> Vec<TypeDef> {
self.syntax
.children()
.filter(|n| n.kind() == SyntaxKind::N_TYPE_DEF)
.map(|syntax| TypeDef { syntax })
.collect()
}
}

/// Typed wrapper for an `import` statement.
Expand Down Expand Up @@ -268,4 +366,84 @@ pub mod ast {
})
}
}

/// Typed wrapper for a `typedef` declaration.
#[derive(Debug, Clone)]
pub struct TypeDef {
pub(crate) syntax: SyntaxNode<DdlogLanguage>,
}

impl TypeDef {
/// Access the underlying syntax node.
#[must_use]
pub fn syntax(&self) -> &SyntaxNode<DdlogLanguage> {
&self.syntax
}

/// The declared type name.
#[must_use]
pub fn name(&self) -> Option<String> {
let mut iter = self.syntax.children_with_tokens();
for el in iter.by_ref() {
match el.kind() {
SyntaxKind::K_TYPEDEF => break,
SyntaxKind::K_EXTERN => {
// skip 'extern type'
for tok in iter.by_ref() {
if tok.kind() == SyntaxKind::K_TYPE {
break;
}
}
break;
}
_ => {}
}
}
iter.find_map(|e| match e {
rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => {
Some(t.text().to_string())
}
_ => None,
})
}
Comment on lines +385 to +408
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Simplify the name() method implementation.

The current implementation with nested loops and manual iteration is hard to follow. Consider using iterator combinators for clarity.

 pub fn name(&self) -> Option<String> {
-    let mut iter = self.syntax.children_with_tokens();
-    for el in iter.by_ref() {
-        match el.kind() {
-            SyntaxKind::K_TYPEDEF => break,
-            SyntaxKind::K_EXTERN => {
-                // skip 'extern type'
-                for tok in iter.by_ref() {
-                    if tok.kind() == SyntaxKind::K_TYPE {
-                        break;
-                    }
-                }
-                break;
-            }
-            _ => {}
-        }
-    }
-    iter.find_map(|e| match e {
-        rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => {
-            Some(t.text().to_string())
-        }
-        _ => None,
-    })
+    self.syntax
+        .children_with_tokens()
+        .skip_while(|e| !matches!(e.kind(), SyntaxKind::K_TYPEDEF | SyntaxKind::K_EXTERN))
+        .skip_while(|e| e.kind() != SyntaxKind::T_IDENT)
+        .find_map(|e| match e {
+            rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => {
+                Some(t.text().to_string())
+            }
+            _ => None,
+        })
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
pub fn name(&self) -> Option<String> {
let mut iter = self.syntax.children_with_tokens();
for el in iter.by_ref() {
match el.kind() {
SyntaxKind::K_TYPEDEF => break,
SyntaxKind::K_EXTERN => {
// skip 'extern type'
for tok in iter.by_ref() {
if tok.kind() == SyntaxKind::K_TYPE {
break;
}
}
break;
}
_ => {}
}
}
iter.find_map(|e| match e {
rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => {
Some(t.text().to_string())
}
_ => None,
})
}
pub fn name(&self) -> Option<String> {
self.syntax
.children_with_tokens()
.skip_while(|e| !matches!(e.kind(), SyntaxKind::K_TYPEDEF | SyntaxKind::K_EXTERN))
.skip_while(|e| e.kind() != SyntaxKind::T_IDENT)
.find_map(|e| match e {
rowan::NodeOrToken::Token(t) if t.kind() == SyntaxKind::T_IDENT => {
Some(t.text().to_string())
}
_ => None,
})
}
🤖 Prompt for AI Agents
In src/parser/mod.rs around lines 385 to 408, the name() method uses nested
loops and manual iteration, making it hard to read. Refactor this method to use
iterator combinators like filter, skip_while, or find_map to simplify the logic
and improve clarity while preserving the existing behavior.


/// Whether this is an `extern type` declaration.
#[must_use]
pub fn is_extern(&self) -> bool {
self.syntax
.children_with_tokens()
.any(|e| e.kind() == SyntaxKind::K_EXTERN)
}

/// The type definition text for aliases.
#[must_use]
pub fn definition(&self) -> Option<String> {
if self.is_extern() {
return None;
}
let mut found_eq = false;
let mut out = String::new();
for e in self.syntax.children_with_tokens() {
match e {
rowan::NodeOrToken::Token(t) => {
if found_eq {
out.push_str(t.text());
} else if t.kind() == SyntaxKind::T_EQ {
found_eq = true;
}
}
rowan::NodeOrToken::Node(n) => {
if found_eq {
out.push_str(&n.text().to_string());
}
}
}
}
if found_eq {
Some(out.trim().to_string())
} else {
None
}
}
Comment on lines +420 to +447
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick (assertive)

Consider using iterator-based approach for better performance.

The current implementation with string concatenation in a loop could be inefficient for large type definitions.

 pub fn definition(&self) -> Option<String> {
     if self.is_extern() {
         return None;
     }
-    let mut found_eq = false;
-    let mut out = String::new();
-    for e in self.syntax.children_with_tokens() {
-        match e {
-            rowan::NodeOrToken::Token(t) => {
-                if found_eq {
-                    out.push_str(t.text());
-                } else if t.kind() == SyntaxKind::T_EQ {
-                    found_eq = true;
-                }
-            }
-            rowan::NodeOrToken::Node(n) => {
-                if found_eq {
-                    out.push_str(&n.text().to_string());
-                }
-            }
-        }
-    }
-    if found_eq {
-        Some(out.trim().to_string())
-    } else {
-        None
-    }
+    let eq_pos = self.syntax
+        .children_with_tokens()
+        .position(|e| matches!(e.kind(), SyntaxKind::T_EQ))?;
+    
+    let definition = self.syntax
+        .children_with_tokens()
+        .skip(eq_pos + 1)
+        .map(|e| match e {
+            rowan::NodeOrToken::Token(t) => t.text().to_string(),
+            rowan::NodeOrToken::Node(n) => n.text().to_string(),
+        })
+        .collect::<String>();
+    
+    Some(definition.trim().to_string())
 }

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/parser/mod.rs around lines 420 to 447, the definition method uses string
concatenation inside a loop, which can be inefficient for large type
definitions. Refactor this method to use an iterator-based approach by
collecting the relevant text pieces into an iterator and then joining them once
at the end, instead of appending strings repeatedly. This will improve
performance by minimizing intermediate string allocations.

}
}
40 changes: 40 additions & 0 deletions tests/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
//! property holds for simple inputs. Grammar-specific assertions will be added
//! once the parser rules are implemented.

#![expect(clippy::expect_used, reason = "tests assert exact behaviour")]

use ddlint::{SyntaxKind, ast::Import, parse};
use rstest::{fixture, rstest};

Expand Down Expand Up @@ -155,3 +157,41 @@ fn import_multiple_statements() {
let paths: Vec<_> = imports.iter().map(|i| (i.path(), i.alias())).collect();
assert_eq!(paths, [("a".into(), None), ("b".into(), Some("c".into()))]);
}

#[rstest]
fn typedef_standard_case() {
let src = "typedef Uuid = string";
let parsed = parse(src);
assert!(parsed.errors().is_empty());
let defs = parsed.root().type_defs();
let def = defs.first().expect("expected typedef");
assert_eq!(def.name().as_deref(), Some("Uuid"));
assert_eq!(def.definition(), Some("string".into()));
assert!(!def.is_extern());
}

#[rstest]
fn typedef_complex_case() {
let src = "typedef UserRecord = (name: string, age: u64, active: bool)";
let parsed = parse(src);
assert!(parsed.errors().is_empty());
let defs = parsed.root().type_defs();
let def = defs.first().expect("expected typedef");
assert_eq!(def.name().as_deref(), Some("UserRecord"));
assert_eq!(
def.definition().as_deref(),
Some("(name: string, age: u64, active: bool)")
);
}

#[rstest]
fn extern_type_case() {
let src = "extern type FfiHandle";
let parsed = parse(src);
assert!(parsed.errors().is_empty());
let defs = parsed.root().type_defs();
let def = defs.first().expect("expected typedef");
assert_eq!(def.name().as_deref(), Some("FfiHandle"));
assert!(def.definition().is_none());
assert!(def.is_extern());
}