From dc4c9b12305722aa4d7d62c4bf71479035643696 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 5 Jul 2025 13:15:04 +0100 Subject: [PATCH 1/2] Add delimiter mismatch error --- src/parser/ast/parse_utils.rs | 88 ++++++++++++++++++++++++++++++----- src/parser/mod.rs | 4 +- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/src/parser/ast/parse_utils.rs b/src/parser/ast/parse_utils.rs index d5c3d62a..2e878b15 100644 --- a/src/parser/ast/parse_utils.rs +++ b/src/parser/ast/parse_utils.rs @@ -3,12 +3,12 @@ //! This module contains small functions reused by multiple AST nodes when //! extracting typed data from the CST. -use rowan::{NodeOrToken, SyntaxElement}; +use rowan::{NodeOrToken, SyntaxElement, TextRange}; use super::skip_whitespace_and_comments; use crate::{DdlogLanguage, SyntaxKind}; -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] enum Delim { Paren, Angle, @@ -23,6 +23,39 @@ enum Delim { #[derive(Default)] struct DelimStack(Vec); +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct DelimiterError { + expected: Delim, + found: SyntaxKind, + span: TextRange, +} + +impl std::fmt::Display for DelimiterError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let expected = match self.expected { + Delim::Paren => ")", + Delim::Angle => ">", + Delim::Bracket => "]", + Delim::Brace => "}", + }; + let found = match self.found { + SyntaxKind::T_RPAREN => ")", + SyntaxKind::T_GT => ">", + SyntaxKind::T_SHR => ">>", + SyntaxKind::T_RBRACKET => "]", + SyntaxKind::T_RBRACE => "}", + _ => "?", + }; + write!( + f, + "expected '{expected}' before '{found}' at {:#?}", + self.span + ) + } +} + +impl std::error::Error for DelimiterError {} + impl DelimStack { fn open(&mut self, delim: Delim, count: usize) { for _ in 0..count { @@ -87,7 +120,7 @@ fn push(token: &rowan::SyntaxToken, buf: &mut String) { /// starting at the opening parenthesis. The returned vector contains /// each name and its associated type text. #[must_use] -pub(super) fn parse_name_type_pairs(mut iter: I) -> Vec<(String, String)> +pub(super) fn parse_name_type_pairs(mut iter: I) -> (Vec<(String, String)>, Vec) where I: Iterator>, { @@ -99,6 +132,7 @@ where } let mut pairs = Vec::new(); + let mut errors = Vec::new(); let mut buf = String::new(); let mut name: Option = None; let mut depth = DelimStack::default(); @@ -116,6 +150,7 @@ where &mut pairs, &mut depth, &mut outer_parens, + &mut errors, ) { break; } @@ -124,7 +159,7 @@ where } } - pairs + (pairs, errors) } /// Handle a single token during name-type pair parsing. @@ -135,6 +170,7 @@ fn handle_token( pairs: &mut Vec<(String, String)>, depth: &mut DelimStack, outer_parens: &mut usize, + errors: &mut Vec, ) -> bool { match token.kind() { SyntaxKind::T_LPAREN => open_and_push(token, buf, depth, Delim::Paren, 1), @@ -148,22 +184,43 @@ fn handle_token( } SyntaxKind::T_LT => open_and_push(token, buf, depth, Delim::Angle, 1), SyntaxKind::T_GT => { - close_and_push(token, buf, depth, Delim::Angle, 1); + if close_and_push(token, buf, depth, Delim::Angle, 1) < 1 { + errors.push(DelimiterError { + expected: Delim::Angle, + found: token.kind(), + span: token.text_range(), + }); + } } SyntaxKind::T_SHL => open_and_push(token, buf, depth, Delim::Angle, 2), SyntaxKind::T_SHR => { - let closed = close_and_push(token, buf, depth, Delim::Angle, 2); - if closed < 2 { - // TODO: report unmatched '>>' (See issue #54) + if close_and_push(token, buf, depth, Delim::Angle, 2) < 2 { + errors.push(DelimiterError { + expected: Delim::Angle, + found: token.kind(), + span: token.text_range(), + }); } } SyntaxKind::T_LBRACKET => open_and_push(token, buf, depth, Delim::Bracket, 1), SyntaxKind::T_RBRACKET => { - close_and_push(token, buf, depth, Delim::Bracket, 1); + if close_and_push(token, buf, depth, Delim::Bracket, 1) < 1 { + errors.push(DelimiterError { + expected: Delim::Bracket, + found: token.kind(), + span: token.text_range(), + }); + } } SyntaxKind::T_LBRACE => open_and_push(token, buf, depth, Delim::Brace, 1), SyntaxKind::T_RBRACE => { - close_and_push(token, buf, depth, Delim::Brace, 1); + if close_and_push(token, buf, depth, Delim::Brace, 1) < 1 { + errors.push(DelimiterError { + expected: Delim::Brace, + found: token.kind(), + span: token.text_range(), + }); + } } SyntaxKind::T_COMMA if depth.is_empty() && *outer_parens == 1 => { finalize_pair(name, buf, pairs); @@ -295,10 +352,19 @@ mod tests { ) { let _ = src; let elements = tokens_for; - let result = parse_name_type_pairs(elements.into_iter()); + let (result, errors) = parse_name_type_pairs(elements.into_iter()); + assert!(errors.is_empty()); assert_eq!(result, expected); } + #[test] + fn unmatched_shift_errors() { + let src = "function bad(x: Vec>): bool {}"; + let elements = tokens_for(src); + let (_pairs, errors) = parse_name_type_pairs(elements.into_iter()); + assert_eq!(errors.len(), 1); + } + #[rstest] #[case("function f(): u32 {}", Some("u32".to_string()))] #[case("extern function f(): bool;", Some("bool".to_string()))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a335a6a4..1e57bda8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1311,7 +1311,7 @@ pub mod ast { /// Columns declared for the relation. #[must_use] pub fn columns(&self) -> Vec<(String, String)> { - parse_name_type_pairs(self.syntax.children_with_tokens()) + parse_name_type_pairs(self.syntax.children_with_tokens()).0 } /// Primary key column names if specified. @@ -1619,7 +1619,7 @@ pub mod ast { /// Function parameters as name/type pairs. #[must_use] pub fn parameters(&self) -> Vec<(String, String)> { - parse_name_type_pairs(self.syntax.children_with_tokens()) + parse_name_type_pairs(self.syntax.children_with_tokens()).0 } /// Return type text if specified. From 0042034abba4594d9c54988fe72d4c510f7f1376 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 5 Jul 2025 16:45:31 +0100 Subject: [PATCH 2/2] Add delimiter mismatch errors and tests --- src/parser/ast/parse_utils.rs | 73 ++++++++++++++++++++++++----------- src/parser/mod.rs | 16 +++++++- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/src/parser/ast/parse_utils.rs b/src/parser/ast/parse_utils.rs index 2e878b15..43cadb4a 100644 --- a/src/parser/ast/parse_utils.rs +++ b/src/parser/ast/parse_utils.rs @@ -48,8 +48,8 @@ impl std::fmt::Display for DelimiterError { }; write!( f, - "expected '{expected}' before '{found}' at {:#?}", - self.span + "expected '{}' before '{}' at {:#?}", + expected, found, self.span ) } } @@ -114,6 +114,18 @@ fn push(token: &rowan::SyntaxToken, buf: &mut String) { buf.push_str(token.text()); } +fn push_error( + errors: &mut Vec, + expected: Delim, + token: &rowan::SyntaxToken, +) { + errors.push(DelimiterError { + expected, + found: token.kind(), + span: token.text_range(), + }); +} + /// Consume `(name: type)` pairs from the provided iterator. /// /// The iterator should yield the tokens of a parameter or column list @@ -140,7 +152,7 @@ where // parentheses inside types do not terminate parsing. let mut outer_parens = 1usize; - for e in iter { + for e in iter.by_ref() { match e { NodeOrToken::Token(t) => { if handle_token( @@ -159,6 +171,21 @@ where } } + // Capture unmatched closing tokens after the parameter list ends. + for e in iter { + if let NodeOrToken::Token(t) = e { + match t.kind() { + SyntaxKind::T_RPAREN => push_error(&mut errors, Delim::Paren, &t), + SyntaxKind::T_RBRACKET => push_error(&mut errors, Delim::Bracket, &t), + SyntaxKind::T_RBRACE => push_error(&mut errors, Delim::Brace, &t), + SyntaxKind::T_GT | SyntaxKind::T_SHR => push_error(&mut errors, Delim::Angle, &t), + _ => break, + } + } else { + break; + } + } + (pairs, errors) } @@ -185,41 +212,25 @@ fn handle_token( SyntaxKind::T_LT => open_and_push(token, buf, depth, Delim::Angle, 1), SyntaxKind::T_GT => { if close_and_push(token, buf, depth, Delim::Angle, 1) < 1 { - errors.push(DelimiterError { - expected: Delim::Angle, - found: token.kind(), - span: token.text_range(), - }); + push_error(errors, Delim::Angle, token); } } SyntaxKind::T_SHL => open_and_push(token, buf, depth, Delim::Angle, 2), SyntaxKind::T_SHR => { if close_and_push(token, buf, depth, Delim::Angle, 2) < 2 { - errors.push(DelimiterError { - expected: Delim::Angle, - found: token.kind(), - span: token.text_range(), - }); + push_error(errors, Delim::Angle, token); } } SyntaxKind::T_LBRACKET => open_and_push(token, buf, depth, Delim::Bracket, 1), SyntaxKind::T_RBRACKET => { if close_and_push(token, buf, depth, Delim::Bracket, 1) < 1 { - errors.push(DelimiterError { - expected: Delim::Bracket, - found: token.kind(), - span: token.text_range(), - }); + push_error(errors, Delim::Bracket, token); } } SyntaxKind::T_LBRACE => open_and_push(token, buf, depth, Delim::Brace, 1), SyntaxKind::T_RBRACE => { if close_and_push(token, buf, depth, Delim::Brace, 1) < 1 { - errors.push(DelimiterError { - expected: Delim::Brace, - found: token.kind(), - span: token.text_range(), - }); + push_error(errors, Delim::Brace, token); } } SyntaxKind::T_COMMA if depth.is_empty() && *outer_parens == 1 => { @@ -365,6 +376,22 @@ mod tests { assert_eq!(errors.len(), 1); } + #[test] + fn unmatched_bracket_error() { + let src = "function bad(x: Vec], y: u32) {}"; + let elements = tokens_for(src); + let (_pairs, errors) = parse_name_type_pairs(elements.into_iter()); + assert_eq!(errors.len(), 1); + } + + #[test] + fn unmatched_brace_error() { + let src = "function bad(x: u32}, y: bool) {}"; + let elements = tokens_for(src); + let (_pairs, errors) = parse_name_type_pairs(elements.into_iter()); + assert_eq!(errors.len(), 1); + } + #[rstest] #[case("function f(): u32 {}", Some("u32".to_string()))] #[case("extern function f(): bool;", Some("bool".to_string()))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1e57bda8..c359364e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1309,9 +1309,15 @@ pub mod ast { } /// Columns declared for the relation. + /// + /// Delimiter errors detected during parsing are ignored. + /// This may change in future to surface these diagnostics. #[must_use] pub fn columns(&self) -> Vec<(String, String)> { - parse_name_type_pairs(self.syntax.children_with_tokens()).0 + let (pairs, errors) = parse_name_type_pairs(self.syntax.children_with_tokens()); + // Delimiter errors are ignored for now. Future versions may surface them. + let _ = errors; + pairs } /// Primary key column names if specified. @@ -1617,9 +1623,15 @@ pub mod ast { } /// Function parameters as name/type pairs. + /// + /// Delimiter errors detected during parsing are ignored. + /// This may change in future to surface these diagnostics. #[must_use] pub fn parameters(&self) -> Vec<(String, String)> { - parse_name_type_pairs(self.syntax.children_with_tokens()).0 + let (pairs, errors) = parse_name_type_pairs(self.syntax.children_with_tokens()); + // Delimiter errors are ignored for now. Future versions may surface them. + let _ = errors; + pairs } /// Return type text if specified.