Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 105 additions & 12 deletions src/parser/ast/parse_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
//! This module contains small functions reused by multiple AST nodes when
//! extracting typed data from the CST.

use rowan::{NodeOrToken, SyntaxElement};
use rowan::{NodeOrToken, SyntaxElement, TextRange};

use super::skip_whitespace_and_comments;
use crate::{DdlogLanguage, SyntaxKind};

#[derive(Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Delim {
Paren,
Angle,
Expand All @@ -23,6 +23,39 @@ enum Delim {
#[derive(Default)]
struct DelimStack(Vec<Delim>);

/// A delimiter problem recorded while scanning a name/type pair list.
///
/// Values are created by `push_error`, which fills `found` and `span` from
/// the token being examined. The `Display` impl renders the error as
/// `expected '<closer>' before '<token>' at <span>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct DelimiterError {
// Delimiter kind supplied by the caller that recorded the error.
expected: Delim,
// Syntax kind of the token at which the error was recorded.
found: SyntaxKind,
// Text range of that token.
span: TextRange,
}

impl std::fmt::Display for DelimiterError {
    /// Writes the error as `expected '<closer>' before '<token>' at <span>`.
    ///
    /// `<closer>` is the closing character of the expected delimiter kind,
    /// `<token>` is the text of the closing token that was found (or `?` for
    /// any other token kind), and `<span>` is the token's text range.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Closing character belonging to the expected delimiter kind.
        let closer = match self.expected {
            Delim::Brace => "}",
            Delim::Bracket => "]",
            Delim::Angle => ">",
            Delim::Paren => ")",
        };

        // Textual form of the token that was found; only closing tokens are
        // spelled out, everything else collapses to a placeholder.
        let offender = match self.found {
            SyntaxKind::T_RBRACE => "}",
            SyntaxKind::T_RBRACKET => "]",
            SyntaxKind::T_SHR => ">>",
            SyntaxKind::T_GT => ">",
            SyntaxKind::T_RPAREN => ")",
            _ => "?",
        };

        write!(f, "expected '{}' before '{}' at {:#?}", closer, offender, self.span)
    }
}

// Empty impl: `DelimiterError` uses the trait's default methods and relies on
// its `Debug` derive and `Display` impl for presentation.
impl std::error::Error for DelimiterError {}

impl DelimStack {
fn open(&mut self, delim: Delim, count: usize) {
for _ in 0..count {
Expand Down Expand Up @@ -81,13 +114,25 @@ fn push(token: &rowan::SyntaxToken<DdlogLanguage>, buf: &mut String) {
buf.push_str(token.text());
}

/// Append a [`DelimiterError`] describing `token` to `errors`.
///
/// The error stores the `expected` delimiter kind given by the caller
/// together with the kind and text range read from `token`.
fn push_error(
    errors: &mut Vec<DelimiterError>,
    expected: Delim,
    token: &rowan::SyntaxToken<DdlogLanguage>,
) {
    let found = token.kind();
    let span = token.text_range();
    errors.push(DelimiterError { expected, found, span });
}

/// Consume `(name: type)` pairs from the provided iterator.
///
/// The iterator should yield the tokens of a parameter or column list
/// starting at the opening parenthesis. The returned tuple contains the
/// parsed pairs (each name with its associated type text) followed by any
/// delimiter errors recorded while scanning.
#[must_use]
pub(super) fn parse_name_type_pairs<I>(mut iter: I) -> Vec<(String, String)>
pub(super) fn parse_name_type_pairs<I>(mut iter: I) -> (Vec<(String, String)>, Vec<DelimiterError>)
where
I: Iterator<Item = SyntaxElement<DdlogLanguage>>,
{
Expand All @@ -99,14 +144,15 @@ where
}

let mut pairs = Vec::new();
let mut errors = Vec::new();
let mut buf = String::new();
let mut name: Option<String> = None;
let mut depth = DelimStack::default();
// Track the outer parameter list separately so that nested
// parentheses inside types do not terminate parsing.
let mut outer_parens = 1usize;

for e in iter {
for e in iter.by_ref() {
match e {
NodeOrToken::Token(t) => {
if handle_token(
Expand All @@ -116,6 +162,7 @@ where
&mut pairs,
&mut depth,
&mut outer_parens,
&mut errors,
) {
break;
}
Expand All @@ -124,7 +171,22 @@ where
}
}

pairs
// Capture unmatched closing tokens after the parameter list ends.
for e in iter {
if let NodeOrToken::Token(t) = e {
match t.kind() {
SyntaxKind::T_RPAREN => push_error(&mut errors, Delim::Paren, &t),
SyntaxKind::T_RBRACKET => push_error(&mut errors, Delim::Bracket, &t),
SyntaxKind::T_RBRACE => push_error(&mut errors, Delim::Brace, &t),
SyntaxKind::T_GT | SyntaxKind::T_SHR => push_error(&mut errors, Delim::Angle, &t),
_ => break,
}
} else {
break;
}
}

(pairs, errors)
}

/// Handle a single token during name-type pair parsing.
Expand All @@ -135,6 +197,7 @@ fn handle_token(
pairs: &mut Vec<(String, String)>,
depth: &mut DelimStack,
outer_parens: &mut usize,
errors: &mut Vec<DelimiterError>,
) -> bool {
match token.kind() {
SyntaxKind::T_LPAREN => open_and_push(token, buf, depth, Delim::Paren, 1),
Expand All @@ -148,22 +211,27 @@ fn handle_token(
}
SyntaxKind::T_LT => open_and_push(token, buf, depth, Delim::Angle, 1),
SyntaxKind::T_GT => {
close_and_push(token, buf, depth, Delim::Angle, 1);
if close_and_push(token, buf, depth, Delim::Angle, 1) < 1 {
push_error(errors, Delim::Angle, token);
}
}
SyntaxKind::T_SHL => open_and_push(token, buf, depth, Delim::Angle, 2),
SyntaxKind::T_SHR => {
let closed = close_and_push(token, buf, depth, Delim::Angle, 2);
if closed < 2 {
// TODO: report unmatched '>>' (See issue #54)
if close_and_push(token, buf, depth, Delim::Angle, 2) < 2 {
push_error(errors, Delim::Angle, token);
}
}
SyntaxKind::T_LBRACKET => open_and_push(token, buf, depth, Delim::Bracket, 1),
SyntaxKind::T_RBRACKET => {
close_and_push(token, buf, depth, Delim::Bracket, 1);
if close_and_push(token, buf, depth, Delim::Bracket, 1) < 1 {
push_error(errors, Delim::Bracket, token);
}
}
SyntaxKind::T_LBRACE => open_and_push(token, buf, depth, Delim::Brace, 1),
SyntaxKind::T_RBRACE => {
close_and_push(token, buf, depth, Delim::Brace, 1);
if close_and_push(token, buf, depth, Delim::Brace, 1) < 1 {
push_error(errors, Delim::Brace, token);
}
}
SyntaxKind::T_COMMA if depth.is_empty() && *outer_parens == 1 => {
finalize_pair(name, buf, pairs);
Expand Down Expand Up @@ -295,10 +363,35 @@ mod tests {
) {
let _ = src;
let elements = tokens_for;
let result = parse_name_type_pairs(elements.into_iter());
let (result, errors) = parse_name_type_pairs(elements.into_iter());
assert!(errors.is_empty());
assert_eq!(result, expected);
}

#[test]
fn unmatched_shift_errors() {
    // `>>` asks to close two angle brackets, but `Vec<` opened only one.
    let tokens = tokens_for("function bad(x: Vec<u8>>): bool {}");
    let (_, diagnostics) = parse_name_type_pairs(tokens.into_iter());
    assert_eq!(diagnostics.len(), 1);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

#[test]
fn unmatched_bracket_error() {
    // The `]` has no `[` before it in the parameter list.
    let tokens = tokens_for("function bad(x: Vec<u8>], y: u32) {}");
    let (_, diagnostics) = parse_name_type_pairs(tokens.into_iter());
    assert_eq!(diagnostics.len(), 1);
}

#[test]
fn unmatched_brace_error() {
    // The `}` has no `{` before it in the parameter list.
    let tokens = tokens_for("function bad(x: u32}, y: bool) {}");
    let (_, diagnostics) = parse_name_type_pairs(tokens.into_iter());
    assert_eq!(diagnostics.len(), 1);
}

#[rstest]
#[case("function f(): u32 {}", Some("u32".to_string()))]
#[case("extern function f(): bool;", Some("bool".to_string()))]
Expand Down
16 changes: 14 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1309,9 +1309,15 @@ pub mod ast {
}

/// Columns declared for the relation.
///
/// Delimiter errors detected during parsing are ignored.
/// This may change in future to surface these diagnostics.
#[must_use]
pub fn columns(&self) -> Vec<(String, String)> {
parse_name_type_pairs(self.syntax.children_with_tokens())
let (pairs, errors) = parse_name_type_pairs(self.syntax.children_with_tokens());
// Delimiter errors are ignored for now. Future versions may surface them.
let _ = errors;
pairs
}

/// Primary key column names if specified.
Expand Down Expand Up @@ -1617,9 +1623,15 @@ pub mod ast {
}

/// Function parameters as name/type pairs.
///
/// Delimiter errors detected during parsing are ignored.
/// This may change in future to surface these diagnostics.
#[must_use]
pub fn parameters(&self) -> Vec<(String, String)> {
parse_name_type_pairs(self.syntax.children_with_tokens())
let (pairs, errors) = parse_name_type_pairs(self.syntax.children_with_tokens());
// Delimiter errors are ignored for now. Future versions may surface them.
let _ = errors;
pairs
}

/// Return type text if specified.
Expand Down