Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 126 additions & 6 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,17 @@ pub fn parse(src: &str) -> Parsed {
}
}

/// Identifies and collects the spans of `import` and `typedef` statements in a token stream.
///
/// Returns a tuple holding the spans of `import` statements, the spans of `typedef`/`extern type`
/// declarations, and any parse errors encountered during import span collection.
///
/// # Examples
///
/// ```no_run
/// let (imports, typedefs, errors) = parse_tokens(&tokens, src);
/// assert!(imports.iter().all(|span| span.start < span.end));
/// ```
fn parse_tokens(
tokens: &[(SyntaxKind, Span)],
src: &str,
Expand All @@ -113,16 +124,38 @@ fn parse_tokens(
(import_spans, typedef_spans, errors)
}

/// Scan the token stream for `import` statements and record their spans.
/// Scans the token stream for `import` statements and collects their spans.
///
/// Returns the list of spans and any parse errors encountered while
/// recovering from malformed import statements.
/// Parses the token stream to identify well-formed `import` statements, recording the
/// corresponding spans. If a malformed `import` statement is encountered, attempts to
/// recover by skipping to the end of the line and records any parse errors encountered
/// during recovery.
///
/// # Returns
///
/// A tuple containing a vector of spans for valid `import` statements and a vector of
/// parse errors for malformed statements.
///
/// # Examples
///
/// ```no_run
/// let src = "import foo::bar as baz;";
/// let tokens = tokenize(src);
/// let (import_spans, errors) = collect_import_spans(&tokens, src);
/// assert!(!import_spans.is_empty());
/// ```
fn collect_import_spans(
tokens: &[(SyntaxKind, Span)],
src: &str,
) -> (Vec<Span>, Vec<Simple<SyntaxKind>>) {
type State<'a> = SpanCollector<'a, Vec<Simple<SyntaxKind>>>;

/// Attempts to parse an `import` statement at the given span, recording its span or collecting errors.
///
/// If parsing succeeds, the span of the `import` statement is added to the state's span list and the token stream is advanced past it.
/// On failure, errors are collected and the stream is advanced to the end of the current line.
fn handle_import(st: &mut State<'_>, span: Span) {
let ws = filter(|kind: &SyntaxKind| {
matches!(kind, SyntaxKind::T_WHITESPACE | SyntaxKind::T_COMMENT)
Expand Down Expand Up @@ -174,13 +207,37 @@ fn collect_import_spans(
st.into_parts()
}

/// Collect the spans of `typedef` and `extern type` declarations.
/// Collects the spans of `typedef` and `extern type` declarations in the token stream.
///
/// Spans cover the full declaration line so tokens can be grouped into
/// `N_TYPE_DEF` nodes later when building the CST.
/// Each span covers the entire line of the declaration, enabling grouping of tokens into
/// `N_TYPE_DEF` nodes during CST construction. Only `extern type` declarations are recognised
/// for `extern` statements; other forms are skipped.
///
/// # Returns
///
/// A vector of spans, each representing a `typedef` or `extern type` declaration.
///
/// # Examples
///
/// ```no_run
/// let src = "typedef Foo = Bar;\nextern type Baz;\n";
/// let tokens = tokenize(src);
/// let spans = collect_typedef_spans(&tokens, src);
/// assert_eq!(spans.len(), 2);
/// ```
fn collect_typedef_spans(tokens: &[(SyntaxKind, Span)], src: &str) -> Vec<Span> {
type State<'a> = SpanCollector<'a, ()>;

/// Handles a `typedef` token by advancing the token stream to the end of the line and recording the span.
///
/// Records the span from the start of the `typedef` token to the end of the line in the state's span list.
///
/// # Examples
///
/// ```no_run
/// // Given a State positioned at a typedef token:
/// handle_typedef(&mut state, typedef_span);
/// // The span from the typedef to the line end is recorded in state.spans.
/// ```
fn handle_typedef(st: &mut State<'_>, span: Span) {
let start = span.start;
st.stream.advance();
Expand All @@ -189,6 +246,18 @@ fn collect_typedef_spans(tokens: &[(SyntaxKind, Span)], src: &str) -> Vec<Span>
st.spans.push(start..end);
}

/// Handles an `extern` declaration, collecting the span if it is an `extern type` statement.
///
/// Advances the token stream past the `extern` keyword and any inline whitespace. If the next
/// token is `type`, advances past it and collects the span up to the end of the line. Otherwise,
/// skips the remainder of the line without collecting a span.
///
/// # Examples
///
/// ```no_run
/// // Used internally during typedef span collection:
/// handle_extern(&mut state, span);
/// ```
fn handle_extern(st: &mut State<'_>, span: Span) {
let start = span.start;
st.stream.advance();
Expand Down Expand Up @@ -528,6 +597,18 @@ mod tests {
use crate::tokenize;
use rstest::rstest;

/// Tests that `skip_until` advances the token stream cursor past the specified span end.
///
/// # Examples
///
/// ```no_run
/// let src = "import foo\n";
/// let tokens = tokenize(src);
/// let mut stream = TokenStream::new(&tokens, src);
/// let end = stream.line_end(0);
/// stream.skip_until(end);
/// assert_eq!(stream.cursor(), tokens.len());
/// ```
#[rstest]
fn skip_until_advances_past_span() {
let src = "import foo\n";
Expand All @@ -538,6 +619,19 @@ mod tests {
assert_eq!(stream.cursor(), tokens.len());
}

/// Tests that `TokenStream::line_end` returns the position immediately after the end of the current line.
///
/// # Examples
///
/// ```no_run
/// let src = "typedef A = string\nnext";
/// let tokens = tokenize(src);
/// let stream = TokenStream::new(&tokens, src);
/// let start = 1; // token after 'typedef'
/// let end = stream.line_end(start);
/// let newline = src.find('\n').unwrap_or_else(|| panic!("newline missing"));
/// assert_eq!(end, newline + 1);
/// ```
#[rstest]
fn line_end_returns_span_end() {
let src = "typedef A = string\nnext";
Expand All @@ -549,6 +643,21 @@ mod tests {
assert_eq!(end, newline + 1);
}

/// Tests that `skip_ws_inline` correctly skips inline whitespace tokens in the token stream.
///
/// # Examples
///
/// ```no_run
/// let src = "extern type Foo";
/// let tokens = tokenize(src);
/// let mut stream = TokenStream::new(&tokens, src);
/// stream.advance();
/// stream.skip_ws_inline();
/// assert!(matches!(
/// stream.peek().map(|t| t.0),
/// Some(SyntaxKind::K_TYPE)
/// ));
/// ```
#[rstest]
fn skip_ws_inline_skips_spaces() {
let src = "extern type Foo";
Expand All @@ -562,6 +671,17 @@ mod tests {
));
}

/// Tests that `line_end` returns the length of the source string when called with an out-of-bounds index.
///
/// # Examples
///
/// ```no_run
/// let src = "typedef A = string\n";
/// let tokens = tokenize(src);
/// let stream = TokenStream::new(&tokens, src);
/// let start = tokens.len();
/// assert_eq!(stream.line_end(start), src.len());
/// ```
#[rstest]
fn line_end_out_of_bounds_returns_len() {
let src = "typedef A = string\n";
Expand Down
23 changes: 22 additions & 1 deletion src/parser/span_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,28 @@ pub(crate) struct SpanCollector<'a, Extra> {
}

impl<'a, Extra> SpanCollector<'a, Extra> {
/// Create a new collector over `tokens`.
/// Constructs a new `SpanCollector` for the given token stream, source string, and extra state.
///
/// # Parameters
///
/// - `tokens`: Slice of token and span pairs to be scanned.
/// - `src`: The source string corresponding to the tokens.
/// - `extra`: Additional state required for parsing logic.
///
/// # Returns
///
/// A `SpanCollector` instance ready to collect statement spans during parsing.
///
/// # Examples
///
/// ```no_run
/// use crate::parser::{SpanCollector, SyntaxKind, Span};
///
/// let tokens: &[(SyntaxKind, Span)] = &[];
/// let src = "";
/// let extra = ();
/// let collector = SpanCollector::new(tokens, src, extra);
/// ```
#[must_use]
pub(crate) fn new(tokens: &'a [(SyntaxKind, Span)], src: &'a str, extra: Extra) -> Self {
Self {
Expand Down
112 changes: 103 additions & 9 deletions src/parser/token_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,16 @@ pub(crate) struct TokenStream<'a> {
}

impl<'a> TokenStream<'a> {
/// Create a new stream over `tokens`.
/// Constructs a new `TokenStream` over the provided tokens and source text.
///
/// The stream starts with the cursor at the beginning of the token slice.
///
/// # Examples
///
/// ```no_run
/// let stream = TokenStream::new(&tokens, src);
/// assert_eq!(stream.cursor(), 0);
/// ```
#[must_use]
pub(crate) fn new(tokens: &'a [(SyntaxKind, Span)], src: &'a str) -> Self {
Self {
Expand All @@ -37,38 +46,92 @@ impl<'a> TokenStream<'a> {
}
}

/// Current cursor position.
/// Returns the current cursor position within the token stream.
///
/// # Examples
///
/// ```no_run
/// let stream = TokenStream::new(tokens, src);
/// let pos = stream.cursor();
/// assert_eq!(pos, 0);
/// ```
#[must_use]
pub(crate) fn cursor(&self) -> usize {
self.cursor
}

/// Looks at the token under the cursor without consuming it.
///
/// The `(kind, span)` pair is cloned out of the underlying slice, so the
/// stream itself is left untouched. Yields `None` when the cursor does not
/// index a token, i.e. the stream is exhausted.
///
/// # Examples
///
/// ```ignore
/// let stream = TokenStream::new(&tokens, src);
/// match stream.peek() {
///     Some((kind, span)) => { /* inspect the current token */ }
///     None => { /* nothing left to read */ }
/// }
/// ```
#[must_use]
pub(crate) fn peek(&self) -> Option<(SyntaxKind, Span)> {
    let current = self.tokens.get(self.cursor)?;
    Some(current.clone())
}

/// Steps the cursor forward by a single token.
///
/// Calling this on an exhausted stream is a no-op: this method never moves
/// the cursor past `tokens().len()`.
///
/// # Examples
///
/// ```ignore
/// let mut stream = TokenStream::new(&tokens, src);
/// let before = stream.cursor();
/// stream.advance();
/// // Either one token was consumed or the stream was already exhausted.
/// assert!(stream.cursor() == before + 1 || stream.cursor() == before);
/// ```
pub(crate) fn advance(&mut self) {
    // Guard: nothing left to consume, so leave the cursor where it is.
    if self.cursor >= self.tokens.len() {
        return;
    }
    self.cursor += 1;
}

/// Access the underlying token slice.
/// Returns a reference to the underlying slice of tokens.
///
/// # Examples
///
/// ```no_run
/// let stream = TokenStream::new(&tokens, src);
/// let all_tokens = stream.tokens();
/// assert_eq!(all_tokens.len(), tokens.len());
/// ```
#[must_use]
pub(crate) fn tokens(&self) -> &[(SyntaxKind, Span)] {
self.tokens
}

/// Access the source text.
/// Returns a reference to the source text associated with this token stream.
///
/// # Examples
///
/// ```no_run
/// let stream = TokenStream::new(tokens, "let x = 1;");
/// assert_eq!(stream.src(), "let x = 1;");
/// ```
#[must_use]
pub(crate) fn src(&self) -> &str {
self.src
}

/// Advance past tokens whose span ends before or at `end`.
/// Advances the cursor past all tokens whose span ends at or before the specified position.
///
/// Tokens are skipped until a token is found whose span end is greater than `end`, or until no tokens remain.
///
/// # Examples
///
/// ```no_run
/// use parser::token_stream::TokenStream;
/// // Assume tokens is a Vec<(SyntaxKind, Span)> and src is the source string.
/// let mut stream = TokenStream::new(&tokens, src);
/// stream.skip_until(42);
/// // The cursor now points to the first token whose span ends after position 42.
/// ```
pub(crate) fn skip_until(&mut self, end: usize) {
while let Some(span) = self.tokens.get(self.cursor).map(|t| &t.1) {
if span.end <= end {
Expand All @@ -79,7 +142,18 @@ impl<'a> TokenStream<'a> {
}
}

/// Return the position one past the newline after `start` or the source length.
/// Returns the position immediately after the next newline character following the token at `start`, or the end of the source if no newline is found.
///
/// Iterates through tokens starting at the given index, updating the end position to each token's span end. Stops at the first token whose span contains a newline character, or returns the source length if no such token exists.
///
/// # Examples
///
/// ```no_run
/// let tokens = lex("foo\nbar");
/// let stream = TokenStream::new(&tokens, "foo\nbar");
/// let pos = stream.line_end(0);
/// assert_eq!(pos, 4); // position after '\n'
/// ```
#[must_use]
pub(crate) fn line_end(&self, start: usize) -> usize {
let mut end = self.tokens.get(start).map_or(self.src.len(), |t| t.1.end);
Expand All @@ -92,7 +166,27 @@ impl<'a> TokenStream<'a> {
end
}

/// Skip whitespace and comments that do not contain newlines.
/// Advances the cursor past whitespace and comment tokens that do not contain newlines.
///
/// Skips over consecutive whitespace or comment tokens as long as their spans do not
/// include a newline character. Stops at the first token that is not whitespace/comment
/// or contains a newline.
///
/// # Examples
///
/// ```no_run
/// use parser::{TokenStream, SyntaxKind, Span};
///
/// let src = "let x = 42; // comment";
/// let tokens = vec![
/// (SyntaxKind::T_WHITESPACE, 0..1),
/// (SyntaxKind::T_COMMENT, 10..20),
/// (SyntaxKind::T_IDENT, 21..22),
/// ];
/// let mut stream = TokenStream::new(&tokens, src);
/// stream.skip_ws_inline();
/// assert_eq!(stream.cursor(), 2);
/// ```
pub(crate) fn skip_ws_inline(&mut self) {
while let Some(tok) = self.tokens.get(self.cursor) {
if matches!(tok.0, SyntaxKind::T_WHITESPACE | SyntaxKind::T_COMMENT)
Expand Down