diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 02dac9a5..325fd0c9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -103,6 +103,17 @@ pub fn parse(src: &str) -> Parsed { } } +/// Identifies and collects the spans of `import` and `typedef` statements in a token stream. +/// +/// Returns tuples containing the spans of `import` statements, `typedef`/`extern type` declarations, +/// and any parse errors encountered during import span collection. +/// +/// # Examples +/// +/// ```no_run +/// let (imports, typedefs, errors) = parse_tokens(&tokens, src); +/// assert!(imports.iter().all(|span| span.start < span.end)); +/// ``` fn parse_tokens( tokens: &[(SyntaxKind, Span)], src: &str, @@ -113,16 +124,38 @@ fn parse_tokens( (import_spans, typedef_spans, errors) } -/// Scan the token stream for `import` statements and record their spans. +/// Scans the token stream for `import` statements and collects their spans. /// -/// Returns the list of spans and any parse errors encountered while -/// recovering from malformed import statements. +/// Parses the token stream to identify well-formed `import` statements, recording the +/// corresponding spans. If a malformed `import` statement is encountered, attempts to +/// recover by skipping to the end of the line and records any parse errors encountered +/// during recovery. +/// +/// # Returns +/// +/// A tuple containing a vector of spans for valid `import` statements and a vector of +/// parse errors for malformed statements. +/// +/// # Examples +/// +/// ```no_run +/// use parser::{collect_import_spans, SyntaxKind, Span}; +/// +/// let tokens: Vec<(SyntaxKind, Span)> = /* tokenized source */; +/// let src = "import foo::bar as baz;"; +/// let (import_spans, errors) = collect_import_spans(&tokens, src); +/// assert!(!import_spans.is_empty()); +/// ``` fn collect_import_spans( tokens: &[(SyntaxKind, Span)], src: &str, ) -> (Vec, Vec>) { type State<'a> = SpanCollector<'a, Vec>>; + /// Attempts to parse an `import` statement at the given span, recording its span or collecting errors. + /// + /// If parsing succeeds, the span of the `import` statement is added to the state's span list and the token stream is advanced past it. + /// On failure, errors are collected and the stream is advanced to the end of the current line. fn handle_import(st: &mut State<'_>, span: Span) { let ws = filter(|kind: &SyntaxKind| { matches!(kind, SyntaxKind::T_WHITESPACE | SyntaxKind::T_COMMENT) @@ -174,13 +207,37 @@ fn collect_import_spans( st.into_parts() } -/// Collect the spans of `typedef` and `extern type` declarations. +/// Collects the spans of `typedef` and `extern type` declarations in the token stream. /// -/// Spans cover the full declaration line so tokens can be grouped into -/// `N_TYPE_DEF` nodes later when building the CST. +/// Each span covers the entire line of the declaration, enabling grouping of tokens into +/// `N_TYPE_DEF` nodes during CST construction. Only `extern type` declarations are recognised +/// for `extern` statements; other forms are skipped. +/// +/// # Returns +/// +/// A vector of spans, each representing a `typedef` or `extern type` declaration. +/// +/// # Examples +/// +/// ```no_run +/// let tokens = tokenize("typedef Foo = Bar;\nextern type Baz;\n", None); +/// let spans = collect_typedef_spans(&tokens, "typedef Foo = Bar;\nextern type Baz;\n"); +/// assert_eq!(spans.len(), 2); +/// ``` fn collect_typedef_spans(tokens: &[(SyntaxKind, Span)], src: &str) -> Vec { type State<'a> = SpanCollector<'a, ()>; + /// Handles a `typedef` token by advancing the token stream to the end of the line and recording the span. + /// + /// Records the span from the start of the `typedef` token to the end of the line in the state's span list. + /// + /// # Examples + /// + /// ```no_run + /// // Given a State positioned at a typedef token: + /// handle_typedef(&mut state, typedef_span); + /// // The span from the typedef to the line end is recorded in state.spans. + /// ``` fn handle_typedef(st: &mut State<'_>, span: Span) { let start = span.start; st.stream.advance(); @@ -189,6 +246,18 @@ fn collect_typedef_spans(tokens: &[(SyntaxKind, Span)], src: &str) -> Vec st.spans.push(start..end); } + /// Handles an `extern` declaration, collecting the span if it is an `extern type` statement. + /// + /// Advances the token stream past the `extern` keyword and any inline whitespace. If the next + /// token is `type`, advances past it and collects the span up to the end of the line. Otherwise, + /// skips the remainder of the line without collecting a span. + /// + /// # Examples + /// + /// ```no_run + /// // Used internally during typedef span collection: + /// handle_extern(&mut state, span); + /// ``` fn handle_extern(st: &mut State<'_>, span: Span) { let start = span.start; st.stream.advance(); @@ -528,6 +597,18 @@ mod tests { use crate::tokenize; use rstest::rstest; + /// Tests that `skip_until` advances the token stream cursor past the specified span end. + /// + /// # Examples + /// + /// ```no_run + /// let src = "import foo\n"; + /// let tokens = tokenize(src); + /// let mut stream = TokenStream::new(&tokens, src); + /// let end = stream.line_end(0); + /// stream.skip_until(end); + /// assert_eq!(stream.cursor(), tokens.len()); + /// ``` #[rstest] fn skip_until_advances_past_span() { let src = "import foo\n"; @@ -538,6 +619,19 @@ mod tests { assert_eq!(stream.cursor(), tokens.len()); } + /// Tests that `TokenStream::line_end` returns the position immediately after the end of the current line. + /// + /// # Examples + /// + /// ```no_run + /// let src = "typedef A = string\nnext"; + /// let tokens = tokenize(src); + /// let stream = TokenStream::new(&tokens, src); + /// let start = 1; // token after 'typedef' + /// let end = stream.line_end(start); + /// let newline = src.find('\n').unwrap_or_else(|| panic!("newline missing")); + /// assert_eq!(end, newline + 1); + /// ``` #[rstest] fn line_end_returns_span_end() { let src = "typedef A = string\nnext"; @@ -549,6 +643,21 @@ mod tests { assert_eq!(end, newline + 1); } + /// Tests that `skip_ws_inline` correctly skips inline whitespace tokens in the token stream. + /// + /// # Examples + /// + /// ```no_run + /// let src = "extern type Foo"; + /// let tokens = tokenize(src); + /// let mut stream = TokenStream::new(&tokens, src); + /// stream.advance(); + /// stream.skip_ws_inline(); + /// assert!(matches!( + /// stream.peek().map(|t| t.0), + /// Some(SyntaxKind::K_TYPE) + /// )); + /// ``` #[rstest] fn skip_ws_inline_skips_spaces() { let src = "extern type Foo"; @@ -562,6 +671,17 @@ mod tests { )); } + /// Tests that `line_end` returns the length of the source string when called with an out-of-bounds index. + /// + /// # Examples + /// + /// ```no_run + /// let src = "typedef A = string\n"; + /// let tokens = tokenize(src); + /// let stream = TokenStream::new(&tokens, src); + /// let start = tokens.len(); + /// assert_eq!(stream.line_end(start), src.len()); + /// ``` #[rstest] fn line_end_out_of_bounds_returns_len() { let src = "typedef A = string\n"; diff --git a/src/parser/span_collector.rs b/src/parser/span_collector.rs index 4ea27b69..5db036d9 100644 --- a/src/parser/span_collector.rs +++ b/src/parser/span_collector.rs @@ -18,7 +18,28 @@ pub(crate) struct SpanCollector<'a, Extra> { } impl<'a, Extra> SpanCollector<'a, Extra> { - /// Create a new collector over `tokens`. + /// Constructs a new `SpanCollector` for the given token stream, source string, and extra state. + /// + /// # Parameters + /// + /// - `tokens`: Slice of token and span pairs to be scanned. + /// - `src`: The source string corresponding to the tokens. + /// - `extra`: Additional state required for parsing logic. + /// + /// # Returns + /// + /// A `SpanCollector` instance ready to collect statement spans during parsing. + /// + /// # Examples + /// + /// ```no_run + /// use crate::parser::{SpanCollector, SyntaxKind, Span}; + /// + /// let tokens: &[(SyntaxKind, Span)] = &[]; + /// let src = ""; + /// let extra = (); + /// let collector = SpanCollector::new(tokens, src, extra); + /// ``` #[must_use] pub(crate) fn new(tokens: &'a [(SyntaxKind, Span)], src: &'a str, extra: Extra) -> Self { Self { diff --git a/src/parser/token_stream.rs b/src/parser/token_stream.rs index 533d6bf4..7e3d610c 100644 --- a/src/parser/token_stream.rs +++ b/src/parser/token_stream.rs @@ -27,7 +27,16 @@ pub(crate) struct TokenStream<'a> { } impl<'a> TokenStream<'a> { - /// Create a new stream over `tokens`. + /// Constructs a new `TokenStream` over the provided tokens and source text. + /// + /// The stream starts with the cursor at the beginning of the token slice. + /// + /// # Examples + /// + /// ```no_run + /// let stream = TokenStream::new(&tokens, src); + /// assert_eq!(stream.cursor(), 0); + /// ``` #[must_use] pub(crate) fn new(tokens: &'a [(SyntaxKind, Span)], src: &'a str) -> Self { Self { @@ -37,38 +46,92 @@ impl<'a> TokenStream<'a> { } } - /// Current cursor position. + /// Returns the current cursor position within the token stream. + /// + /// # Examples + /// + /// ```no_run + /// let stream = TokenStream::new(tokens, src); + /// let pos = stream.cursor(); + /// assert_eq!(pos, 0); + /// ``` #[must_use] pub(crate) fn cursor(&self) -> usize { self.cursor } - /// Peek at the token under the cursor. + /// Returns the token at the current cursor position, if any. + /// + /// Returns `None` if the cursor is at or beyond the end of the token stream. + /// + /// # Examples + /// + /// ```no_run + /// let stream = TokenStream::new(&tokens, src); + /// if let Some((kind, span)) = stream.peek() { + /// // Inspect the current token + /// } + /// ``` #[must_use] pub(crate) fn peek(&self) -> Option<(SyntaxKind, Span)> { self.tokens.get(self.cursor).cloned() } - /// Advance the cursor by one token. + /// Moves the cursor forward by one token if not already at the end of the token stream. + /// + /// # Examples + /// + /// ```no_run + /// let mut stream = TokenStream::new(tokens, src); + /// stream.advance(); + /// assert_eq!(stream.cursor(), 1); + /// ``` pub(crate) fn advance(&mut self) { if self.cursor < self.tokens.len() { self.cursor += 1; } } - /// Access the underlying token slice. + /// Returns a reference to the underlying slice of tokens. + /// + /// # Examples + /// + /// ```no_run + /// let stream = TokenStream::new(&tokens, src); + /// let all_tokens = stream.tokens(); + /// assert_eq!(all_tokens.len(), tokens.len()); + /// ``` #[must_use] pub(crate) fn tokens(&self) -> &[(SyntaxKind, Span)] { self.tokens } - /// Access the source text. + /// Returns a reference to the source text associated with this token stream. + /// + /// # Examples + /// + /// ```no_run + /// let stream = TokenStream::new(tokens, "let x = 1;"); + /// assert_eq!(stream.src(), "let x = 1;"); + /// ``` #[must_use] pub(crate) fn src(&self) -> &str { self.src } - /// Advance past tokens whose span ends before or at `end`. + /// Advances the cursor past all tokens whose span ends at or before the specified position. + /// + /// Tokens are skipped until a token is found whose span end is greater than `end`, or until no tokens remain. + /// + /// # Examples + /// + /// ```no_run + /// use parser::token_stream::TokenStream; + /// // Assume tokens is a Vec<(SyntaxKind, Span)> and src is the source string. + /// let mut stream = TokenStream::new(&tokens, src); + /// stream.skip_until(42); + /// // The cursor now points to the first token whose span ends after position 42. + /// ``` pub(crate) fn skip_until(&mut self, end: usize) { while let Some(span) = self.tokens.get(self.cursor).map(|t| &t.1) { if span.end <= end { @@ -79,7 +142,18 @@ impl<'a> TokenStream<'a> { } } - /// Return the position one past the newline after `start` or the source length. + /// Returns the position immediately after the next newline character following the token at `start`, or the end of the source if no newline is found. + /// + /// Iterates through tokens starting at the given index, updating the end position to each token's span end. Stops at the first token whose span contains a newline character, or returns the source length if no such token exists. + /// + /// # Examples + /// + /// ```no_run + /// let tokens = lex("foo\nbar"); + /// let stream = TokenStream::new(&tokens, "foo\nbar"); + /// let pos = stream.line_end(0); + /// assert_eq!(pos, 4); // position after '\n' + /// ``` #[must_use] pub(crate) fn line_end(&self, start: usize) -> usize { let mut end = self.tokens.get(start).map_or(self.src.len(), |t| t.1.end); @@ -92,7 +166,27 @@ impl<'a> TokenStream<'a> { end } - /// Skip whitespace and comments that do not contain newlines. + /// Advances the cursor past whitespace and comment tokens that do not contain newlines. + /// + /// Skips over consecutive whitespace or comment tokens as long as their spans do not + /// include a newline character. Stops at the first token that is not whitespace/comment + /// or contains a newline. + /// + /// # Examples + /// + /// ```no_run + /// use parser::{TokenStream, SyntaxKind, Span}; + /// + /// let src = "let x = 42; // comment"; + /// let tokens = vec![ + /// (SyntaxKind::T_WHITESPACE, Span::new(0, 1)), + /// (SyntaxKind::T_COMMENT, Span::new(10, 20)), + /// (SyntaxKind::T_IDENT, Span::new(21, 22)), + /// ]; + /// let mut stream = TokenStream::new(&tokens, src); + /// stream.skip_ws_inline(); + /// assert_eq!(stream.cursor(), 2); + /// ``` pub(crate) fn skip_ws_inline(&mut self) { while let Some(tok) = self.tokens.get(self.cursor) { if matches!(tok.0, SyntaxKind::T_WHITESPACE | SyntaxKind::T_COMMENT)