Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions implants/lib/eldritch/eldritch-core/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,51 @@ impl Lexer {
let initial_start = self.current;
let mut nesting_level = 1;

while nesting_level > 0 && !self.is_at_end() && self.peek() != '\n' {
if self.peek() == '{' {
while nesting_level > 0 && !self.is_at_end() {
let c = self.peek();

if c == '\'' || c == '"' {
let quote = c;
self.advance(); // consume opening quote
while !self.is_at_end() {
let sc = self.peek();
if sc == quote {
self.advance();
break;
} else if sc == '\\' {
self.advance();
if !self.is_at_end() {
self.advance();
}
} else if sc == '\n' {
self.line += 1;
self.advance();
} else {
self.advance();
}
}
continue;
}

if c == '#' {
while !self.is_at_end() && self.peek() != '\n' {
self.advance();
}
continue;
}

if c == '\n' {
self.line += 1;
self.advance();
continue;
}

if c == '{' {
nesting_level += 1;
} else if self.peek() == '}' {
} else if c == '}' {
nesting_level -= 1;
}

if nesting_level > 0 {
self.advance();
}
Expand Down
14 changes: 12 additions & 2 deletions implants/lib/eldritch/eldritch-core/tests/lexer_more_edges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ fn test_fstring_unmatched_brace() {
// Must use triple quotes so newline doesn't terminate the string before we see the inner error
let input = "f\"\"\"{\n\"\"\"";
let tokens = lex(input);
// The result should be FStringContent containing the Error
// With enhanced f-string expression parsing (which consumes strings inside),
// an unclosed brace might lead to the expression parser consuming the closing quotes
// if they look like a string start.
// So we accept either "Unmatched '{'" (inner error) OR "Unterminated string literal" (outer error).
match &tokens[0] {
TokenKind::FStringContent(inner) => {
let error_found = inner.iter().any(|t| match &t.kind {
Expand All @@ -51,7 +54,14 @@ fn test_fstring_unmatched_brace() {
inner
);
}
_ => panic!("Expected FStringContent, got {:?}", tokens[0]),
TokenKind::Error(msg) => {
assert!(
msg.contains("Unterminated string literal") || msg.contains("Unmatched '{'"),
"Unexpected error message: {}",
msg
);
}
_ => panic!("Expected FStringContent or Error, got {:?}", tokens[0]),
}
}

Expand Down
130 changes: 130 additions & 0 deletions implants/lib/eldritch/eldritch-core/tests/lexer_new_coverage.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use eldritch_core::{Lexer, TokenKind};

/// Runs the lexer over `source` and returns only the kind of every token it
/// produced, in order, discarding the rest of each token.
fn lex(source: &str) -> Vec<TokenKind> {
    let mut lexer = Lexer::new(source.to_owned());
    let mut kinds = Vec::new();
    for token in lexer.scan_tokens() {
        kinds.push(token.kind);
    }
    kinds
}

/// A brace-delimited literal nested inside an f-string replacement field must
/// stay part of the expression instead of closing the field early.
///
/// For `f"{ {x} }"` the first token is expected to be `FStringContent` whose
/// inner tokens are the expression `{x}` (`LBrace`, `Identifier(x)`, `RBrace`)
/// bracketed by an `LParen`/`RParen` pair.
#[test]
fn test_fstring_nested_braces() {
    let tokens = lex("f\"{ {x} }\"");

    match &tokens[0] {
        TokenKind::FStringContent(parts) => {
            let mut actual: Vec<TokenKind> = Vec::new();
            for part in parts.iter() {
                actual.push(part.kind.clone());
            }

            let wanted = vec![
                TokenKind::LParen,
                TokenKind::LBrace,
                TokenKind::Identifier(String::from("x")),
                TokenKind::RBrace,
                TokenKind::RParen,
            ];
            assert_eq!(actual, wanted, "Nested braces failed");
        }
        other => panic!("Expected FStringContent, got {:?}", other),
    }
}

/// A `}` that appears inside a single-quoted string literal within an
/// f-string replacement field must not terminate the field.
///
/// For `f"{ '}' }"` the inner tokens are expected to be a single string
/// literal `}` bracketed by `LParen`/`RParen`.
#[test]
fn test_fstring_quote_containing_brace() {
    let tokens = lex("f\"{ '}' }\"");

    match &tokens[0] {
        TokenKind::FStringContent(parts) => {
            let mut actual: Vec<TokenKind> = Vec::new();
            for part in parts.iter() {
                actual.push(part.kind.clone());
            }

            let wanted = vec![
                TokenKind::LParen,
                // The string literal's value is the closing brace itself.
                TokenKind::String(String::from("}")),
                TokenKind::RParen,
            ];
            assert_eq!(actual, wanted, "Quote containing brace failed");
        }
        other => panic!("Expected FStringContent, got {:?}", other),
    }
}

/// Mirror of the single-quote case with the quote styles swapped: the
/// f-string is single-quoted and the embedded literal is double-quoted.
///
/// For `f'{ "}" }'` the inner tokens are expected to be a single string
/// literal `}` bracketed by `LParen`/`RParen`.
#[test]
fn test_fstring_double_quote_containing_brace() {
    let tokens = lex("f'{ \"}\" }'");

    match &tokens[0] {
        TokenKind::FStringContent(parts) => {
            let mut actual: Vec<TokenKind> = Vec::new();
            for part in parts.iter() {
                actual.push(part.kind.clone());
            }

            let wanted = vec![
                TokenKind::LParen,
                TokenKind::String(String::from("}")),
                TokenKind::RParen,
            ];
            assert_eq!(actual, wanted, "Double quote containing brace failed");
        }
        other => panic!("Expected FStringContent, got {:?}", other),
    }
}

/// Line breaks and leading whitespace inside parentheses are expected to be
/// ignored (implicit line joining): no `Indent`, `Dedent` or intermediate
/// `Newline` tokens appear between the parentheses.
#[test]
fn test_indentation_inside_parens() {
    let actual = lex("(\n x\n)");

    let wanted = vec![
        TokenKind::LParen,
        TokenKind::Identifier(String::from("x")),
        TokenKind::RParen,
        TokenKind::Newline,
        TokenKind::Eof,
    ];
    assert_eq!(actual, wanted);
}

/// An indented comment-only line at the start of a block.
///
/// The expected stream shows the comment line opening the block (`Indent`)
/// and contributing its own `Newline` before the `pass` statement is lexed;
/// the block is closed with a single `Dedent` before `Eof`.
#[test]
fn test_indentation_with_comment() {
    let actual = lex("if True:\n # comment\n pass");

    let wanted = vec![
        TokenKind::If,
        TokenKind::True,
        TokenKind::Colon,
        TokenKind::Newline,
        TokenKind::Indent,
        // Emitted for the comment-only line.
        TokenKind::Newline,
        TokenKind::Pass,
        TokenKind::Newline,
        TokenKind::Dedent,
        TokenKind::Eof,
    ];
    assert_eq!(actual, wanted);
}

/// A replacement field in a triple-quoted f-string may span several lines.
///
/// The newlines inside `{ ... }` are expected to leave no trace in the inner
/// token list: only the expression `1 + 1`, bracketed by `LParen`/`RParen`.
#[test]
fn test_multiline_fstring_expression() {
    let tokens = lex("f\"\"\"{ \n 1 + 1 \n }\"\"\"");

    match &tokens[0] {
        TokenKind::FStringContent(parts) => {
            let mut actual: Vec<TokenKind> = Vec::new();
            for part in parts.iter() {
                actual.push(part.kind.clone());
            }

            let wanted = vec![
                TokenKind::LParen,
                TokenKind::Integer(1),
                TokenKind::Plus,
                TokenKind::Integer(1),
                TokenKind::RParen,
            ];
            assert_eq!(actual, wanted, "Multiline f-string expression failed");
        }
        other => panic!("Expected FStringContent, got {:?}", other),
    }
}
Loading