From c0093541d6f71cb25f227a8650dcaf3264d774d4 Mon Sep 17 00:00:00 2001 From: LunaStev Date: Mon, 22 Dec 2025 19:36:07 +0900 Subject: [PATCH] feat: add char and bool literal support with escape sequences Implement character literals, boolean literals, and improve numeric literal parsing with proper hexadecimal support. Changes: - Add char literal tokenization in lexer: - Parse single-quoted characters ('A', 'x', etc.) - Support escape sequences (\n, \t, \r, \\, \', \") - Validate proper char literal termination - Add TokenType::CharLiteral(char) variant - Add boolean literal keywords: - Recognize "true" and "false" as language keywords - Add TokenType::BoolLiteral(bool) variant - Parse directly to boolean values in lexer - Refactor numeric literal parsing: - Fix hexadecimal literal handling (0x prefix) - Separate hex, decimal, and float parsing logic - Remove intermediate string manipulation - Handle digit accumulation character-by-character - Properly consume digits after decimal point - Extend AST literal types: - Add Literal::Bool(bool) for boolean values - Add Literal::Char(char) for character values - Add Literal::Byte(u8) for byte values (alias for char) - Implement LLVM IR generation for new literals: - Bool: Generate i1 const_int (0 or 1) - Char: Generate i8 const_int with ASCII value - Byte: Generate i8 const_int with byte value - Add variable initialization support in codegen: - Handle Bool, Char, Byte literals in store operations - Convert literal values to appropriate LLVM types - Support initialization with literal values - Fix clang warning suppression: - Add -Wno-override-module flag to silence module warnings - Remove duplicate stdin pipe declaration - Update test suite: - test71.wave: Demonstrate all supported type literals - Show i32, u32, f32, str, bool, byte, char, ptr, array usage Example usage: var flag: bool = true; var letter: char = 'A'; var newline: char = '\n'; var hex: i32 = 0xFF; This completes Wave's basic literal support with 
industry-standard syntax for characters, booleans, and hexadecimal numbers. Signed-off-by: LunaStev --- front/lexer/src/lexer/lexer.rs | 79 ++++++++++++++----- front/lexer/src/lexer/token.rs | 2 + front/parser/src/parser/ast.rs | 3 + front/parser/src/parser/format.rs | 8 ++ .../src/llvm_temporary/expression.rs | 18 +++++ .../src/llvm_temporary/llvm_backend.rs | 2 +- .../src/llvm_temporary/statement.rs | 24 +++++- test/test71.wave | 12 +++ 8 files changed, 126 insertions(+), 22 deletions(-) create mode 100644 test/test71.wave diff --git a/front/lexer/src/lexer/lexer.rs b/front/lexer/src/lexer/lexer.rs index 61164a36..16846280 100644 --- a/front/lexer/src/lexer/lexer.rs +++ b/front/lexer/src/lexer/lexer.rs @@ -477,6 +477,14 @@ impl<'a> Lexer<'a> { lexeme: ",".to_string(), line: self.line, }, + '\'' => { + let value = self.char_literal(); + Token { + token_type: TokenType::CharLiteral(value), + lexeme: format!("'{}'", value), + line: self.line, + } + }, '"' => { let string_value = self.string(); Token { @@ -758,6 +766,16 @@ impl<'a> Lexer<'a> { lexeme: "break".to_string(), line: self.line, }, + "true" => Token { + token_type: TokenType::BoolLiteral(true), + lexeme: "true".to_string(), + line: self.line, + }, + "false" => Token { + token_type: TokenType::BoolLiteral(false), + lexeme: "false".to_string(), + line: self.line, + }, _ => Token { token_type: TokenType::Identifier(identifier.clone()), lexeme: identifier, @@ -766,48 +784,43 @@ impl<'a> Lexer<'a> { } } '0'..='9' => { - let mut num_str = self.number().to_string(); // 첫 숫자만 읽음 - - // 16진수 접두사 체크 - if num_str == "0" && (self.peek() == 'x' || self.peek() == 'X') { - num_str.push(self.advance()); // 'x' 붙이기 + if c == '0' && (self.peek() == 'x' || self.peek() == 'X') { + self.advance(); // consume 'x' or 'X' + let mut hex_str = String::new(); while self.peek().is_ascii_hexdigit() { - num_str.push(self.advance()); + hex_str.push(self.advance()); } - let value = i64::from_str_radix(&num_str[2..], 16).unwrap_or(0); + 
let value = i64::from_str_radix(&hex_str, 16).unwrap_or(0); + return Token { token_type: TokenType::Number(value), - lexeme: num_str, + lexeme: format!("0x{}", hex_str), line: self.line, }; } - // float 처리 + let mut num_str = c.to_string(); + while self.peek().is_ascii_digit() { + num_str.push(self.advance()); + } + let is_float = if self.peek() == '.' { num_str.push('.'); self.advance(); - while self.peek().is_ascii_digit() { - num_str.push(self.advance()); // 소수점 뒤 숫자 + num_str.push(self.advance()); } true } else { false }; - // 일반 숫자/실수 토큰 결정 let token_type = if is_float { - num_str - .parse::<f64>() - .map(TokenType::Float) - .unwrap_or(TokenType::Float(0.0)) + num_str.parse::<f64>().map(TokenType::Float).unwrap() } else { - num_str - .parse::<i64>() - .map(TokenType::Number) - .unwrap_or(TokenType::Number(0)) + num_str.parse::<i64>().map(TokenType::Number).unwrap() }; Token { @@ -872,6 +885,32 @@ impl<'a> Lexer<'a> { string_literal } + fn char_literal(&mut self) -> char { + let c = if self.peek() == '\\' { + self.advance(); // consume '\' + let escaped = self.advance(); + match escaped { + 'n' => '\n', + 't' => '\t', + 'r' => '\r', + '\\' => '\\', + '\'' => '\'', + '"' => '"', + _ => panic!("Invalid escape sequence in char literal"), + } + } else { + self.advance() + }; + + if self.peek() != '\'' { + panic!("Unterminated or invalid char literal"); + } + + self.advance(); // closing ' + + c + } + fn identifier(&mut self) -> String { let start = if self.current > 0 { self.current - 1 diff --git a/front/lexer/src/lexer/token.rs b/front/lexer/src/lexer/token.rs index 028e58e9..e7d6e163 100644 --- a/front/lexer/src/lexer/token.rs +++ b/front/lexer/src/lexer/token.rs @@ -164,4 +164,6 @@ pub enum TokenType { Proto, Struct, TypeVoid, + CharLiteral(char), + BoolLiteral(bool), } diff --git a/front/parser/src/parser/ast.rs b/front/parser/src/parser/ast.rs index 4376c26e..71b34a30 100644 --- a/front/parser/src/parser/ast.rs +++ b/front/parser/src/parser/ast.rs @@ -133,6 +133,9 @@ pub enum 
Literal { Number(i64), Float(f64), String(String), + Bool(bool), + Char(char), + Byte(u8), } #[derive(Debug, Clone)] diff --git a/front/parser/src/parser/format.rs b/front/parser/src/parser/format.rs index 279f2158..8fcc665d 100644 --- a/front/parser/src/parser/format.rs +++ b/front/parser/src/parser/format.rs @@ -257,6 +257,14 @@ where tokens.next(); Some(Expression::Literal(Literal::Float(*value))) } + TokenType::CharLiteral(c) => { + tokens.next(); + Some(Expression::Literal(Literal::Char(*c))) + } + TokenType::BoolLiteral(b) => { + tokens.next(); + Some(Expression::Literal(Literal::Bool(*b))) + } TokenType::Identifier(name) => { let name = name.clone(); tokens.next(); diff --git a/llvm_temporary/src/llvm_temporary/expression.rs b/llvm_temporary/src/llvm_temporary/expression.rs index 5608801c..ab13a665 100644 --- a/llvm_temporary/src/llvm_temporary/expression.rs +++ b/llvm_temporary/src/llvm_temporary/expression.rs @@ -72,6 +72,24 @@ pub fn generate_expression_ir<'ctx>( gep.as_basic_value_enum() }, + Literal::Bool(v) => { + context + .bool_type() + .const_int(if *v { 1 } else { 0 }, false) + .as_basic_value_enum() + }, + Literal::Char(c) => { + context + .i8_type() + .const_int(*c as u64, false) + .as_basic_value_enum() + }, + Literal::Byte(b) => { + context + .i8_type() + .const_int(*b as u64, false) + .as_basic_value_enum() + } _ => unimplemented!("Unsupported literal type"), }, diff --git a/llvm_temporary/src/llvm_temporary/llvm_backend.rs b/llvm_temporary/src/llvm_temporary/llvm_backend.rs index 1cc3ad0d..236c6909 100644 --- a/llvm_temporary/src/llvm_temporary/llvm_backend.rs +++ b/llvm_temporary/src/llvm_temporary/llvm_backend.rs @@ -13,7 +13,7 @@ pub fn compile_ir_to_object(ir: &str, file_stem: &str, opt_flag: &str) -> String .arg("-") .arg("-o") .arg(&object_path) - .stdin(std::process::Stdio::piped()) + .arg("-Wno-override-module") .stdin(std::process::Stdio::piped()) .spawn() .expect("Failed to execute clang"); diff --git 
a/llvm_temporary/src/llvm_temporary/statement.rs b/llvm_temporary/src/llvm_temporary/statement.rs index 6e53e9bc..0f713aed 100644 --- a/llvm_temporary/src/llvm_temporary/statement.rs +++ b/llvm_temporary/src/llvm_temporary/statement.rs @@ -111,10 +111,32 @@ pub fn generate_statement_ir<'ctx>( ( Expression::Literal(Literal::Float(value)), BasicTypeEnum::FloatType(float_type), - ) => { + ) + => { let init_value = float_type.const_float(*value); builder.build_store(alloca, init_value).unwrap(); } + ( + Expression::Literal(Literal::Bool(v)), + BasicTypeEnum::IntType(int_ty), + ) => { + let val = int_ty.const_int(if *v { 1 } else { 0 }, false); + builder.build_store(alloca, val).unwrap(); + } + ( + Expression::Literal(Literal::Char(c)), + BasicTypeEnum::IntType(int_ty), + ) => { + let val = int_ty.const_int(*c as u64, false); + builder.build_store(alloca, val).unwrap(); + } + ( + Expression::Literal(Literal::Byte(b)), + BasicTypeEnum::IntType(int_ty), + ) => { + let val = int_ty.const_int(*b as u64, false); + builder.build_store(alloca, val).unwrap(); + } (Expression::Literal(Literal::Float(value)), _) => { let float_value = context.f32_type().const_float(*value); diff --git a/test/test71.wave b/test/test71.wave new file mode 100644 index 00000000..3f3e119a --- /dev/null +++ b/test/test71.wave @@ -0,0 +1,12 @@ +fun main() { + var a: i32 = 10; + var b: u32 = 20; + var c: f32 = 10.1; + var d: str = "Wave"; + var e: bool = true; + var f: byte = 0xFF; + var g: char = 'A'; + var h: ptr = &a; + var i: array = [1, 2, 3, 4, 5]; + println("{}\n {}\n {}\n {}\n {}\n {}\n {}\n {}\n {}", a, b, c, d, e, f, g, h, i); +} \ No newline at end of file