diff --git a/data/readme.md b/data/readme.md new file mode 100644 index 0000000..4dfe8d3 --- /dev/null +++ b/data/readme.md @@ -0,0 +1,161 @@ +# QuasiLang Syntax Guide + +QuasiLang is the source language parsed by **QuasiPiler**. +Sample programs: `test00.qc`–`test12.qc`. + +--- + +## Basics + +* **Comments** + * Line: `// comment` + * Block: `/* comment */` — may be inline or multi-line. +* **Whitespace**: ignored except as a separator. +* **Identifiers**: letters, digits, `_` (cannot start with a digit). +* **Literals** + * Numbers: integers, floats, exponents. + * Strings: `'...'` or `"..."` with escapes (`\n`, `\t`, `\\`). + * Booleans: `true`, `false` + * Null: `null` + +--- + +## Expressions + +* **Arithmetic & Assignment**: `+ - * / %` with `= += -= *= /= %=` +* **Comparison & Logic**: `== != < <= > >=` and `&& || !` +* **Bitwise & Shift**: `& | ^ << >>` with `&= |= ^= <<= >>=` +* **Increment/Decrement**: prefix and postfix `++ --` +* **Ternary**: `cond ? a : b` +* **Member access**: `obj.key`, `obj["key"]` +* **Indexing & slicing**: + * Single index: `arr[i]` + * Slices: `arr[start:end:step]` + * Any part may be omitted: `arr[:end]`, `arr[start:]`, `arr[::step]`, `arr[::]` +* **Function calls**: `f(arg1, arg2)` +* **Function declaration**: `fu(x,y){ return x+y; }` + +--- + +## Data Structures + +* **Lists**: `[1, 2, 3]` +* **Objects**: `{ "key": value, "other": 42 }` + *Keys must be constant strings.* +* **Tuples `()`** + + * Foreach bundles (abstract streams) + * Not concrete lists unless materialized with `list(...)` + +### Tuple-based foreach + +Tuples drive vectorized foreach expansion when applied after an expression: + +1. **Member/Index tuple** + + ```qc + obj.(a, b, c) // (obj.a, obj.b, obj.c) + arr[(i, j, k)] // (arr[i], arr[j], arr[k]) + ``` + +2. **Operators with tuples** + + ```qc + 5 + (1,2,3) // (6, 7, 8) + list(5 + (1,2,3)) // [6, 7, 8] + ``` + +3. **Chaining** + + ```qc + obj.(a,b).(x,y) // (obj.a.x, obj.a.y, obj.b.x, obj.b.y) + arr[(1,4)].(id,total) // (arr[1].id, arr[1].total, arr[4].id, arr[4].total) + ``` + +4. **Slices with tuples** + + ```qc + arr[1:6:2][(2,3)] // (arr[1][2], arr[1][3], arr[3][2], arr[3][3], arr[5][2], arr[5][3]) + list(arr[1:6:2])[2] // arr[5] + ``` + +--- + +## Statements & Declarations + +* **Variables**: `name = expression;` +* **Functions**: `name(p1,p2){ ... }` or `name = fu(p1){ ... };` +* **Conditionals**: + + ```qc + if (cond) { ... } + elif (other) { ... } + else { ... } + ``` + + Bodies may omit braces for a single statement: + + ```qc + if (cond) do_something(); + while (ok) step(); + ``` + + Return with conditional: + + ```qc + return if(a){b} else c; // valid + return if(a) b; else c; // invalid due to precedence + ``` +* **Loops**: `while(cond){...}`, `for(init; cond; step){...}` + Both also allow single-statement bodies without `{}`. +* **Exceptions**: + + ```qc + try { ... } catch (err) { ... } finally { ... } + ``` +* **Jumps**: `break;`, `continue;`, `return expr;`, `goto label;` +* **Labels**: `label_name:` + +--- + +## Examples + +**Foreach with tuples** + +```qc +user = { "id": 7, "name": "Ada", "meta": { "city":"Paris", "tz":"CET" } }; + +user.(id, name) // 7, "Ada" +user.(meta).("city","tz") // "Paris", "CET" +orders[(0,2)].(id,total) // orders[0].id, orders[0].total, orders[2].id, orders[2].total +``` + +**Slices and foreach** + +```qc +matrix = [row0,row1,row2,row3,row4,row5]; +matrix[1:6:2][(2,3)]; // row1[2], row1[3], row3[2], row3[3], row5[2], row5[3] +list(matrix[1:6:2])[2]; // row5 +``` + +**Arithmetic with tuples** + +```qc +print(5 + (1,2,3)); // 6,7,8 +print(list(5 + (1,2,3))); // [6,7,8] +``` + +**Control flow** + +```qc +if (ready) start(); +while (i < n) i++; +return if(ok){result} else null; +``` + +**Chained functions** + +```qc +adder = fu(x){ return fu(y){ return fu(z){ return x+y+z; }; }; }; +sum = adder(1)(2)(3); +``` diff --git a/data/test07.qc b/data/test07.qc index e57e80b..202be4d 100644 --- a/data/test07.qc +++ b/data/test07.qc @@ -7,6 +7,8 @@ a + b += c, c = a + b, d; + [a + b, c + d * e]; + {a + b * c; d + e - f, g + h << (i >> j)} return a + b - c * d / e % f ^ g << h >> i | j & k; diff --git a/data/test08.qc b/data/test08.qc index c18b077..2ce6a41 100644 --- a/data/test08.qc +++ b/data/test08.qc @@ -8,4 +8,6 @@ main(a+b, c); main(a, b, c) + d; -a + main(a, b, c); \ No newline at end of file +a + main(a, b, c); + +main(a)(b)(c); \ No newline at end of file diff --git a/include/ast.hpp b/include/ast.hpp index 8fd21cb..428fac3 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -152,6 +152,22 @@ struct callexp_node : token_node { using callexp_ptr = std::shared_ptr; +struct imcallexp_node : ast_node { + ast_node_ptr callee; + ast_node_ptr paren; + bool has_paren { false }; + + explicit imcallexp_node(ast_node_ptr callee); + void set_paren(ast_node_ptr paren); + + const position& get_start() const override; + void dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full + ) const override; +}; + +using imcallexp_ptr = std::shared_ptr; + struct fundecl_node : callexp_node { ast_node_ptr body; bool has_body { false }; diff --git a/include/expression.hpp b/include/expression.hpp index 246b0d3..c2e998b 100644 --- a/include/expression.hpp +++ b/include/expression.hpp @@ -26,6 +26,7 @@ #define EXPRESSION_HPP #include "ast.hpp" +#include #include #include #include @@ -75,6 +76,11 @@ class expression { const std::vector& nodes, size_t pos, std::string_view op ); + static std::runtime_error make_error( + const std::string& message, const std::vector& expression, + const std::source_location& location = std::source_location::current() + ); + static const std::unordered_map> binary_ops; static const std::unordered_map prefix_ops; diff --git a/include/grouper.hpp b/include/grouper.hpp index 64dd756..78e0de8 100644 --- a/include/grouper.hpp +++ b/include/grouper.hpp @@ -26,6 +26,7 @@ #define GROUPER_HPP #include "ast.hpp" +#include /** * @brief Parses tokens into hierarchical groups and expressions. @@ -62,13 +63,25 @@ class grouper { [[nodiscard]] bool handle_chain(const group_ptr& result, const group_ptr& inode) const; + [[nodiscard]] static bool is_secondary_keyword(const std::string& kw); + [[nodiscard]] static std::string + keyword_from_node(const ast_node_ptr& node); + [[nodiscard]] group_ptr fetch_previous_command( + const group_ptr& result, const std::string& kw, const group_ptr& inode + ) const; + [[nodiscard]] std::string fetch_previous_keyword( + const group_ptr& prev, const std::string& kw, const group_ptr& inode + ) const; + void validate_chain( + const std::string& prev_kw, const std::string& kw, + const group_ptr& inode + ) const; + bool append_group( const group_ptr& result, const ast_node_ptr& node, bool& wait_for_condition, bool& wait_for_body, group_kind kind ) const; - void identify_body(const group_ptr& group) const; - void identify(const group_ptr& group, const group_ptr& result) const; /** * @brief Transform token groups representing arithmetic into AST nodes. diff --git a/include/reader.hpp b/include/reader.hpp index 52d3291..703d4c8 100644 --- a/include/reader.hpp +++ b/include/reader.hpp @@ -145,6 +145,12 @@ class reader { * token_kind::floating. */ token_kind read_number(std::string& into); + // Helpers for numeric literal parsing + bool check_digit() const noexcept; + void read_digits(std::string& into); + void read_integer_part(std::string& into); + bool read_fraction_part(std::string& into); + bool read_exponent_part(std::string& into); void init_token(token& t) const noexcept; /** diff --git a/readme.md b/readme.md index 77054e7..f6a0554 100644 --- a/readme.md +++ b/readme.md @@ -38,165 +38,11 @@ This repo is my sanctuary under license — it begs mercy, not stars. I’ll bel ``` * ``: path to your QuasiCode file -## QuasiLang Syntax Guide +## QuasiLang -### Basics +**See [QuasiLang Syntax Guide](data/readme.md) for the full syntax guide.** -* **Comments** - - * Line comments: `// This is a comment` - * Block comments: - - ```qc - /* - This is a block comment - */ - ``` - -* **Whitespace** is ignored except as a separator. - -* **Identifiers** consist of letters, digits, and underscores, but cannot start with a digit. - -* **Literals** - - * **Numbers:** Support integer and floating-point (with optional exponent). - * **Strings:** Can use single `'` or double `"` quotes. Common escape sequences (like `\n`, `\t`, `\\`, etc.) are supported. - -* **Separators and Grouping** - - * Separators: `,` (comma), `;` (semicolon), `:` (colon) - * Grouping: - - * `()` for expressions and function parameters - * `[]` for lists and indexing - * `{}` for code blocks and objects - ---- - -### Expressions - -* **Arithmetic & Assignment:** - `+`, `-`, `*`, `/`, `%`, `=`, `+=`, `-=`, `*=`, `/=`, `%=` -* **Comparison & Logic:** - `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!` -* **Bitwise:** - `&`, `|`, `^`, `<<`, `>>`, and compound assignments (`&=`, `|=`, etc.) -* **Increment/Decrement:** - `++`, `--` (prefix and postfix) -* **Member Access:** - `obj.key` or `obj["key"]` -* **Indexing & Slicing:** - `arr[2]`, `arr[1:4]`, `arr[::2]` -* **Function Calls:** - `func(arg1, arg2)` - ---- - -### Statements and Declarations - -* **Variable Assignment:** - `name = expression;` -* **Function Declaration:** - `name(param1, param2) { ... }` -* **Control Flow:** - - * **Conditional:** - - ```qc - if (cond) { ... } - elif (other) { ... } - else { ... } - ``` - * **Loops:** - - * While: `while (cond) { ... }` - * For: `for (init; cond; step) { ... }` - * **Jump Statements:** - - * `break;` - * `continue;` - * `return;` or `return expr;` - * `goto label_name;` - * **Exception Handling:** - - ```qc - try { - // code - } catch (err) { - // handler - } finally { - // cleanup - } - ``` -* **Labels:** - Define with `label_name:` and use with `goto label_name;` - ---- - -### Data Structures - -* **Lists:** - `[item1, item2, ...]` -* **Dictionaries/Objects:** - `{ "key": value, "other": 42 }` - ---- - -### Example Programs - -**Hello World** - -```qc -// hello.qc -greeting = "Hello, world!"; -print(greeting); -``` - -**Basic Control Flow** - -```qc -numbers = [1, 2, 3, 4, 5]; -sum = 0; - -for(i = 0; i < len(numbers); i++) { - sum += numbers[i]; -} - -if (sum > 15) { - print("Sum is greater than 15"); -} elif (sum > 10) { - print("Sum is greater than 10"); -} else { - print("Sum is", sum); -} -``` - -**Functions and Error Handling** - -```qc -safe_div(a, b) { - try { - return a / b; - } catch (...) { - print("Error: Division by zero"); - return null; - } -} - -result = safe_div(10, 0); -``` - -**Labels and Goto (Rare, but Supported)** - -```qc -counter = 0; -start: - counter++; - if (counter < 3) { - goto start; - } -print("Done!"); -``` +**See [include](include/) and [src](src/) for implementation, and check [tests](tests/) for more examples.** --- diff --git a/src/ast.cpp b/src/ast.cpp index b778540..ddae320 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -237,6 +237,34 @@ void callexp_node::dump( } } +imcallexp_node::imcallexp_node(ast_node_ptr c) + : callee(std::move(c)) { + fixed_size += callee->fixed_size; + full_size += callee->full_size; +} + +void imcallexp_node::set_paren(ast_node_ptr p) { + paren = std::move(p); + has_paren = true; + fixed_size += paren->fixed_size; + full_size += paren->full_size; +} + +const position& imcallexp_node::get_start() const { + return callee->get_start(); +} + +void imcallexp_node::dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full +) const { + os << prefix << (is_last ? "`-" : "|-") << "ImplicitCall\n"; + const std::string child_prefix = prefix + (is_last ? " " : "| "); + callee->dump(os, child_prefix, !has_paren, full); + if (has_paren) { + paren->dump(os, child_prefix, true, full); + } +} + fundecl_node::fundecl_node(const callexp_ptr& proto) : callexp_node(proto ? proto->value : token {}) { if (proto) { diff --git a/src/expression.cpp b/src/expression.cpp index 179c770..a8c401e 100644 --- a/src/expression.cpp +++ b/src/expression.cpp @@ -25,6 +25,8 @@ #include "expression.hpp" #include +#include +#include #include #include @@ -129,7 +131,7 @@ ast_node_ptr expression::parse_expression( auto middle = parse_expression(items, idx, 0); if (idx >= items.size() || !items[idx].is_op || items[idx].tok.word != ":") { - throw std::runtime_error("expected ':' in ternary expression"); + throw make_error("expected ':' in ternary expression", items); } token ctok = items[idx].tok; ++idx; @@ -168,7 +170,7 @@ ast_node_ptr expression::parse_prefix(std::vector& items, size_t& idx) { } } if (idx >= items.size()) { - throw std::runtime_error("unexpected end"); + throw make_error("unexpected end", items); } auto node = items[idx].node; ++idx; @@ -184,3 +186,28 @@ ast_node_ptr expression::parse_prefix(std::vector& items, size_t& idx) { } return node; } + +std::runtime_error expression::make_error( + const std::string& message, const std::vector& expression, + const std::source_location& location +) { + std::ostringstream oss; + oss << "[Expression-Error] " << message << ". "; + if (!expression.empty()) { + oss << "while parsing expression: "; + for (const auto& it : expression) { + if (it.is_op) { + oss << it.tok.word << ' '; + } else if (auto tn + = std::dynamic_pointer_cast(it.node)) { + oss << tn->value.word << ' '; + } else { + oss << " "; + } + } + oss << '\n'; + } + oss << "in file: " << location.file_name() << '(' << location.line() << ':' + << location.column() << ") `" << location.function_name() << "`"; + return std::runtime_error(oss.str()); +} diff --git a/src/grouper.cpp b/src/grouper.cpp index 72f3410..4e547e8 100644 --- a/src/grouper.cpp +++ b/src/grouper.cpp @@ -25,6 +25,7 @@ #include "grouper.hpp" #include "expression.hpp" +#include grouper::grouper(reader& r, const size_t limit) : src(r) @@ -50,7 +51,6 @@ group_ptr grouper::parse(const group_kind kind) { result->kind = kind; parse_group(kind, group); identify(group, result); - parse_arithmetic(result); return result; } @@ -119,59 +119,83 @@ group_ptr grouper::identify_subgroup(const group_ptr& group) const { inode->limit = limit; inode->kind = kind; identify(group, inode); - parse_arithmetic(inode); return inode; } +bool grouper::is_secondary_keyword(const std::string& kw) { + return kw == "else" || kw == "elif" || kw == "catch" || kw == "finally"; +} + +std::string grouper::keyword_from_node(const ast_node_ptr& node) { + if (const auto ctrl = std::dynamic_pointer_cast(node)) { + return ctrl->value.word; + } + if (const auto cond = std::dynamic_pointer_cast(node)) { + return cond->value.word; + } + return {}; +} + +group_ptr grouper::fetch_previous_command( + const group_ptr& result, const std::string& kw, const group_ptr& inode +) const { + if (result->empty()) { + throw make_error("orphan secondary keyword: " + kw, inode); + } + const auto prev + = std::dynamic_pointer_cast(result->nodes.back()); + if (!prev || prev->nodes.empty() || prev->kind != group_kind::command) { + throw make_error("invalid predecessor for keyword: " + kw, inode); + } + return prev; +} + +std::string grouper::fetch_previous_keyword( + const group_ptr& prev, const std::string& kw, const group_ptr& inode +) const { + const auto last = prev->nodes.back(); + if (const auto ctrl = std::dynamic_pointer_cast(last)) { + return ctrl->value.word; + } + if (const auto cond = std::dynamic_pointer_cast(last)) { + return cond->value.word; + } + throw make_error("invalid predecessor for keyword: " + kw, inode); +} + +void grouper::validate_chain( + const std::string& prev_kw, const std::string& kw, const group_ptr& inode +) const { + bool allowed = false; + if (kw == "else" || kw == "elif") { + allowed = (prev_kw == "if" || prev_kw == "elif"); + } else if (kw == "catch" || kw == "finally") { + allowed = (prev_kw == "try" || prev_kw == "catch"); + } + if (!allowed) { + throw make_error( + "unexpected keyword order: " + prev_kw + " before " + kw, inode + ); + } +} + bool grouper::handle_chain( const group_ptr& result, const group_ptr& inode ) const { const auto first = inode->nodes.front(); - std::string kw; - if (const auto ctrl = std::dynamic_pointer_cast(first)) { - kw = ctrl->value.word; - } else if (const auto cond - = std::dynamic_pointer_cast(first)) { - kw = cond->value.word; + const auto kw = keyword_from_node(first); + if (!is_secondary_keyword(kw)) { + return false; } - if (kw == "else" || kw == "elif" || kw == "catch" || kw == "finally") { - if (result->empty()) { - throw make_error("orphan secondary keyword: " + kw, inode); - } - const auto prev - = std::dynamic_pointer_cast(result->nodes.back()); - if (!prev || prev->nodes.empty() || prev->kind != group_kind::command) { - throw make_error("invalid predecessor for keyword: " + kw, inode); - } - const auto last = prev->nodes.back(); - std::string prev_kw; - if (const auto ctrl = std::dynamic_pointer_cast(last)) { - prev_kw = ctrl->value.word; - } else if (const auto cond - = std::dynamic_pointer_cast(last)) { - prev_kw = cond->value.word; - } else { - throw make_error("invalid predecessor for keyword: " + kw, inode); - } - bool allowed = false; - if (kw == "else" || kw == "elif") { - allowed = (prev_kw == "if" || prev_kw == "elif"); - } else if (kw == "catch" || kw == "finally") { - allowed = (prev_kw == "try" || prev_kw == "catch"); - } - if (!allowed) { - throw make_error( - "unexpected keyword order: " + prev_kw + " before " + kw, inode - ); - } - result->pop_back(); - for (auto& ch : inode->nodes) { - append(prev, ch); - } - append(result, prev); - return true; + const auto prev = fetch_previous_command(result, kw, inode); + const auto prev_kw = fetch_previous_keyword(prev, kw, inode); + validate_chain(prev_kw, kw, inode); + result->pop_back(); + for (auto& ch : inode->nodes) { + append(prev, ch); } - return false; + append(result, prev); + return true; } bool grouper::append_group( @@ -196,12 +220,19 @@ bool grouper::append_group( append(result, ctrl); return true; } - if (const auto callexp = std::dynamic_pointer_cast(top); - callexp && kind == group_kind::body) { - const auto fundecl = std::make_shared(callexp); - fundecl->set_body(node); - append(result, fundecl); - return true; + if (const auto callexp = std::dynamic_pointer_cast(top)) { + if (kind == group_kind::body) { + const auto fundecl = std::make_shared(callexp); + fundecl->set_body(node); + append(result, fundecl); + return true; + } + if (kind == group_kind::paren) { + const auto icall = std::make_shared(top); + icall->set_paren(node); + append(result, icall); + return true; + } } const auto tok = std::dynamic_pointer_cast(top); if (tok && tok->value.kind == token_kind::keyword @@ -211,40 +242,23 @@ bool grouper::append_group( append(result, callexp); return true; } + if (kind == group_kind::paren && !tok) { + const auto icall = std::make_shared(top); + icall->set_paren(node); + append(result, icall); + return true; + } append(result, top); } return false; } -void grouper::identify_body(const group_ptr& group) const { - const auto body = std::make_shared(); - body->limit = limit; - while (!group->empty()) { - auto top = group->nodes.back(); - group->pop_back(); - if (auto tok = std::dynamic_pointer_cast(top)) { - if (const auto ctrl - = std::dynamic_pointer_cast(tok)) { - ctrl->set_body(body); - append(group, ctrl); - break; - } - if (auto callexp = std::dynamic_pointer_cast(tok)) { - const auto fundecl = std::make_shared(callexp); - fundecl->set_body(body); - append(group, fundecl); - break; - } - } - append(body, top); - } -} - void grouper::identify(const group_ptr& group, const group_ptr& result) const { bool wait_for_condition = false; bool wait_for_body = false; - for (auto& node : group->nodes) { + for (size_t i = 0; i < group->nodes.size(); ++i) { + auto node = group->nodes[i]; bool is_group = false; group_kind kind {}; @@ -271,6 +285,31 @@ void grouper::identify(const group_ptr& group, const group_ptr& result) const { continue; } } + if (wait_for_body && !is_group) { + const auto tail = std::make_shared(); + tail->limit = limit; + for (; i < group->nodes.size(); ++i) { + append(tail, group->nodes[i]); + } + const auto body = std::make_shared(); + body->limit = limit; + identify(tail, body); + + const auto top = result->nodes.back(); + result->pop_back(); + if (const auto ctrl + = std::dynamic_pointer_cast(top)) { + ctrl->set_body(body); + append(result, ctrl); + } else if (auto callexp + = std::dynamic_pointer_cast(top)) { + const auto fundecl = std::make_shared(callexp); + fundecl->set_body(body); + append(result, fundecl); + } + wait_for_body = false; + continue; + } if (const auto tok = std::dynamic_pointer_cast(node)) { if (tok->value.kind == token_kind::keyword) { const auto& w = tok->value.word; @@ -298,8 +337,10 @@ void grouper::identify(const group_ptr& group, const group_ptr& result) const { } append(result, node); } - if (wait_for_body) { - identify_body(result); + try { + parse_arithmetic(result); + } catch (const std::runtime_error& e) { + throw make_error(e.what(), result); } } @@ -464,7 +505,7 @@ void grouper::parse_arithmetic(const group_ptr& group) const { } auto items = expression::make_items(group->nodes); size_t idx = 0; - auto expr = expression::parse_expression(items, idx, 0); + const auto expr = expression::parse_expression(items, idx, 0); if (idx == items.size()) { group->nodes.clear(); group->weights = {}; diff --git a/src/reader.cpp b/src/reader.cpp index e2acd27..95c0bbb 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -246,49 +246,68 @@ void reader::read_string(std::string& into) { advance_char(); } -token_kind reader::read_number(std::string& into) { - into.clear(); - bool is_float = false; +void reader::read_digits(std::string& into) { + if (!check_digit()) { + throw make_error("digit expected after decimal"); + } + do { + into += get_char(); + } while (check_digit()); +} + +void reader::read_integer_part(std::string& into) { if (is_valid() && peek_char() == '0') { into += get_char(); - if (is_valid() && std::isdigit(peek_uchar())) { + if (check_digit()) { throw make_error("leading zeros not allowed"); } - } else if (is_valid() && std::isdigit(peek_uchar())) { - do { - into += get_char(); - } while (is_valid() && std::isdigit(peek_uchar())); } else { - throw make_error("expected digit"); + read_digits(into); } +} +bool reader::read_fraction_part(std::string& into) { if (is_valid() && peek_char() == '.') { - is_float = true; into += get_char(); - if (!is_valid() || !std::isdigit(peek_uchar())) { - throw make_error("digit expected after decimal"); - } - while (is_valid() && std::isdigit(peek_uchar())) { + read_digits(into); + return true; + } + return false; +} + +bool reader::read_exponent_part(std::string& into) { + if (!is_valid()) { + return false; + } + char c = peek_char(); + if (std::tolower(c) != 'e') { + return false; + } + into += get_char(); + if (is_valid()) { + c = peek_char(); + if (c == '+' || c == '-') { into += get_char(); } } + read_digits(into); + return true; +} - if (is_valid() && (peek_char() == 'e' || peek_char() == 'E')) { +token_kind reader::read_number(std::string& into) { + into.clear(); + read_integer_part(into); + bool is_float = read_fraction_part(into); + if (read_exponent_part(into)) { is_float = true; - into += get_char(); - if (is_valid() && (peek_char() == '+' || peek_char() == '-')) { - into += get_char(); - } - if (!is_valid() || !std::isdigit(peek_uchar())) { - throw make_error("digit expected after exponent"); - } - while (is_valid() && std::isdigit(peek_uchar())) { - into += get_char(); - } } return is_float ? token_kind::floating : token_kind::integer; } +bool reader::check_digit() const noexcept { + return is_valid() && std::isdigit(peek_uchar()); +} + void reader::init_token(token& t) const noexcept { t.word.clear(); t.pos = get_position(); diff --git a/tests/arithmetic_tests.cpp b/tests/arithmetic_tests.cpp index 64f42d0..a387e03 100644 --- a/tests/arithmetic_tests.cpp +++ b/tests/arithmetic_tests.cpp @@ -33,7 +33,7 @@ TEST(ArithmeticTest, ParseBinary) { std::string input = "a+b"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); @@ -47,7 +47,7 @@ TEST(ArithmeticTest, ParsePrefixUnary) { std::string input = "+a"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); @@ -61,7 +61,7 @@ TEST(ArithmeticTest, ParsePostfixUnary) { std::string input = "a++"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); @@ -115,7 +115,7 @@ TEST(ExpressionTest, TernaryBranches) { idx = 0; n = expression::parse_expression(items, idx, 3); - auto tok = std::dynamic_pointer_cast(n); + const auto tok = std::dynamic_pointer_cast(n); ASSERT_TRUE(tok); EXPECT_EQ(tok->value.word, "a"); EXPECT_EQ(idx, 1u); diff --git a/tests/identify_tests.cpp b/tests/identify_tests.cpp index 22656fe..26d9ad2 100644 --- a/tests/identify_tests.cpp +++ b/tests/identify_tests.cpp @@ -110,6 +110,26 @@ TEST(IdentifierTest, IdentifyCallExpression) { EXPECT_TRUE(call->has_paren); } +TEST(IdentifierTest, IdentifyImplicitCallExpression) { + std::string input = "main(a)(b)"; + reader r { input }; + grouper g { r }; + + auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* icall = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(icall, nullptr); + ASSERT_TRUE(icall->has_paren); + + auto* inner = dynamic_cast(icall->callee.get()); + ASSERT_NE(inner, nullptr); + EXPECT_EQ(inner->value.word, "main"); + EXPECT_TRUE(inner->has_paren); +} + TEST(IdentifierTest, IdentifyFunctionDecl) { std::string input = "main(a){b}"; reader r { input }; @@ -142,3 +162,173 @@ TEST(IdentifierTest, IdentifyReturnStatement) { EXPECT_EQ(jmp->value.word, "return"); EXPECT_TRUE(jmp->has_body); } + +TEST(IdentifierTest, ParseGotoExpression) { + std::string input = "return a+b"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + auto* bin = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); +} + +TEST(IdentifierTest, ParseComplexReturnExpression) { + std::string input = "return ((x * x + y * y + ((x << y) - (y << x))) ^ (y " + "| x)) + ((x % 7) * (y % 5));"; + reader r { input }; + grouper g { r }; + EXPECT_NO_THROW(g.parse()); +} + +TEST(IdentifierTest, ParseElseExpression) { + std::string input = "if(a)b;else c+d"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_GE(halt->size(), 2u); + auto* els = dynamic_cast(halt->nodes.back().get()); + ASSERT_NE(els, nullptr); + ASSERT_TRUE(els->has_body); + auto* body = dynamic_cast(els->body.get()); + ASSERT_NE(body, nullptr); + auto* bin = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); +} + +TEST(IdentifierTest, ParseReturnIfElse) { + std::string input = "return if(a){b}else c"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + ASSERT_EQ(body->size(), 2u); + + auto* cond = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(cond, nullptr); + EXPECT_EQ(cond->value.word, "if"); + EXPECT_TRUE(cond->has_paren); + EXPECT_TRUE(cond->has_body); + + auto* els = dynamic_cast(body->nodes[1].get()); + ASSERT_NE(els, nullptr); + EXPECT_EQ(els->value.word, "else"); + EXPECT_TRUE(els->has_body); +} + +TEST(IdentifierTest, ParseReturnIfElseExpression) { + std::string input = "return if(a){b+c}else c(d)e"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + ASSERT_EQ(body->size(), 2u); + + auto* cond = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(cond, nullptr); + auto* cbody = dynamic_cast(cond->body.get()); + ASSERT_NE(cbody, nullptr); + auto* ccmd = dynamic_cast(cbody->nodes[0].get()); + ASSERT_NE(ccmd, nullptr); + auto* bin = dynamic_cast(ccmd->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); + + auto* els = dynamic_cast(body->nodes[1].get()); + ASSERT_NE(els, nullptr); + auto* ebody = dynamic_cast(els->body.get()); + ASSERT_NE(ebody, nullptr); + if (auto* icall = dynamic_cast(ebody->nodes[0].get())) { + ASSERT_TRUE(icall->has_paren); + auto* inner = dynamic_cast(icall->callee.get()); + ASSERT_NE(inner, nullptr); + EXPECT_EQ(inner->value.word, "c"); + EXPECT_TRUE(inner->has_paren); + } else { + ASSERT_GE(ebody->size(), 2u); + auto* call = dynamic_cast(ebody->nodes[0].get()); + ASSERT_NE(call, nullptr); + EXPECT_EQ(call->value.word, "c"); + EXPECT_TRUE(call->has_paren); + auto* tail = dynamic_cast(ebody->nodes[1].get()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->value.word, "e"); + } +} + +TEST(IdentifierTest, IdentifyLoops) { + for (auto [input, kw] : + { std::pair { "while(a){b}", "while" }, + { "for(a;b;c){d}", "for" } }) { + reader r { input }; + grouper g { r }; + + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* loop = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(loop, nullptr); + EXPECT_EQ(loop->value.word, kw); + EXPECT_TRUE(loop->is_loop); + EXPECT_TRUE(loop->has_paren); + EXPECT_TRUE(loop->has_body); + } +} + +TEST(IdentifierTest, IdentifyJumpStatements) { + for (auto [input, kw, has_body] : + { std::tuple { "break", "break", + false }, + { "continue", "continue", false }, + { "goto a", "goto", true }, + { "return b", "return", true } }) { + reader r { input }; + grouper g { r }; + + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + EXPECT_EQ(jmp->value.word, kw); + EXPECT_EQ(jmp->has_body, has_body); + if (has_body) { + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + auto* tok = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(tok, nullptr); + EXPECT_EQ(tok->value.word, kw == "goto" ? "a" : "b"); + } + } +} \ No newline at end of file