From 7831a3ccc319ba432432a75da4e4de65d6d7a7a9 Mon Sep 17 00:00:00 2001 From: Yaroslav Riabtsev Date: Sat, 23 Aug 2025 19:02:02 +0200 Subject: [PATCH 1/4] unwrapped bodies fix --- data/test07.qc | 2 + data/test08.qc | 4 +- include/ast.hpp | 62 ++++++++++-- include/expression.hpp | 39 ++++++- include/grouper.hpp | 74 ++++++++++++-- include/reader.hpp | 62 ++++++++++-- src/ast.cpp | 28 +++++ src/expression.cpp | 125 ++++++++++------------- src/grouper.cpp | 202 ++++++++++++++++++++++--------------- src/reader.cpp | 25 +++-- tests/arithmetic_tests.cpp | 67 +++++++++--- tests/ast_tests.cpp | 10 +- tests/grouper_tests.cpp | 55 +++++++++- tests/identify_tests.cpp | 190 ++++++++++++++++++++++++++++++++++ tests/reader_tests.cpp | 45 +++++++++ 15 files changed, 782 insertions(+), 208 deletions(-) diff --git a/data/test07.qc b/data/test07.qc index e57e80b..202be4d 100644 --- a/data/test07.qc +++ b/data/test07.qc @@ -7,6 +7,8 @@ a + b += c, c = a + b, d; + [a + b, c + d * e]; + {a + b * c; d + e - f, g + h << (i >> j)} return a + b - c * d / e % f ^ g << h >> i | j & k; diff --git a/data/test08.qc b/data/test08.qc index c18b077..2ce6a41 100644 --- a/data/test08.qc +++ b/data/test08.qc @@ -8,4 +8,6 @@ main(a+b, c); main(a, b, c) + d; -a + main(a, b, c); \ No newline at end of file +a + main(a, b, c); + +main(a)(b)(c); \ No newline at end of file diff --git a/include/ast.hpp b/include/ast.hpp index 959a57b..428fac3 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -66,13 +66,30 @@ enum class group_kind { file, body, list, paren, command, item, key, halt }; [[nodiscard]] const char* group_kind_name(group_kind k) noexcept; +/** + * @brief Collection of AST nodes with a configurable size limit. + * + * Large sub-groups may be replaced with placeholder nodes to keep + * @c fixed_size within @c limit. + */ struct group_node : ast_node { - size_t limit; + size_t limit; ///< Maximum allowed node weight group_kind kind { group_kind::halt }; std::vector nodes; - std::priority_queue> - weights; /// node_size -> node_index - + /// queue of heavy child nodes: + std::priority_queue> weights; + + /** + * @brief Append a child node while respecting the size limit. + * + * Nodes contribute their @c fixed_size and @c full_size to the parent. If + * the accumulated @c fixed_size exceeds @c limit, larger child groups are + * replaced with ::placeholder_node instances so the tree can be lazily + * expanded later. + * + * @param node Node to append. + * @param src Reader used to reconstruct squeezed subtrees on demand. + */ void append(ast_node_ptr node, const reader& src); [[nodiscard]] bool empty() const noexcept override; [[nodiscard]] size_t size() const noexcept; @@ -81,7 +98,17 @@ struct group_node : ast_node { void dump( std::ostream& os, const std::string& prefix, bool is_last, bool full ) const override; - const position& get_start() const override; + [[nodiscard]] const position& get_start() const override; + /** + * @brief Replace a child group with a placeholder. + * + * The placeholder stores enough information to re-read the original subtree + * from @p src later. This is used when a group's @c fixed_size would exceed + * the configured limit and thus needs to be collapsed. + * + * @param index Index of the child to replace. + * @param src Reader used to recreate the subtree if needed. + */ void squeeze(size_t index, const reader& src); void pop_back(); }; @@ -95,7 +122,14 @@ struct wrapped_node : group_node { using wrapped_ptr = std::shared_ptr; -struct placeholder_node : wrapped_node { +/** + * @brief Node standing in place of a squeezed sub-tree. + * + * When a group exceeds the configured size limit it can be replaced by a + * placeholder node. The original reader is stored so the subtree can be + * reconstructed on demand. + */ +struct placeholder_node final : wrapped_node { reader* src { nullptr }; void dump( std::ostream& os, const std::string& prefix, bool is_last, bool full @@ -118,6 +152,22 @@ struct callexp_node : token_node { using callexp_ptr = std::shared_ptr; +struct imcallexp_node : ast_node { + ast_node_ptr callee; + ast_node_ptr paren; + bool has_paren { false }; + + explicit imcallexp_node(ast_node_ptr callee); + void set_paren(ast_node_ptr paren); + + const position& get_start() const override; + void dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full + ) const override; +}; + +using imcallexp_ptr = std::shared_ptr; + struct fundecl_node : callexp_node { ast_node_ptr body; bool has_body { false }; diff --git a/include/expression.hpp b/include/expression.hpp index e6800d9..c2e998b 100644 --- a/include/expression.hpp +++ b/include/expression.hpp @@ -26,30 +26,59 @@ #define EXPRESSION_HPP #include "ast.hpp" +#include +#include #include #include class expression { public: + /** + * @brief Element of the input stream for the expression parser. + * @details When @c is_op is set the item represents an operator token; + * otherwise it stores a pointer to an AST node. + */ struct item { bool is_op { false }; token tok; ast_node_ptr node; }; + /** + * @brief Split a raw node list into tokens and operands. + * + * Consecutive operator tokens are combined into multi-character operators + * such as += or ==. + */ static std::vector make_items(const std::vector& nodes); - + /** + * @brief Parse a binary/ternary expression from a token list. + * + * The function implements a Pratt style parser. @p min_prec + * specifies the minimal operator precedence accepted for the + * current recursion level. + * + * @param items Token/operand stream produced by make_items(). + * @param idx Current position within @p items, updated on return. + * @param min_prec Minimal precedence level to parse. + */ static ast_node_ptr parse_expression(std::vector& items, size_t& idx, int min_prec); - + /** + * @brief Parse a prefix expression and any trailing postfix operators. + */ static ast_node_ptr parse_prefix(std::vector& items, size_t& idx); private: - static token make_token(const token_node& tn, const std::string& word); + static token make_token(const token_node& tn, std::string_view word); static bool match_op( - const std::vector& nodes, size_t pos, - const std::string& op + const std::vector& nodes, size_t pos, std::string_view op + ); + + static std::runtime_error make_error( + const std::string& message, const std::vector& expression, + const std::source_location& location = std::source_location::current() ); static const std::unordered_map> diff --git a/include/grouper.hpp b/include/grouper.hpp index f201b3a..78e0de8 100644 --- a/include/grouper.hpp +++ b/include/grouper.hpp @@ -26,11 +26,21 @@ #define GROUPER_HPP #include "ast.hpp" +#include +/** + * @brief Parses tokens into hierarchical groups and expressions. + * + * The grouper is responsible for constructing the AST from a token stream. + * It handles bracket matching, command separation and expression parsing. + */ class grouper { public: explicit grouper(reader& r, size_t limit = 64); - + /** + * @brief Parse a sequence starting at the current reader position. + * @param kind Expected top-level group kind. + */ group_ptr parse(group_kind kind = group_kind::file); private: @@ -43,35 +53,77 @@ class grouper { void peek(); [[nodiscard]] group_ptr identify_subgroup(const group_ptr& group) const; - + /** + * @brief Attach @p inode to the last statement if it is a secondary + * keyword. + * + * Handles constructs like else or catch by merging them + * with the previous command group. + */ [[nodiscard]] bool handle_chain(const group_ptr& result, const group_ptr& inode) const; + [[nodiscard]] static bool is_secondary_keyword(const std::string& kw); + [[nodiscard]] static std::string + keyword_from_node(const ast_node_ptr& node); + [[nodiscard]] group_ptr fetch_previous_command( + const group_ptr& result, const std::string& kw, const group_ptr& inode + ) const; + [[nodiscard]] std::string fetch_previous_keyword( + const group_ptr& prev, const std::string& kw, const group_ptr& inode + ) const; + void validate_chain( + const std::string& prev_kw, const std::string& kw, + const group_ptr& inode + ) const; + bool append_group( const group_ptr& result, const ast_node_ptr& node, bool& wait_for_condition, bool& wait_for_body, group_kind kind ) const; - void identify_body(const group_ptr& group) const; - void identify(const group_ptr& group, const group_ptr& result) const; - + /** + * @brief Transform token groups representing arithmetic into AST nodes. + * + * Runs the expression parser over certain group kinds. If the entire group + * forms a valid expression, its children are replaced with the resulting + * expression subtree. + */ void parse_arithmetic(const group_ptr& group) const; - + /** + * @brief Close the current command when a separator is encountered. + */ bool append_command(group_ptr& group, group_ptr& top, group_kind kind) const; - + /** + * @brief Begin parsing of a bracketed sub-group. + * + * Pushes a new wrapped_node onto @p top when an opening bracket is + * encountered. + */ void append_wrapped(const group_ptr& top); - + /** + * @brief Finalize a wrapped sub-group when a closing bracket is seen. + */ void close_wrapped(const group_ptr& group, group_ptr& top, group_kind kind); - + /** + * @brief Parse a sequence of tokens into the supplied group. + * + * This is the core loop that recognises brackets and separators and + * builds the initial hierarchical structure. + */ void parse_group(group_kind kind, group_ptr& group); - + /** + * @brief Safely append a node to its parent group. + */ void append( const group_ptr& parent, const ast_node_ptr& node, const std::source_location& location = std::source_location::current() ) const; - + /** + * @brief Create a formatted runtime error describing a parse failure. + */ [[nodiscard]] std::runtime_error make_error( const std::string& message, const group_ptr& context = {}, const std::source_location& location = std::source_location::current() diff --git a/include/reader.hpp b/include/reader.hpp index 7a6b15d..8830c5f 100644 --- a/include/reader.hpp +++ b/include/reader.hpp @@ -29,10 +29,13 @@ #include #include +/** + * @brief Byte and line location within the input stream. + */ struct position { - std::streamoff offset; - int line; - int column; + std::streamoff offset; ///< absolute offset from the beginning of the file + int line; ///< zero based line number + int column; ///< zero based column number }; enum class token_kind { @@ -49,14 +52,14 @@ enum class token_kind { special_character }; -struct token { +struct token final { token_kind kind; position pos; std::string word; - virtual ~token(); + ~token(); - virtual void dump( + void dump( std::ostream& os, const std::string& prefix, bool is_last ) const noexcept; @@ -65,6 +68,13 @@ struct token { using token_ptr = std::shared_ptr; +/** + * @brief Lightweight tokenizer for QuasiLang source code. + * + * The reader reads from either a file or a memory buffer and produces + * tokens on demand via next_token(). Position information is tracked so + * callers can report meaningful diagnostics. + */ class reader { public: explicit reader( @@ -74,11 +84,19 @@ class reader { explicit reader(std::string& data) noexcept; ~reader(); - + /** + * @brief Read the next token from the input stream. + * + * @param out Token object to be filled with the parsed data. + */ void next_token(token& out); void jump_to_position(position pos); - + /** + * @brief Throw an exception with the current position information. + * + * Used by parsers to abort processing while preserving diagnostics. + */ void interrupt(); position get_position() const; @@ -108,15 +126,37 @@ class reader { void read_whitespace(std::string& into); void read_keyword(std::string& into); - + /** + * @brief Read a quoted string literal with escape handling. + * + * Supports common escape sequences and Unicode escapes of the + * form \uXXXX. The resulting decoded text is stored in + * @p into without the surrounding quotes. + * @throw std::runtime_error on malformed input. + */ void read_string(std::string& into); void read_comment(std::string& into); - + /** + * @brief Parse an integer or floating point literal. + * + * Digits are consumed according to the QuasiLang grammar. If a + * fractional part or exponent is present the returned kind is + * token_kind::floating. + */ token_kind read_number(std::string& into); + // Helpers for numeric literal parsing + void read_integer_part(std::string& into); + bool read_fraction_part(std::string& into); + bool read_exponent_part(std::string& into); void init_token(token& t) const noexcept; - + /** + * @brief Helper to create formatted runtime errors. + * + * In debug builds the message includes context information such + * as the current position and originating source location. + */ [[nodiscard]] std::runtime_error make_error( const std::string& message, const std::source_location& location = std::source_location::current() diff --git a/src/ast.cpp b/src/ast.cpp index b778540..ddae320 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -237,6 +237,34 @@ void callexp_node::dump( } } +imcallexp_node::imcallexp_node(ast_node_ptr c) + : callee(std::move(c)) { + fixed_size += callee->fixed_size; + full_size += callee->full_size; +} + +void imcallexp_node::set_paren(ast_node_ptr p) { + paren = std::move(p); + has_paren = true; + fixed_size += paren->fixed_size; + full_size += paren->full_size; +} + +const position& imcallexp_node::get_start() const { + return callee->get_start(); +} + +void imcallexp_node::dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full +) const { + os << prefix << (is_last ? "`-" : "|-") << "ImplicitCall\n"; + const std::string child_prefix = prefix + (is_last ? " " : "| "); + callee->dump(os, child_prefix, !has_paren, full); + if (has_paren) { + paren->dump(os, child_prefix, true, full); + } +} + fundecl_node::fundecl_node(const callexp_ptr& proto) : callexp_node(proto ? proto->value : token {}) { if (proto) { diff --git a/src/expression.cpp b/src/expression.cpp index 3556d77..a8c401e 100644 --- a/src/expression.cpp +++ b/src/expression.cpp @@ -24,6 +24,10 @@ #include "expression.hpp" +#include +#include +#include +#include #include const std::unordered_map> @@ -50,25 +54,25 @@ const std::unordered_map expression::prefix_ops const std::unordered_map expression::postfix_ops = { { "++", 14 }, { "--", 14 } }; -token expression::make_token(const token_node& tn, const std::string& word) { +token expression::make_token(const token_node& tn, std::string_view word) { token t = tn.value; t.word = word; return t; } bool expression::match_op( - const std::vector& nodes, const size_t pos, - const std::string& op + const std::vector& nodes, size_t pos, std::string_view op ) { if (pos + op.size() > nodes.size()) { return false; } - for (size_t i = 0; i < op.size(); ++i) { - if (const auto tn - = std::dynamic_pointer_cast(nodes[pos + i]); - !tn || tn->value.word != std::string(1, op[i])) { + size_t i = 0; + for (char c : op) { + const auto tn = std::dynamic_pointer_cast(nodes[pos + i]); + if (!tn || tn->value.word != std::string(1, c)) { return false; } + ++i; } return true; } @@ -80,71 +84,23 @@ expression::make_items(const std::vector& nodes) { if (auto tn = std::dynamic_pointer_cast(nodes[i])) { if (tn->value.kind == token_kind::special_character || tn->value.kind == token_kind::separator) { - token tok; + static constexpr std::array multi_ops { + "<<=", ">>=", "++", "--", "+=", "-=", "*=", + "/=", "%=", "^=", "|=", "&=", "==", "!=", + "<=", ">=", "<<", ">>", "&&", "||" + }; + + std::string_view op = tn->value.word; size_t len = 1; - std::string op = tn->value.word; - if (match_op(nodes, i, "<<=")) { - op = "<<="; - len = 3; - } else if (match_op(nodes, i, ">>=")) { - op = ">>="; - len = 3; - } else if (match_op(nodes, i, "++")) { - op = "++"; - len = 2; - } else if (match_op(nodes, i, "--")) { - op = "--"; - len = 2; - } else if (match_op(nodes, i, "+=")) { - op = "+="; - len = 2; - } else if (match_op(nodes, i, "-=")) { - op = "-="; - len = 2; - } else if (match_op(nodes, i, "*=")) { - op = "*="; - len = 2; - } else if (match_op(nodes, i, "/=")) { - op = "/="; - len = 2; - } else if (match_op(nodes, i, "%=")) { - op = "%="; - len = 2; - } else if (match_op(nodes, i, "^=")) { - op = "^="; - len = 2; - } else if (match_op(nodes, i, "|=")) { - op = "|="; - len = 2; - } else if (match_op(nodes, i, "&=")) { - op = "&="; - len = 2; - } else if (match_op(nodes, i, "==")) { - op = "=="; - len = 2; - } else if (match_op(nodes, i, "!=")) { - op = "!="; - len = 2; - } else if (match_op(nodes, i, "<=")) { - op = "<="; - len = 2; - } else if (match_op(nodes, i, ">=")) { - op = ">="; - len = 2; - } else if (match_op(nodes, i, "<<")) { - op = "<<"; - len = 2; - } else if (match_op(nodes, i, ">>")) { - op = ">>"; - len = 2; - } else if (match_op(nodes, i, "&&")) { - op = "&&"; - len = 2; - } else if (match_op(nodes, i, "||")) { - op = "||"; - len = 2; + for (auto candidate : multi_ops) { + if (match_op(nodes, i, candidate)) { + op = candidate; + len = candidate.size(); + break; + } } - tok = make_token(*tn, op); + + token tok = make_token(*tn, op); res.push_back({ true, tok, {} }); i += len; continue; @@ -175,7 +131,7 @@ ast_node_ptr expression::parse_expression( auto middle = parse_expression(items, idx, 0); if (idx >= items.size() || !items[idx].is_op || items[idx].tok.word != ":") { - throw std::runtime_error("expected ':' in ternary expression"); + throw make_error("expected ':' in ternary expression", items); } token ctok = items[idx].tok; ++idx; @@ -214,7 +170,7 @@ ast_node_ptr expression::parse_prefix(std::vector& items, size_t& idx) { } } if (idx >= items.size()) { - throw std::runtime_error("unexpected end"); + throw make_error("unexpected end", items); } auto node = items[idx].node; ++idx; @@ -230,3 +186,28 @@ ast_node_ptr expression::parse_prefix(std::vector& items, size_t& idx) { } return node; } + +std::runtime_error expression::make_error( + const std::string& message, const std::vector& expression, + const std::source_location& location +) { + std::ostringstream oss; + oss << "[Expression-Error] " << message << ". "; + if (!expression.empty()) { + oss << "while parsing expression: "; + for (const auto& it : expression) { + if (it.is_op) { + oss << it.tok.word << ' '; + } else if (auto tn + = std::dynamic_pointer_cast(it.node)) { + oss << tn->value.word << ' '; + } else { + oss << " "; + } + } + oss << '\n'; + } + oss << "in file: " << location.file_name() << '(' << location.line() << ':' + << location.column() << ") `" << location.function_name() << "`"; + return std::runtime_error(oss.str()); +} diff --git a/src/grouper.cpp b/src/grouper.cpp index c9cf0bb..4e547e8 100644 --- a/src/grouper.cpp +++ b/src/grouper.cpp @@ -25,6 +25,7 @@ #include "grouper.hpp" #include "expression.hpp" +#include grouper::grouper(reader& r, const size_t limit) : src(r) @@ -50,7 +51,6 @@ group_ptr grouper::parse(const group_kind kind) { result->kind = kind; parse_group(kind, group); identify(group, result); - parse_arithmetic(result); return result; } @@ -105,6 +105,9 @@ void grouper::peek() { } group_ptr grouper::identify_subgroup(const group_ptr& group) const { + if (std::dynamic_pointer_cast(group)) { + return group; + } group_ptr inode; const auto kind = group->kind; if (kind == group_kind::body || kind == group_kind::list @@ -116,59 +119,83 @@ group_ptr grouper::identify_subgroup(const group_ptr& group) const { inode->limit = limit; inode->kind = kind; identify(group, inode); - parse_arithmetic(inode); return inode; } +bool grouper::is_secondary_keyword(const std::string& kw) { + return kw == "else" || kw == "elif" || kw == "catch" || kw == "finally"; +} + +std::string grouper::keyword_from_node(const ast_node_ptr& node) { + if (const auto ctrl = std::dynamic_pointer_cast(node)) { + return ctrl->value.word; + } + if (const auto cond = std::dynamic_pointer_cast(node)) { + return cond->value.word; + } + return {}; +} + +group_ptr grouper::fetch_previous_command( + const group_ptr& result, const std::string& kw, const group_ptr& inode +) const { + if (result->empty()) { + throw make_error("orphan secondary keyword: " + kw, inode); + } + const auto prev + = std::dynamic_pointer_cast(result->nodes.back()); + if (!prev || prev->nodes.empty() || prev->kind != group_kind::command) { + throw make_error("invalid predecessor for keyword: " + kw, inode); + } + return prev; +} + +std::string grouper::fetch_previous_keyword( + const group_ptr& prev, const std::string& kw, const group_ptr& inode +) const { + const auto last = prev->nodes.back(); + if (const auto ctrl = std::dynamic_pointer_cast(last)) { + return ctrl->value.word; + } + if (const auto cond = std::dynamic_pointer_cast(last)) { + return cond->value.word; + } + throw make_error("invalid predecessor for keyword: " + kw, inode); +} + +void grouper::validate_chain( + const std::string& prev_kw, const std::string& kw, const group_ptr& inode +) const { + bool allowed = false; + if (kw == "else" || kw == "elif") { + allowed = (prev_kw == "if" || prev_kw == "elif"); + } else if (kw == "catch" || kw == "finally") { + allowed = (prev_kw == "try" || prev_kw == "catch"); + } + if (!allowed) { + throw make_error( + "unexpected keyword order: " + prev_kw + " before " + kw, inode + ); + } +} + bool grouper::handle_chain( const group_ptr& result, const group_ptr& inode ) const { const auto first = inode->nodes.front(); - std::string kw; - if (const auto ctrl = std::dynamic_pointer_cast(first)) { - kw = ctrl->value.word; - } else if (const auto cond - = std::dynamic_pointer_cast(first)) { - kw = cond->value.word; + const auto kw = keyword_from_node(first); + if (!is_secondary_keyword(kw)) { + return false; } - if (kw == "else" || kw == "elif" || kw == "catch" || kw == "finally") { - if (result->empty()) { - throw make_error("orphan secondary keyword: " + kw, inode); - } - const auto prev - = std::dynamic_pointer_cast(result->nodes.back()); - if (!prev || prev->nodes.empty() || prev->kind != group_kind::command) { - throw make_error("invalid predecessor for keyword: " + kw, inode); - } - const auto last = prev->nodes.back(); - std::string prev_kw; - if (const auto ctrl = std::dynamic_pointer_cast(last)) { - prev_kw = ctrl->value.word; - } else if (const auto cond - = std::dynamic_pointer_cast(last)) { - prev_kw = cond->value.word; - } else { - throw make_error("invalid predecessor for keyword: " + kw, inode); - } - bool allowed = false; - if (kw == "else" || kw == "elif") { - allowed = (prev_kw == "if" || prev_kw == "elif"); - } else if (kw == "catch" || kw == "finally") { - allowed = (prev_kw == "try" || prev_kw == "catch"); - } - if (!allowed) { - throw make_error( - "unexpected keyword order: " + prev_kw + " before " + kw, inode - ); - } - result->pop_back(); - for (auto& ch : inode->nodes) { - append(prev, ch); - } - append(result, prev); - return true; + const auto prev = fetch_previous_command(result, kw, inode); + const auto prev_kw = fetch_previous_keyword(prev, kw, inode); + validate_chain(prev_kw, kw, inode); + result->pop_back(); + for (auto& ch : inode->nodes) { + append(prev, ch); } - return false; + append(result, prev); + return true; } bool grouper::append_group( @@ -193,12 +220,19 @@ bool grouper::append_group( append(result, ctrl); return true; } - if (const auto callexp = std::dynamic_pointer_cast(top); - callexp && kind == group_kind::body) { - const auto fundecl = std::make_shared(callexp); - fundecl->set_body(node); - append(result, fundecl); - return true; + if (const auto callexp = std::dynamic_pointer_cast(top)) { + if (kind == group_kind::body) { + const auto fundecl = std::make_shared(callexp); + fundecl->set_body(node); + append(result, fundecl); + return true; + } + if (kind == group_kind::paren) { + const auto icall = std::make_shared(top); + icall->set_paren(node); + append(result, icall); + return true; + } } const auto tok = std::dynamic_pointer_cast(top); if (tok && tok->value.kind == token_kind::keyword @@ -208,40 +242,23 @@ bool grouper::append_group( append(result, callexp); return true; } + if (kind == group_kind::paren && !tok) { + const auto icall = std::make_shared(top); + icall->set_paren(node); + append(result, icall); + return true; + } append(result, top); } return false; } -void grouper::identify_body(const group_ptr& group) const { - const auto body = std::make_shared(); - body->limit = limit; - while (!group->empty()) { - auto top = group->nodes.back(); - group->pop_back(); - if (auto tok = std::dynamic_pointer_cast(top)) { - if (const auto ctrl - = std::dynamic_pointer_cast(tok)) { - ctrl->set_body(body); - append(group, ctrl); - break; - } - if (auto callexp = std::dynamic_pointer_cast(tok)) { - const auto fundecl = std::make_shared(callexp); - fundecl->set_body(body); - append(group, fundecl); - break; - } - } - append(body, top); - } -} - void grouper::identify(const group_ptr& group, const group_ptr& result) const { bool wait_for_condition = false; bool wait_for_body = false; - for (auto& node : group->nodes) { + for (size_t i = 0; i < group->nodes.size(); ++i) { + auto node = group->nodes[i]; bool is_group = false; group_kind kind {}; @@ -268,6 +285,31 @@ void grouper::identify(const group_ptr& group, const group_ptr& result) const { continue; } } + if (wait_for_body && !is_group) { + const auto tail = std::make_shared(); + tail->limit = limit; + for (; i < group->nodes.size(); ++i) { + append(tail, group->nodes[i]); + } + const auto body = std::make_shared(); + body->limit = limit; + identify(tail, body); + + const auto top = result->nodes.back(); + result->pop_back(); + if (const auto ctrl + = std::dynamic_pointer_cast(top)) { + ctrl->set_body(body); + append(result, ctrl); + } else if (auto callexp + = std::dynamic_pointer_cast(top)) { + const auto fundecl = std::make_shared(callexp); + fundecl->set_body(body); + append(result, fundecl); + } + wait_for_body = false; + continue; + } if (const auto tok = std::dynamic_pointer_cast(node)) { if (tok->value.kind == token_kind::keyword) { const auto& w = tok->value.word; @@ -295,8 +337,10 @@ void grouper::identify(const group_ptr& group, const group_ptr& result) const { } append(result, node); } - if (wait_for_body) { - identify_body(result); + try { + parse_arithmetic(result); + } catch (const std::runtime_error& e) { + throw make_error(e.what(), result); } } @@ -461,7 +505,7 @@ void grouper::parse_arithmetic(const group_ptr& group) const { } auto items = expression::make_items(group->nodes); size_t idx = 0; - auto expr = expression::parse_expression(items, idx, 0); + const auto expr = expression::parse_expression(items, idx, 0); if (idx == items.size()) { group->nodes.clear(); group->weights = {}; diff --git a/src/reader.cpp b/src/reader.cpp index 514e699..c3cc38c 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -105,7 +105,7 @@ void reader::reload_buffer() { } file_offset = ifs.tellg(); buffer.resize(static_cast(max_buffer_size)); - ifs.read(&buffer[0], max_buffer_size); + ifs.read(buffer.data(), max_buffer_size); const auto got = ifs.gcount(); buffer.resize(static_cast(got)); buffer_position = 0; @@ -246,9 +246,7 @@ void reader::read_string(std::string& into) { advance_char(); } -token_kind reader::read_number(std::string& into) { - into.clear(); - bool is_float = false; +void reader::read_integer_part(std::string& into) { if (is_valid() && peek_char() == '0') { into += get_char(); if (is_valid() && std::isdigit(peek_uchar())) { @@ -261,9 +259,10 @@ token_kind reader::read_number(std::string& into) { } else { throw make_error("expected digit"); } +} +bool reader::read_fraction_part(std::string& into) { if (is_valid() && peek_char() == '.') { - is_float = true; into += get_char(); if (!is_valid() || !std::isdigit(peek_uchar())) { throw make_error("digit expected after decimal"); @@ -271,10 +270,13 @@ token_kind reader::read_number(std::string& into) { while (is_valid() && std::isdigit(peek_uchar())) { into += get_char(); } + return true; } + return false; +} +bool reader::read_exponent_part(std::string& into) { if (is_valid() && (peek_char() == 'e' || peek_char() == 'E')) { - is_float = true; into += get_char(); if (is_valid() && (peek_char() == '+' || peek_char() == '-')) { into += get_char(); @@ -285,6 +287,17 @@ token_kind reader::read_number(std::string& into) { while (is_valid() && std::isdigit(peek_uchar())) { into += get_char(); } + return true; + } + return false; +} + +token_kind reader::read_number(std::string& into) { + into.clear(); + read_integer_part(into); + bool is_float = read_fraction_part(into); + if (read_exponent_part(into)) { + is_float = true; } return is_float ? token_kind::floating : token_kind::integer; } diff --git a/tests/arithmetic_tests.cpp b/tests/arithmetic_tests.cpp index 3b9c879..a387e03 100644 --- a/tests/arithmetic_tests.cpp +++ b/tests/arithmetic_tests.cpp @@ -22,16 +22,20 @@ * SOFTWARE. */ +#include "expression.hpp" #include "grouper.hpp" + #include +#include + TEST(ArithmeticTest, ParseBinary) { - std::string input = "a+b;"; + std::string input = "a+b"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* bin = dynamic_cast(cmd->nodes[0].get()); @@ -40,12 +44,12 @@ TEST(ArithmeticTest, ParseBinary) { } TEST(ArithmeticTest, ParsePrefixUnary) { - std::string input = "+a;"; + std::string input = "+a"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* un = dynamic_cast(cmd->nodes[0].get()); @@ -54,12 +58,12 @@ TEST(ArithmeticTest, ParsePrefixUnary) { } TEST(ArithmeticTest, ParsePostfixUnary) { - std::string input = "a++;"; + std::string input = "a++"; reader r { input }; grouper g { r }; - auto res = g.parse(); + const auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* un = dynamic_cast(cmd->nodes[0].get()); @@ -68,12 +72,12 @@ TEST(ArithmeticTest, ParsePostfixUnary) { } TEST(ArithmeticTest, ParseNestedGroups) { - std::string input = "++(a--);"; + std::string input = "++(a--)"; reader r { input }; grouper g { r }; auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* pre = dynamic_cast(cmd->nodes[0].get()); @@ -88,4 +92,43 @@ TEST(ArithmeticTest, ParseNestedGroups) { auto* post = dynamic_cast(inner->nodes[0].get()); ASSERT_NE(post, nullptr); EXPECT_FALSE(post->is_prefix); -} \ No newline at end of file +} + +TEST(ExpressionTest, TernaryBranches) { + std::vector nodes; + auto make_tok = [&](std::string w, const token_kind k) { + auto t = std::make_shared(); + t->value.word = std::move(w); + t->value.kind = k; + return t; + }; + nodes.push_back(make_tok("a", token_kind::keyword)); + nodes.push_back(make_tok("?", token_kind::special_character)); + nodes.push_back(make_tok("b", token_kind::keyword)); + nodes.push_back(make_tok(":", token_kind::separator)); + nodes.push_back(make_tok("c", token_kind::keyword)); + auto items = expression::make_items(nodes); + size_t idx = 0; + auto n = expression::parse_expression(items, idx, 0); + ASSERT_TRUE(std::dynamic_pointer_cast(n)); + EXPECT_EQ(idx, items.size()); + + idx = 0; + n = expression::parse_expression(items, idx, 3); + const auto tok = std::dynamic_pointer_cast(n); + ASSERT_TRUE(tok); + EXPECT_EQ(tok->value.word, "a"); + EXPECT_EQ(idx, 1u); + + items.pop_back(); + idx = 0; + EXPECT_THROW( + expression::parse_expression(items, idx, 0), std::runtime_error + ); +} + +TEST(ExpressionTest, ParsePrefixUnexpectedEnd) { + std::vector items; + size_t idx = 0; + EXPECT_THROW(expression::parse_prefix(items, idx), std::runtime_error); +} diff --git a/tests/ast_tests.cpp b/tests/ast_tests.cpp index dc81c43..049520f 100644 --- a/tests/ast_tests.cpp +++ b/tests/ast_tests.cpp @@ -26,14 +26,15 @@ #include TEST(AstDump, ExamplePartAST) { - for (int i = 0; i < 12; ++i) { + for (int i = 0; i < 13; ++i) { try { std::stringstream idx; idx << std::setfill('0') << std::setw(2) << i; std::ostringstream path_in; path_in << "test_data/test" << idx.str() << ".qc"; reader r(path_in.str()); - grouper g { r, 512 }; + size_t extra = (i == 12 ? 2 : 1); + grouper g { r, 64 * extra }; auto res = g.parse(); std::ostringstream path_out; path_out << "test_data/test" << idx.str() << ".dump"; @@ -47,14 +48,15 @@ TEST(AstDump, ExamplePartAST) { } TEST(AstDump, ExampleFullAST) { - for (int i = 0; i < 12; ++i) { + for (int i = 0; i < 13; ++i) { try { std::stringstream idx; idx << std::setfill('0') << std::setw(2) << i; std::ostringstream path_in; path_in << "test_data/test" << idx.str() << ".qc"; reader r(path_in.str()); - grouper g { r, 512 }; + size_t extra = (i == 12 ? 2 : 1); + grouper g { r, 64 * extra }; auto res = g.parse(); std::ostringstream path_out; path_out << "test_data/test" << idx.str() << ".full-dump"; diff --git a/tests/grouper_tests.cpp b/tests/grouper_tests.cpp index 763fe81..9e9ffbf 100644 --- a/tests/grouper_tests.cpp +++ b/tests/grouper_tests.cpp @@ -22,9 +22,10 @@ * SOFTWARE. */ -#include "grouper.hpp" #include +#include "grouper.hpp" + TEST(GrouperTest, ParsesSimpleBody) { std::string input = "{a;b}"; reader r { input }; @@ -116,3 +117,55 @@ TEST(GrouperTest, LimitTooSmallThrows) { } } } + +TEST(GrouperChainTest, ErrorScenarios) { + struct Case { + std::string input; + std::string msg; + } cases[] = { + { "else a", "orphan secondary keyword" }, + { "a,else b", "invalid predecessor for keyword" }, + { "a;else b", "invalid predecessor for keyword" }, + { "try{b};else{c}", "unexpected keyword order" }, + }; + + for (const auto& c : cases) { + reader r { const_cast(c.input) }; + grouper g { r }; + try { + g.parse(); + FAIL() << "no exception"; + } catch (const std::runtime_error& e) { + EXPECT_NE(std::string(e.what()).find(c.msg), std::string::npos) + << c.input; + } + } +} + +TEST(GrouperIdentifyTest, MissingCondition) { + std::string input = "if{a}"; + reader r { input }; + grouper g { r }; + EXPECT_THROW({ g.parse(); }, std::runtime_error); +} + +TEST(GrouperPlaceholderTest, PreservesPlaceholders) { + std::string input = "{[a,b,c,d],[e,f,g,h],[i,j,k,l]}"; + reader r { input }; + grouper g { r, 4 }; + + auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + auto* body = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(body, nullptr); + + bool has_placeholder = false; + for (const auto& ch : body->nodes) { + if (std::dynamic_pointer_cast(ch)) { + has_placeholder = true; + break; + } + } + EXPECT_TRUE(has_placeholder); +} \ No newline at end of file diff --git a/tests/identify_tests.cpp b/tests/identify_tests.cpp index 22656fe..26d9ad2 100644 --- a/tests/identify_tests.cpp +++ b/tests/identify_tests.cpp @@ -110,6 +110,26 @@ TEST(IdentifierTest, IdentifyCallExpression) { EXPECT_TRUE(call->has_paren); } +TEST(IdentifierTest, IdentifyImplicitCallExpression) { + std::string input = "main(a)(b)"; + reader r { input }; + grouper g { r }; + + auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* icall = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(icall, nullptr); + ASSERT_TRUE(icall->has_paren); + + auto* inner = dynamic_cast(icall->callee.get()); + ASSERT_NE(inner, nullptr); + EXPECT_EQ(inner->value.word, "main"); + EXPECT_TRUE(inner->has_paren); +} + TEST(IdentifierTest, IdentifyFunctionDecl) { std::string input = "main(a){b}"; reader r { input }; @@ -142,3 +162,173 @@ TEST(IdentifierTest, IdentifyReturnStatement) { EXPECT_EQ(jmp->value.word, "return"); EXPECT_TRUE(jmp->has_body); } + +TEST(IdentifierTest, ParseGotoExpression) { + std::string input = "return a+b"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + auto* bin = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); +} + +TEST(IdentifierTest, ParseComplexReturnExpression) { + std::string input = "return ((x * x + y * y + ((x << y) - (y << x))) ^ (y " + "| x)) + ((x % 7) * (y % 5));"; + reader r { input }; + grouper g { r }; + EXPECT_NO_THROW(g.parse()); +} + +TEST(IdentifierTest, ParseElseExpression) { + std::string input = "if(a)b;else c+d"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_GE(halt->size(), 2u); + auto* els = dynamic_cast(halt->nodes.back().get()); + ASSERT_NE(els, nullptr); + ASSERT_TRUE(els->has_body); + auto* body = dynamic_cast(els->body.get()); + ASSERT_NE(body, nullptr); + auto* bin = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); +} + +TEST(IdentifierTest, ParseReturnIfElse) { + std::string input = "return if(a){b}else c"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + ASSERT_EQ(body->size(), 2u); + + auto* cond = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(cond, nullptr); + EXPECT_EQ(cond->value.word, "if"); + EXPECT_TRUE(cond->has_paren); + EXPECT_TRUE(cond->has_body); + + auto* els = dynamic_cast(body->nodes[1].get()); + ASSERT_NE(els, nullptr); + EXPECT_EQ(els->value.word, "else"); + EXPECT_TRUE(els->has_body); +} + +TEST(IdentifierTest, ParseReturnIfElseExpression) { + std::string input = "return if(a){b+c}else c(d)e"; + reader r { input }; + grouper g { r }; + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + ASSERT_TRUE(jmp->has_body); + + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + ASSERT_EQ(body->size(), 2u); + + auto* cond = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(cond, nullptr); + auto* cbody = dynamic_cast(cond->body.get()); + ASSERT_NE(cbody, nullptr); + auto* ccmd = dynamic_cast(cbody->nodes[0].get()); + ASSERT_NE(ccmd, nullptr); + auto* bin = dynamic_cast(ccmd->nodes[0].get()); + ASSERT_NE(bin, nullptr); + EXPECT_EQ(bin->op.word, "+"); + + auto* els = dynamic_cast(body->nodes[1].get()); + ASSERT_NE(els, nullptr); + auto* ebody = dynamic_cast(els->body.get()); + ASSERT_NE(ebody, nullptr); + if (auto* icall = dynamic_cast(ebody->nodes[0].get())) { + ASSERT_TRUE(icall->has_paren); + auto* inner = dynamic_cast(icall->callee.get()); + ASSERT_NE(inner, nullptr); + EXPECT_EQ(inner->value.word, "c"); + EXPECT_TRUE(inner->has_paren); + } else { + ASSERT_GE(ebody->size(), 2u); + auto* call = dynamic_cast(ebody->nodes[0].get()); + ASSERT_NE(call, nullptr); + EXPECT_EQ(call->value.word, "c"); + EXPECT_TRUE(call->has_paren); + auto* tail = dynamic_cast(ebody->nodes[1].get()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->value.word, "e"); + } +} + +TEST(IdentifierTest, IdentifyLoops) { + for (auto [input, kw] : + { std::pair { "while(a){b}", "while" }, + { "for(a;b;c){d}", "for" } }) { + reader r { input }; + grouper g { r }; + + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* loop = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(loop, nullptr); + EXPECT_EQ(loop->value.word, kw); + EXPECT_TRUE(loop->is_loop); + EXPECT_TRUE(loop->has_paren); + EXPECT_TRUE(loop->has_body); + } +} + +TEST(IdentifierTest, IdentifyJumpStatements) { + for (auto [input, kw, has_body] : + { std::tuple { "break", "break", + false }, + { "continue", "continue", false }, + { "goto a", "goto", true }, + { "return b", "return", true } }) { + reader r { input }; + grouper g { r }; + + const auto res = g.parse(); + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + ASSERT_EQ(halt->size(), 1u); + + auto* jmp = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(jmp, nullptr); + EXPECT_EQ(jmp->value.word, kw); + EXPECT_EQ(jmp->has_body, has_body); + if (has_body) { + auto* body = dynamic_cast(jmp->body.get()); + ASSERT_NE(body, nullptr); + auto* tok = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(tok, nullptr); + EXPECT_EQ(tok->value.word, kw == "goto" ? "a" : "b"); + } + } +} \ No newline at end of file diff --git a/tests/reader_tests.cpp b/tests/reader_tests.cpp index 0c8919e..d8f873b 100644 --- a/tests/reader_tests.cpp +++ b/tests/reader_tests.cpp @@ -180,3 +180,48 @@ TEST(ReaderTest, CommentToken) { r.next_token(t); EXPECT_EQ(t.kind, token_kind::eof); } + +TEST(ReaderTest, FileOpenFailure) { + EXPECT_THROW(reader r { "nonexistent_file.qc" }, std::invalid_argument); +} + +TEST(ReaderTest, TokenDump) { + token t; + t.kind = token_kind::integer; + t.pos.line = 1; + t.pos.column = 2; + t.word = "42"; + std::ostringstream oss1; + std::ostringstream oss2; + t.dump(oss1); + t.dump(oss2, "", true); + EXPECT_EQ(oss1.str(), oss2.str()); +} + +TEST(ReaderTest, MissingClosingComment) { + std::string str = "/* unclosed"; + reader r { str }; + token t; + EXPECT_THROW(r.next_token(t), std::runtime_error); +} + +TEST(ReaderTest, InvalidUnicodeEscape) { + std::string str = "\"\\u00g0\""; + reader r { str }; + token t; + EXPECT_THROW(r.next_token(t), std::runtime_error); +} + +TEST(ReaderTest, InvalidEscapeSequence) { + std::string str = "\"\\q\""; + reader r { str }; + token t; + EXPECT_THROW(r.next_token(t), std::runtime_error); +} + +TEST(ReaderTest, MissingClosingQuote) { + std::string str = "\"no end"; + reader r { str }; + token t; + EXPECT_THROW(r.next_token(t), std::runtime_error); +} From 5a6d8388ff840cbd0a5621f4e686fd4fe8e10425 Mon Sep 17 00:00:00 2001 From: Yaroslav Riabtsev Date: Sat, 23 Aug 2025 19:09:07 +0200 Subject: [PATCH 2/4] readme split --- .github/workflows/html.yml | 13 ++- .github/workflows/tests.yml | 4 + data/readme.md | 161 ++++++++++++++++++++++++++++++++++++ readme.md | 81 +++--------------- 4 files changed, 188 insertions(+), 71 deletions(-) create mode 100644 data/readme.md diff --git a/.github/workflows/html.yml b/.github/workflows/html.yml index 1932420..b775a82 100644 --- a/.github/workflows/html.yml +++ b/.github/workflows/html.yml @@ -1,9 +1,14 @@ -name: Docs & Coverage +name: Deploy on: push: branches: - master + paths: + - 'src/**/*.cpp' + - 'include/**/*.hpp' + - 'tests/**/*.cpp' + - 'readme.md' workflow_dispatch: @@ -48,6 +53,12 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} verbose: true + - name: Upload coverage to Codacy + uses: codacy/codacy-coverage-reporter-action@a38818475bb21847788496e9f0fddaa4e84955ba + with: + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + coverage-reports: build/coverage.info + - name: Clean build run: rm -rf build/ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f0d121d..35c2113 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,6 +4,10 @@ on: pull_request: branches: - master + paths: + - 'src/**/*.cpp' + - 'include/**/*.hpp' + - 'tests/**/*.cpp' jobs: build-and-test: diff --git a/data/readme.md b/data/readme.md new file mode 100644 index 0000000..4dfe8d3 --- /dev/null +++ b/data/readme.md @@ -0,0 +1,161 @@ +# QuasiLang Syntax Guide + +QuasiLang is the source language parsed by **QuasiPiler**. +Sample programs: `test00.qc`–`test12.qc`. + +--- + +## Basics + +* **Comments** + * Line: `// comment` + * Block: `/* comment */` — may be inline or multi-line. +* **Whitespace**: ignored except as a separator. +* **Identifiers**: letters, digits, `_` (cannot start with a digit). +* **Literals** + * Numbers: integers, floats, exponents. + * Strings: `'...'` or `"..."` with escapes (`\n`, `\t`, `\\`). + * Booleans: `true`, `false` + * Null: `null` + +--- + +## Expressions + +* **Arithmetic & Assignment**: `+ - * / %` with `= += -= *= /= %=` +* **Comparison & Logic**: `== != < <= > >=` and `&& || !` +* **Bitwise & Shift**: `& | ^ << >>` with `&= |= ^= <<= >>=` +* **Increment/Decrement**: prefix and postfix `++ --` +* **Ternary**: `cond ? a : b` +* **Member access**: `obj.key`, `obj["key"]` +* **Indexing & slicing**: + * Single index: `arr[i]` + * Slices: `arr[start:end:step]` + * Any part may be omitted: `arr[:end]`, `arr[start:]`, `arr[::step]`, `arr[::]` +* **Function calls**: `f(arg1, arg2)` +* **Function declaration**: `fu(x,y){ return x+y; }` + +--- + +## Data Structures + +* **Lists**: `[1, 2, 3]` +* **Objects**: `{ "key": value, "other": 42 }` + *Keys must be constant strings.* +* **Tuples `()`** + + * Foreach bundles (abstract streams) + * Not concrete lists unless materialized with `list(...)` + +### Tuple-based foreach + +Tuples drive vectorized foreach expansion when applied after an expression: + +1. **Member/Index tuple** + + ```qc + obj.(a, b, c) // (obj.a, obj.b, obj.c) + arr[(i, j, k)] // (arr[i], arr[j], arr[k]) + ``` + +2. **Operators with tuples** + + ```qc + 5 + (1,2,3) // (6, 7, 8) + list(5 + (1,2,3)) // [6, 7, 8] + ``` + +3. **Chaining** + + ```qc + obj.(a,b).(x,y) // (obj.a.x, obj.a.y, obj.b.x, obj.b.y) + arr[(1,4)].(id,total) // (arr[1].id, arr[1].total, arr[4].id, arr[4].total) + ``` + +4. **Slices with tuples** + + ```qc + arr[1:6:2][(2,3)] // (arr[1][2], arr[1][3], arr[3][2], arr[3][3], arr[5][2], arr[5][3]) + list(arr[1:6:2])[2] // arr[5] + ``` + +--- + +## Statements & Declarations + +* **Variables**: `name = expression;` +* **Functions**: `name(p1,p2){ ... }` or `name = fu(p1){ ... };` +* **Conditionals**: + + ```qc + if (cond) { ... } + elif (other) { ... } + else { ... } + ``` + + Bodies may omit braces for a single statement: + + ```qc + if (cond) do_something(); + while (ok) step(); + ``` + + Return with conditional: + + ```qc + return if(a){b} else c; // valid + return if(a) b; else c; // invalid due to precedence + ``` +* **Loops**: `while(cond){...}`, `for(init; cond; step){...}` + Both also allow single-statement bodies without `{}`. +* **Exceptions**: + + ```qc + try { ... } catch (err) { ... } finally { ... } + ``` +* **Jumps**: `break;`, `continue;`, `return expr;`, `goto label;` +* **Labels**: `label_name:` + +--- + +## Examples + +**Foreach with tuples** + +```qc +user = { "id": 7, "name": "Ada", "meta": { "city":"Paris", "tz":"CET" } }; + +user.(id, name) // 7, "Ada" +user.(meta).("city","tz") // "Paris", "CET" +orders[(0,2)].(id,total) // orders[0].id, orders[0].total, orders[2].id, orders[2].total +``` + +**Slices and foreach** + +```qc +matrix = [row0,row1,row2,row3,row4,row5]; +matrix[1:6:2][(2,3)]; // row1[2], row1[3], row3[2], row3[3], row5[2], row5[3] +list(matrix[1:6:2])[2]; // row5 +``` + +**Arithmetic with tuples** + +```qc +print(5 + (1,2,3)); // 6,7,8 +print(list(5 + (1,2,3))); // [6,7,8] +``` + +**Control flow** + +```qc +if (ready) start(); +while (i < n) i++; +return if(ok){result} else null; +``` + +**Chained functions** + +```qc +adder = fu(x){ return fu(y){ return fu(z){ return x+y+z; }; }; }; +sum = adder(1)(2)(3); +``` diff --git a/readme.md b/readme.md index 08a9c57..fca23f4 100644 --- a/readme.md +++ b/readme.md @@ -2,11 +2,12 @@ ## _— the Hunchback Dragon of Compilers_ -[//]: # ([![version](https://img.shields.io/github/v/release/YaRiabtsev/QuasiPiler?include_prereleases)](https://github.com/YaRiabtsev/QuasiPiler/releases/latest)) -[//]: # ([![Checks](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/tests.yml/badge.svg)](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/tests.yml)) -[//]: # ([![Docs & Coverage](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/html.yml/badge.svg)](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/html.yml)) -[![codecov](https://codecov.io/gh/YaRiabtsev/QuasiPiler/graph/badge.svg?token=MCNEJFWMDU)](https://codecov.io/gh/YaRiabtsev/QuasiPiler) -[![license](https://img.shields.io/github/license/YaRiabtsev/QuasiPiler?color=e6e6e6)](https://github.com/YaRiabtsev/QuasiPiler/blob/master/license) +[![version](https://img.shields.io/github/v/release/ninjaro/QuasiPiler?include_prereleases)](https://github.com/ninjaro/QuasiPiler/releases/latest) +[![Checks](https://github.com/ninjaro/QuasiPiler/actions/workflows/tests.yml/badge.svg)](https://github.com/ninjaro/QuasiPiler/actions/workflows/tests.yml) +[![Deploy](https://github.com/ninjaro/QuasiPiler/actions/workflows/html.yml/badge.svg)](https://github.com/ninjaro/QuasiPiler/actions/workflows/html.yml) +[![codecov](https://codecov.io/gh/ninjaro/QuasiPiler/graph/badge.svg?token=MCNEJFWMDU)](https://codecov.io/gh/ninjaro/QuasiPiler) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/940dcf5e3cf64e759ce6ad17176d31f4)](https://app.codacy.com/gh/ninjaro/QuasiPiler/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) +[![license](https://img.shields.io/github/license/ninjaro/QuasiPiler?color=e6e6e6)](https://github.com/ninjaro/QuasiPiler/blob/master/license) > “A one-eyed transpiler is much more incomplete than a blind transpiler, for he knows what it is that’s lacking.” > — Victor-Marie of Gugle Inc. (1998–2017) @@ -37,72 +38,12 @@ This repo is my sanctuary under license — it begs mercy, not stars. I’ll bel ``` * ``: path to your QuasiCode file -## QuasiLang Syntax +## QuasiLang -### Basics +**See [QuasiLang Syntax Guide](data/readme.md) for the full syntax guide.** -- **Comments** - - Line comments begin with `//`. - - Block comments are enclosed in `/*` and `*/`. -- **Whitespace** is ignored except as a separator. -- **Identifiers** use letters, digits and underscores and may not start with a digit. -- **Literals** - - Numbers support integer and floating point forms (with optional exponent). - - Strings can use either single `'` or double `"` quotes and support common escape sequences. +**See [include](include/) and [src](src/) for implementation, and check [tests](tests/) for more examples.** -[//]: # (- **Separators and grouping**) - -[//]: # ( - `,` comma, `;` semicolon and `:` colon act as separators.) - -[//]: # ( - `()` parentheses, `[]` brackets and `{}` braces form groups.) - -[//]: # ( - Nested groups are used for lists, code blocks and expressions.) - -[//]: # () -[//]: # (### Expressions) - -[//]: # () -[//]: # (- Standard arithmetic and assignment operators are recognized: `+`, `-`, `*`, `/`, `%`, `=` and their compound forms (`+=`, `-=`, `*=`, `/=`, `%=`).) - -[//]: # (- Comparison and logical operators include `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!`.) - -[//]: # (- Bitwise operators: `&`, `|`, `^`, `<<`, `>>` and their compound assignments.) - -[//]: # (- Increment and decrement operators `++` and `--` are supported in prefix and postfix form.) - -[//]: # (- Member access uses `.` and indexing uses `[expr]`. Slice syntax `[start:end:step]` is available.) - -[//]: # (- Function calls use the form `name(arg1, arg2)`.) - -### Statements and Declarations - -[//]: # (- **Variable assignment** follows `name = expression;`.) - -[//]: # (- **Function declarations** use `name(param1, param2) { ... }`.) - -[//]: # (- **Control flow**) - -[//]: # ( - Conditional statements: `if (cond) { ... }`, optional `else` or `elif` blocks.) - -[//]: # ( - Loops: `while (cond) { ... }` and `for(init; cond; step) { ... }`.) - -[//]: # ( - `break`, `continue`, `return` and `goto` appear as standalone keywords and may take an optional expression for `return`.) - -[//]: # ( - `try { ... } catch { ... }` for exception handling.) - -[//]: # (- **Labels** can be defined with `label_name:` and referenced via `goto label_name`.) - -### Data Structures - -[//]: # (- **Lists** use `[item1, item2, ...]`.) - -[//]: # (- **Dictionaries/objects** use `{ "key": value }`.) - -## Examples - -```qc -// todo: Example of a simple QuasiLang program -``` ## Documentation and Contributing @@ -117,8 +58,8 @@ To build and run tests, enable debug mode, or generate coverage reports: $ cmake --build build --target coverage ``` -For detailed documentation, see the [Documentation](https://yariabtsev.github.io/QuasiPiler/doc/) and for the latest -coverage report, see [Coverage](https://yariabtsev.github.io/QuasiPiler/cov/). +For detailed documentation, see the [Documentation](https://ninjaro.github.io/QuasiPiler/doc/) and for the latest +coverage report, see [Coverage](https://ninjaro.github.io/QuasiPiler/cov/). ## Security Policy From 54bf56b51fdf091a24f3f6f608abe49cb90b11c5 Mon Sep 17 00:00:00 2001 From: Yaroslav Riabtsev Date: Sat, 23 Aug 2025 19:31:13 +0200 Subject: [PATCH 3/4] reader simplify --- include/reader.hpp | 1 + src/reader.cpp | 41 ++++++++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/include/reader.hpp b/include/reader.hpp index 8830c5f..e7b2322 100644 --- a/include/reader.hpp +++ b/include/reader.hpp @@ -146,6 +146,7 @@ class reader { */ token_kind read_number(std::string& into); // Helpers for numeric literal parsing + void read_digits(std::string& into); void read_integer_part(std::string& into); bool read_fraction_part(std::string& into); bool read_exponent_part(std::string& into); diff --git a/src/reader.cpp b/src/reader.cpp index c3cc38c..2f68fd4 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -246,6 +246,12 @@ void reader::read_string(std::string& into) { advance_char(); } +void reader::read_digits(std::string& into) { + do { + into += get_char(); + } while (is_valid() && std::isdigit(peek_uchar())); +} + void reader::read_integer_part(std::string& into) { if (is_valid() && peek_char() == '0') { into += get_char(); @@ -253,9 +259,7 @@ void reader::read_integer_part(std::string& into) { throw make_error("leading zeros not allowed"); } } else if (is_valid() && std::isdigit(peek_uchar())) { - do { - into += get_char(); - } while (is_valid() && std::isdigit(peek_uchar())); + read_digits(into); } else { throw make_error("expected digit"); } @@ -267,29 +271,32 @@ bool reader::read_fraction_part(std::string& into) { if (!is_valid() || !std::isdigit(peek_uchar())) { throw make_error("digit expected after decimal"); } - while (is_valid() && std::isdigit(peek_uchar())) { - into += get_char(); - } + read_digits(into); return true; } return false; } bool reader::read_exponent_part(std::string& into) { - if (is_valid() && (peek_char() == 'e' || peek_char() == 'E')) { - into += get_char(); - if (is_valid() && (peek_char() == '+' || peek_char() == '-')) { - into += get_char(); - } - if (!is_valid() || !std::isdigit(peek_uchar())) { - throw make_error("digit expected after exponent"); - } - while (is_valid() && std::isdigit(peek_uchar())) { + if (!is_valid()) { + return false; + } + char c = peek_char(); + if (c != 'e' && c != 'E') { + return false; + } + into += get_char(); + if (is_valid()) { + c = peek_char(); + if (c == '+' || c == '-') { into += get_char(); } - return true; } - return false; + if (!is_valid() || !std::isdigit(peek_uchar())) { + throw make_error("digit expected after exponent"); + } + read_digits(into); + return true; } token_kind reader::read_number(std::string& into) { From 8ccad04a9e05f5fe841850d61758c6bd9e11eeed Mon Sep 17 00:00:00 2001 From: Yaroslav Riabtsev Date: Sat, 23 Aug 2025 19:58:42 +0200 Subject: [PATCH 4/4] reader fix --- include/reader.hpp | 1 + src/reader.cpp | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/reader.hpp b/include/reader.hpp index e7b2322..703d4c8 100644 --- a/include/reader.hpp +++ b/include/reader.hpp @@ -146,6 +146,7 @@ class reader { */ token_kind read_number(std::string& into); // Helpers for numeric literal parsing + bool check_digit() const noexcept; void read_digits(std::string& into); void read_integer_part(std::string& into); bool read_fraction_part(std::string& into); diff --git a/src/reader.cpp b/src/reader.cpp index 2f68fd4..95c0bbb 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -247,30 +247,28 @@ void reader::read_string(std::string& into) { } void reader::read_digits(std::string& into) { + if (!check_digit()) { + throw make_error("digit expected after decimal"); + } do { into += get_char(); - } while (is_valid() && std::isdigit(peek_uchar())); + } while (check_digit()); } void reader::read_integer_part(std::string& into) { if (is_valid() && peek_char() == '0') { into += get_char(); - if (is_valid() && std::isdigit(peek_uchar())) { + if (check_digit()) { throw make_error("leading zeros not allowed"); } - } else if (is_valid() && std::isdigit(peek_uchar())) { - read_digits(into); } else { - throw make_error("expected digit"); + read_digits(into); } } bool reader::read_fraction_part(std::string& into) { if (is_valid() && peek_char() == '.') { into += get_char(); - if (!is_valid() || !std::isdigit(peek_uchar())) { - throw make_error("digit expected after decimal"); - } read_digits(into); return true; } @@ -282,7 +280,7 @@ bool reader::read_exponent_part(std::string& into) { return false; } char c = peek_char(); - if (c != 'e' && c != 'E') { + if (std::tolower(c) != 'e') { return false; } into += get_char(); @@ -292,9 +290,6 @@ bool reader::read_exponent_part(std::string& into) { into += get_char(); } } - if (!is_valid() || !std::isdigit(peek_uchar())) { - throw make_error("digit expected after exponent"); - } read_digits(into); return true; } @@ -309,6 +304,10 @@ token_kind reader::read_number(std::string& into) { return is_float ? token_kind::floating : token_kind::integer; } +bool reader::check_digit() const noexcept { + return is_valid() && std::isdigit(peek_uchar()); +} + void reader::init_token(token& t) const noexcept { t.word.clear(); t.pos = get_position();