diff --git a/.github/workflows/html.yml b/.github/workflows/html.yml index 1932420..b775a82 100644 --- a/.github/workflows/html.yml +++ b/.github/workflows/html.yml @@ -1,9 +1,14 @@ -name: Docs & Coverage +name: Deploy on: push: branches: - master + paths: + - 'src/**/*.cpp' + - 'include/**/*.hpp' + - 'tests/**/*.cpp' + - 'readme.md' workflow_dispatch: @@ -48,6 +53,12 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} verbose: true + - name: Upload coverage to Codacy + uses: codacy/codacy-coverage-reporter-action@a38818475bb21847788496e9f0fddaa4e84955ba + with: + project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} + coverage-reports: build/coverage.info + - name: Clean build run: rm -rf build/ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f0d121d..35c2113 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,6 +4,10 @@ on: pull_request: branches: - master + paths: + - 'src/**/*.cpp' + - 'include/**/*.hpp' + - 'tests/**/*.cpp' jobs: build-and-test: diff --git a/include/ast.hpp b/include/ast.hpp index 959a57b..8fd21cb 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -66,13 +66,30 @@ enum class group_kind { file, body, list, paren, command, item, key, halt }; [[nodiscard]] const char* group_kind_name(group_kind k) noexcept; +/** + * @brief Collection of AST nodes with a configurable size limit. + * + * Large sub-groups may be replaced with placeholder nodes to keep + * @c fixed_size within @c limit. + */ struct group_node : ast_node { - size_t limit; + size_t limit; ///< Maximum allowed node weight group_kind kind { group_kind::halt }; std::vector nodes; - std::priority_queue> - weights; /// node_size -> node_index - + /// queue of heavy child nodes: + std::priority_queue> weights; + + /** + * @brief Append a child node while respecting the size limit. + * + * Nodes contribute their @c fixed_size and @c full_size to the parent. If + * the accumulated @c fixed_size exceeds @c limit, larger child groups are + * replaced with ::placeholder_node instances so the tree can be lazily + * expanded later. + * + * @param node Node to append. + * @param src Reader used to reconstruct squeezed subtrees on demand. + */ void append(ast_node_ptr node, const reader& src); [[nodiscard]] bool empty() const noexcept override; [[nodiscard]] size_t size() const noexcept; @@ -81,7 +98,17 @@ struct group_node : ast_node { void dump( std::ostream& os, const std::string& prefix, bool is_last, bool full ) const override; - const position& get_start() const override; + [[nodiscard]] const position& get_start() const override; + /** + * @brief Replace a child group with a placeholder. + * + * The placeholder stores enough information to re-read the original subtree + * from @p src later. This is used when a group's @c fixed_size would exceed + * the configured limit and thus needs to be collapsed. + * + * @param index Index of the child to replace. + * @param src Reader used to recreate the subtree if needed. + */ void squeeze(size_t index, const reader& src); void pop_back(); }; @@ -95,7 +122,14 @@ struct wrapped_node : group_node { using wrapped_ptr = std::shared_ptr; -struct placeholder_node : wrapped_node { +/** + * @brief Node standing in place of a squeezed sub-tree. + * + * When a group exceeds the configured size limit it can be replaced by a + * placeholder node. The original reader is stored so the subtree can be + * reconstructed on demand. + */ +struct placeholder_node final : wrapped_node { reader* src { nullptr }; void dump( std::ostream& os, const std::string& prefix, bool is_last, bool full diff --git a/include/expression.hpp b/include/expression.hpp index e6800d9..d46f77a 100644 --- a/include/expression.hpp +++ b/include/expression.hpp @@ -31,17 +31,40 @@ class expression { public: + /** + * @brief Element of the input stream for the expression parser. + * @details When @c is_op is set the item represents an operator token; + * otherwise it stores a pointer to an AST node. + */ struct item { bool is_op { false }; token tok; ast_node_ptr node; }; + /** + * @brief Split a raw node list into tokens and operands. + * + * Consecutive operator tokens are combined into multi-character operators + * such as += or ==. + */ static std::vector make_items(const std::vector& nodes); - + /** + * @brief Parse a binary/ternary expression from a token list. + * + * The function implements a Pratt style parser. @p min_prec + * specifies the minimal operator precedence accepted for the + * current recursion level. + * + * @param items Token/operand stream produced by make_items(). + * @param idx Current position within @p items, updated on return. + * @param min_prec Minimal precedence level to parse. + */ static ast_node_ptr parse_expression(std::vector& items, size_t& idx, int min_prec); - + /** + * @brief Parse a prefix expression and any trailing postfix operators. + */ static ast_node_ptr parse_prefix(std::vector& items, size_t& idx); private: diff --git a/include/grouper.hpp b/include/grouper.hpp index f201b3a..64dd756 100644 --- a/include/grouper.hpp +++ b/include/grouper.hpp @@ -27,10 +27,19 @@ #include "ast.hpp" +/** + * @brief Parses tokens into hierarchical groups and expressions. + * + * The grouper is responsible for constructing the AST from a token stream. + * It handles bracket matching, command separation and expression parsing. + */ class grouper { public: explicit grouper(reader& r, size_t limit = 64); - + /** + * @brief Parse a sequence starting at the current reader position. + * @param kind Expected top-level group kind. + */ group_ptr parse(group_kind kind = group_kind::file); private: @@ -43,7 +52,13 @@ class grouper { void peek(); [[nodiscard]] group_ptr identify_subgroup(const group_ptr& group) const; - + /** + * @brief Attach @p inode to the last statement if it is a secondary + * keyword. + * + * Handles constructs like else or catch by merging them + * with the previous command group. + */ [[nodiscard]] bool handle_chain(const group_ptr& result, const group_ptr& inode) const; @@ -55,23 +70,47 @@ class grouper { void identify_body(const group_ptr& group) const; void identify(const group_ptr& group, const group_ptr& result) const; - + /** + * @brief Transform token groups representing arithmetic into AST nodes. + * + * Runs the expression parser over certain group kinds. If the entire group + * forms a valid expression, its children are replaced with the resulting + * expression subtree. + */ void parse_arithmetic(const group_ptr& group) const; - + /** + * @brief Close the current command when a separator is encountered. + */ bool append_command(group_ptr& group, group_ptr& top, group_kind kind) const; - + /** + * @brief Begin parsing of a bracketed sub-group. + * + * Pushes a new wrapped_node onto @p top when an opening bracket is + * encountered. + */ void append_wrapped(const group_ptr& top); - + /** + * @brief Finalize a wrapped sub-group when a closing bracket is seen. + */ void close_wrapped(const group_ptr& group, group_ptr& top, group_kind kind); - + /** + * @brief Parse a sequence of tokens into the supplied group. + * + * This is the core loop that recognises brackets and separators and + * builds the initial hierarchical structure. + */ void parse_group(group_kind kind, group_ptr& group); - + /** + * @brief Safely append a node to its parent group. + */ void append( const group_ptr& parent, const ast_node_ptr& node, const std::source_location& location = std::source_location::current() ) const; - + /** + * @brief Create a formatted runtime error describing a parse failure. + */ [[nodiscard]] std::runtime_error make_error( const std::string& message, const group_ptr& context = {}, const std::source_location& location = std::source_location::current() diff --git a/include/reader.hpp b/include/reader.hpp index 7a6b15d..52d3291 100644 --- a/include/reader.hpp +++ b/include/reader.hpp @@ -29,10 +29,13 @@ #include #include +/** + * @brief Byte and line location within the input stream. + */ struct position { - std::streamoff offset; - int line; - int column; + std::streamoff offset; ///< absolute offset from the beginning of the file + int line; ///< zero based line number + int column; ///< zero based column number }; enum class token_kind { @@ -49,14 +52,14 @@ enum class token_kind { special_character }; -struct token { +struct token final { token_kind kind; position pos; std::string word; - virtual ~token(); + ~token(); - virtual void dump( + void dump( std::ostream& os, const std::string& prefix, bool is_last ) const noexcept; @@ -65,6 +68,13 @@ struct token { using token_ptr = std::shared_ptr; +/** + * @brief Lightweight tokenizer for QuasiLang source code. + * + * The reader reads from either a file or a memory buffer and produces + * tokens on demand via next_token(). Position information is tracked so + * callers can report meaningful diagnostics. + */ class reader { public: explicit reader( @@ -74,11 +84,19 @@ class reader { explicit reader(std::string& data) noexcept; ~reader(); - + /** + * @brief Read the next token from the input stream. + * + * @param out Token object to be filled with the parsed data. + */ void next_token(token& out); void jump_to_position(position pos); - + /** + * @brief Throw an exception with the current position information. + * + * Used by parsers to abort processing while preserving diagnostics. + */ void interrupt(); position get_position() const; @@ -108,15 +126,33 @@ class reader { void read_whitespace(std::string& into); void read_keyword(std::string& into); - + /** + * @brief Read a quoted string literal with escape handling. + * + * Supports common escape sequences and Unicode escapes of the + * form \uXXXX. The resulting decoded text is stored in + * @p into without the surrounding quotes. + * @throw std::runtime_error on malformed input. + */ void read_string(std::string& into); void read_comment(std::string& into); - + /** + * @brief Parse an integer or floating point literal. + * + * Digits are consumed according to the QuasiLang grammar. If a + * fractional part or exponent is present the returned kind is + * token_kind::floating. + */ token_kind read_number(std::string& into); void init_token(token& t) const noexcept; - + /** + * @brief Helper to create formatted runtime errors. + * + * In debug builds the message includes context information such + * as the current position and originating source location. + */ [[nodiscard]] std::runtime_error make_error( const std::string& message, const std::source_location& location = std::source_location::current() diff --git a/readme.md b/readme.md index 08a9c57..77054e7 100644 --- a/readme.md +++ b/readme.md @@ -2,11 +2,12 @@ ## _— the Hunchback Dragon of Compilers_ -[//]: # ([![version](https://img.shields.io/github/v/release/YaRiabtsev/QuasiPiler?include_prereleases)](https://github.com/YaRiabtsev/QuasiPiler/releases/latest)) -[//]: # ([![Checks](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/tests.yml/badge.svg)](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/tests.yml)) -[//]: # ([![Docs & Coverage](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/html.yml/badge.svg)](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/html.yml)) -[![codecov](https://codecov.io/gh/YaRiabtsev/QuasiPiler/graph/badge.svg?token=MCNEJFWMDU)](https://codecov.io/gh/YaRiabtsev/QuasiPiler) -[![license](https://img.shields.io/github/license/YaRiabtsev/QuasiPiler?color=e6e6e6)](https://github.com/YaRiabtsev/QuasiPiler/blob/master/license) +[![version](https://img.shields.io/github/v/release/ninjaro/QuasiPiler?include_prereleases)](https://github.com/ninjaro/QuasiPiler/releases/latest) +[![Checks](https://github.com/ninjaro/QuasiPiler/actions/workflows/tests.yml/badge.svg)](https://github.com/ninjaro/QuasiPiler/actions/workflows/tests.yml) +[![Deploy](https://github.com/ninjaro/QuasiPiler/actions/workflows/html.yml/badge.svg)](https://github.com/ninjaro/QuasiPiler/actions/workflows/html.yml) +[![codecov](https://codecov.io/gh/ninjaro/QuasiPiler/graph/badge.svg?token=MCNEJFWMDU)](https://codecov.io/gh/ninjaro/QuasiPiler) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/940dcf5e3cf64e759ce6ad17176d31f4)](https://app.codacy.com/gh/ninjaro/QuasiPiler/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) +[![license](https://img.shields.io/github/license/ninjaro/QuasiPiler?color=e6e6e6)](https://github.com/ninjaro/QuasiPiler/blob/master/license) > “A one-eyed transpiler is much more incomplete than a blind transpiler, for he knows what it is that’s lacking.” > — Victor-Marie of Gugle Inc. (1998–2017) @@ -37,73 +38,171 @@ This repo is my sanctuary under license — it begs mercy, not stars. I’ll bel ``` * ``: path to your QuasiCode file -## QuasiLang Syntax +## QuasiLang Syntax Guide ### Basics -- **Comments** - - Line comments begin with `//`. - - Block comments are enclosed in `/*` and `*/`. -- **Whitespace** is ignored except as a separator. -- **Identifiers** use letters, digits and underscores and may not start with a digit. -- **Literals** - - Numbers support integer and floating point forms (with optional exponent). - - Strings can use either single `'` or double `"` quotes and support common escape sequences. +* **Comments** -[//]: # (- **Separators and grouping**) + * Line comments: `// This is a comment` + * Block comments: -[//]: # ( - `,` comma, `;` semicolon and `:` colon act as separators.) + ```qc + /* + This is a block comment + */ + ``` -[//]: # ( - `()` parentheses, `[]` brackets and `{}` braces form groups.) +* **Whitespace** is ignored except as a separator. -[//]: # ( - Nested groups are used for lists, code blocks and expressions.) +* **Identifiers** consist of letters, digits, and underscores, but cannot start with a digit. -[//]: # () -[//]: # (### Expressions) +* **Literals** -[//]: # () -[//]: # (- Standard arithmetic and assignment operators are recognized: `+`, `-`, `*`, `/`, `%`, `=` and their compound forms (`+=`, `-=`, `*=`, `/=`, `%=`).) + * **Numbers:** Support integer and floating-point (with optional exponent). + * **Strings:** Can use single `'` or double `"` quotes. Common escape sequences (like `\n`, `\t`, `\\`, etc.) are supported. -[//]: # (- Comparison and logical operators include `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!`.) +* **Separators and Grouping** -[//]: # (- Bitwise operators: `&`, `|`, `^`, `<<`, `>>` and their compound assignments.) + * Separators: `,` (comma), `;` (semicolon), `:` (colon) + * Grouping: -[//]: # (- Increment and decrement operators `++` and `--` are supported in prefix and postfix form.) + * `()` for expressions and function parameters + * `[]` for lists and indexing + * `{}` for code blocks and objects -[//]: # (- Member access uses `.` and indexing uses `[expr]`. Slice syntax `[start:end:step]` is available.) +--- -[//]: # (- Function calls use the form `name(arg1, arg2)`.) +### Expressions + +* **Arithmetic & Assignment:** + `+`, `-`, `*`, `/`, `%`, `=`, `+=`, `-=`, `*=`, `/=`, `%=` +* **Comparison & Logic:** + `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!` +* **Bitwise:** + `&`, `|`, `^`, `<<`, `>>`, and compound assignments (`&=`, `|=`, etc.) +* **Increment/Decrement:** + `++`, `--` (prefix and postfix) +* **Member Access:** + `obj.key` or `obj["key"]` +* **Indexing & Slicing:** + `arr[2]`, `arr[1:4]`, `arr[::2]` +* **Function Calls:** + `func(arg1, arg2)` + +--- ### Statements and Declarations -[//]: # (- **Variable assignment** follows `name = expression;`.) +* **Variable Assignment:** + `name = expression;` +* **Function Declaration:** + `name(param1, param2) { ... }` +* **Control Flow:** + + * **Conditional:** + + ```qc + if (cond) { ... } + elif (other) { ... } + else { ... } + ``` + * **Loops:** + + * While: `while (cond) { ... }` + * For: `for (init; cond; step) { ... }` + * **Jump Statements:** + + * `break;` + * `continue;` + * `return;` or `return expr;` + * `goto label_name;` + * **Exception Handling:** + + ```qc + try { + // code + } catch (err) { + // handler + } finally { + // cleanup + } + ``` +* **Labels:** + Define with `label_name:` and use with `goto label_name;` + +--- -[//]: # (- **Function declarations** use `name(param1, param2) { ... }`.) +### Data Structures -[//]: # (- **Control flow**) +* **Lists:** + `[item1, item2, ...]` +* **Dictionaries/Objects:** + `{ "key": value, "other": 42 }` -[//]: # ( - Conditional statements: `if (cond) { ... }`, optional `else` or `elif` blocks.) +--- -[//]: # ( - Loops: `while (cond) { ... }` and `for(init; cond; step) { ... }`.) +### Example Programs -[//]: # ( - `break`, `continue`, `return` and `goto` appear as standalone keywords and may take an optional expression for `return`.) +**Hello World** -[//]: # ( - `try { ... } catch { ... }` for exception handling.) +```qc +// hello.qc +greeting = "Hello, world!"; +print(greeting); +``` -[//]: # (- **Labels** can be defined with `label_name:` and referenced via `goto label_name`.) +**Basic Control Flow** -### Data Structures +```qc +numbers = [1, 2, 3, 4, 5]; +sum = 0; + +for(i = 0; i < len(numbers); i++) { + sum += numbers[i]; +} + +if (sum > 15) { + print("Sum is greater than 15"); +} elif (sum > 10) { + print("Sum is greater than 10"); +} else { + print("Sum is", sum); +} +``` -[//]: # (- **Lists** use `[item1, item2, ...]`.) +**Functions and Error Handling** -[//]: # (- **Dictionaries/objects** use `{ "key": value }`.) +```qc +safe_div(a, b) { + try { + return a / b; + } catch (...) { + print("Error: Division by zero"); + return null; + } +} + +result = safe_div(10, 0); +``` -## Examples +**Labels and Goto (Rare, but Supported)** ```qc -// todo: Example of a simple QuasiLang program +counter = 0; +start: + counter++; + if (counter < 3) { + goto start; + } +print("Done!"); ``` +--- + +**See `include/frontend` and `src/frontend` for implementation, and check `tests/frontend` for more examples.** + + ## Documentation and Contributing To build and run tests, enable debug mode, or generate coverage reports: @@ -117,8 +216,8 @@ To build and run tests, enable debug mode, or generate coverage reports: $ cmake --build build --target coverage ``` -For detailed documentation, see the [Documentation](https://yariabtsev.github.io/QuasiPiler/doc/) and for the latest -coverage report, see [Coverage](https://yariabtsev.github.io/QuasiPiler/cov/). +For detailed documentation, see the [Documentation](https://ninjaro.github.io/QuasiPiler/doc/) and for the latest +coverage report, see [Coverage](https://ninjaro.github.io/QuasiPiler/cov/). ## Security Policy diff --git a/tests/arithmetic_tests.cpp b/tests/arithmetic_tests.cpp index 3b9c879..7e183ab 100644 --- a/tests/arithmetic_tests.cpp +++ b/tests/arithmetic_tests.cpp @@ -26,12 +26,12 @@ #include TEST(ArithmeticTest, ParseBinary) { - std::string input = "a+b;"; + std::string input = "a+b"; reader r { input }; grouper g { r }; auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* bin = dynamic_cast(cmd->nodes[0].get()); @@ -40,12 +40,12 @@ TEST(ArithmeticTest, ParseBinary) { } TEST(ArithmeticTest, ParsePrefixUnary) { - std::string input = "+a;"; + std::string input = "+a"; reader r { input }; grouper g { r }; auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* un = dynamic_cast(cmd->nodes[0].get()); @@ -54,12 +54,12 @@ TEST(ArithmeticTest, ParsePrefixUnary) { } TEST(ArithmeticTest, ParsePostfixUnary) { - std::string input = "a++;"; + std::string input = "a++"; reader r { input }; grouper g { r }; auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* un = dynamic_cast(cmd->nodes[0].get()); @@ -68,12 +68,12 @@ TEST(ArithmeticTest, ParsePostfixUnary) { } TEST(ArithmeticTest, ParseNestedGroups) { - std::string input = "++(a--);"; + std::string input = "++(a--)"; reader r { input }; grouper g { r }; auto res = g.parse(); ASSERT_EQ(res->kind, group_kind::file); - ASSERT_GE(res->size(), 1u); + ASSERT_EQ(res->size(), 1u); auto* cmd = dynamic_cast(res->nodes[0].get()); ASSERT_NE(cmd, nullptr); auto* pre = dynamic_cast(cmd->nodes[0].get());