diff --git a/.github/workflows/html.yml b/.github/workflows/html.yml
index 1932420..b775a82 100644
--- a/.github/workflows/html.yml
+++ b/.github/workflows/html.yml
@@ -1,9 +1,14 @@
-name: Docs & Coverage
+name: Deploy
on:
push:
branches:
- master
+ paths:
+ - 'src/**/*.cpp'
+ - 'include/**/*.hpp'
+ - 'tests/**/*.cpp'
+ - 'readme.md'
workflow_dispatch:
@@ -48,6 +53,12 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
+ - name: Upload coverage to Codacy
+ uses: codacy/codacy-coverage-reporter-action@a38818475bb21847788496e9f0fddaa4e84955ba
+ with:
+ project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
+ coverage-reports: build/coverage.info
+
- name: Clean build
run: rm -rf build/
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f0d121d..35c2113 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -4,6 +4,10 @@ on:
pull_request:
branches:
- master
+ paths:
+ - 'src/**/*.cpp'
+ - 'include/**/*.hpp'
+ - 'tests/**/*.cpp'
jobs:
build-and-test:
diff --git a/include/ast.hpp b/include/ast.hpp
index 959a57b..8fd21cb 100644
--- a/include/ast.hpp
+++ b/include/ast.hpp
@@ -66,13 +66,30 @@ enum class group_kind { file, body, list, paren, command, item, key, halt };
[[nodiscard]] const char* group_kind_name(group_kind k) noexcept;
+/**
+ * @brief Collection of AST nodes with a configurable size limit.
+ *
+ * Large sub-groups may be replaced with placeholder nodes to keep
+ * @c fixed_size within @c limit.
+ */
struct group_node : ast_node {
- size_t limit;
+ size_t limit; ///< Maximum allowed node weight
group_kind kind { group_kind::halt };
std::vector nodes;
- std::priority_queue>
- weights; /// node_size -> node_index
-
+ /// Queue of heavy child nodes (node_size -> node_index):
+ std::priority_queue> weights;
+
+ /**
+ * @brief Append a child node while respecting the size limit.
+ *
+ * Nodes contribute their @c fixed_size and @c full_size to the parent. If
+ * the accumulated @c fixed_size exceeds @c limit, larger child groups are
+ * replaced with ::placeholder_node instances so the tree can be lazily
+ * expanded later.
+ *
+ * @param node Node to append.
+ * @param src Reader used to reconstruct squeezed subtrees on demand.
+ */
void append(ast_node_ptr node, const reader& src);
[[nodiscard]] bool empty() const noexcept override;
[[nodiscard]] size_t size() const noexcept;
@@ -81,7 +98,17 @@ struct group_node : ast_node {
void dump(
std::ostream& os, const std::string& prefix, bool is_last, bool full
) const override;
- const position& get_start() const override;
+ [[nodiscard]] const position& get_start() const override;
+ /**
+ * @brief Replace a child group with a placeholder.
+ *
+ * The placeholder stores enough information to re-read the original subtree
+ * from @p src later. This is used when a group's @c fixed_size would exceed
+ * the configured limit and thus needs to be collapsed.
+ *
+ * @param index Index of the child to replace.
+ * @param src Reader used to recreate the subtree if needed.
+ */
void squeeze(size_t index, const reader& src);
void pop_back();
};
@@ -95,7 +122,14 @@ struct wrapped_node : group_node {
using wrapped_ptr = std::shared_ptr;
-struct placeholder_node : wrapped_node {
+/**
+ * @brief Node standing in place of a squeezed sub-tree.
+ *
+ * When a group exceeds the configured size limit it can be replaced by a
+ * placeholder node. The original reader is stored so the subtree can be
+ * reconstructed on demand.
+ */
+struct placeholder_node final : wrapped_node {
reader* src { nullptr };
void dump(
std::ostream& os, const std::string& prefix, bool is_last, bool full
diff --git a/include/expression.hpp b/include/expression.hpp
index e6800d9..d46f77a 100644
--- a/include/expression.hpp
+++ b/include/expression.hpp
@@ -31,17 +31,40 @@
class expression {
public:
+ /**
+ * @brief Element of the input stream for the expression parser.
+ * @details When @c is_op is set the item represents an operator token;
+ * otherwise it stores a pointer to an AST node.
+ */
struct item {
bool is_op { false };
token tok;
ast_node_ptr node;
};
+ /**
+ * @brief Split a raw node list into tokens and operands.
+ *
+ * Consecutive operator tokens are combined into multi-character operators
+ * such as += or ==.
+ */
static std::vector- make_items(const std::vector& nodes);
-
+ /**
+ * @brief Parse a binary/ternary expression from a token list.
+ *
+ * The function implements a Pratt-style parser. @p min_prec
+ * specifies the minimal operator precedence accepted for the
+ * current recursion level.
+ *
+ * @param items Token/operand stream produced by make_items().
+ * @param idx Current position within @p items, updated on return.
+ * @param min_prec Minimal precedence level to parse.
+ */
static ast_node_ptr
parse_expression(std::vector
- & items, size_t& idx, int min_prec);
-
+ /**
+ * @brief Parse a prefix expression and any trailing postfix operators.
+ */
static ast_node_ptr parse_prefix(std::vector
- & items, size_t& idx);
private:
diff --git a/include/grouper.hpp b/include/grouper.hpp
index f201b3a..64dd756 100644
--- a/include/grouper.hpp
+++ b/include/grouper.hpp
@@ -27,10 +27,19 @@
#include "ast.hpp"
+/**
+ * @brief Parses tokens into hierarchical groups and expressions.
+ *
+ * The grouper is responsible for constructing the AST from a token stream.
+ * It handles bracket matching, command separation and expression parsing.
+ */
class grouper {
public:
explicit grouper(reader& r, size_t limit = 64);
-
+ /**
+ * @brief Parse a sequence starting at the current reader position.
+ * @param kind Expected top-level group kind.
+ */
group_ptr parse(group_kind kind = group_kind::file);
private:
@@ -43,7 +52,13 @@ class grouper {
void peek();
[[nodiscard]] group_ptr identify_subgroup(const group_ptr& group) const;
-
+ /**
+ * @brief Attach @p inode to the last statement if it is a secondary
+ * keyword.
+ *
+ * Handles constructs like else or catch by merging them
+ * with the previous command group.
+ */
[[nodiscard]] bool
handle_chain(const group_ptr& result, const group_ptr& inode) const;
@@ -55,23 +70,47 @@ class grouper {
void identify_body(const group_ptr& group) const;
void identify(const group_ptr& group, const group_ptr& result) const;
-
+ /**
+ * @brief Transform token groups representing arithmetic into AST nodes.
+ *
+ * Runs the expression parser over certain group kinds. If the entire group
+ * forms a valid expression, its children are replaced with the resulting
+ * expression subtree.
+ */
void parse_arithmetic(const group_ptr& group) const;
-
+ /**
+ * @brief Close the current command when a separator is encountered.
+ */
bool
append_command(group_ptr& group, group_ptr& top, group_kind kind) const;
-
+ /**
+ * @brief Begin parsing of a bracketed sub-group.
+ *
+ * Pushes a new wrapped_node onto @p top when an opening bracket is
+ * encountered.
+ */
void append_wrapped(const group_ptr& top);
-
+ /**
+ * @brief Finalize a wrapped sub-group when a closing bracket is seen.
+ */
void close_wrapped(const group_ptr& group, group_ptr& top, group_kind kind);
-
+ /**
+ * @brief Parse a sequence of tokens into the supplied group.
+ *
+ * This is the core loop that recognises brackets and separators and
+ * builds the initial hierarchical structure.
+ */
void parse_group(group_kind kind, group_ptr& group);
-
+ /**
+ * @brief Safely append a node to its parent group.
+ */
void append(
const group_ptr& parent, const ast_node_ptr& node,
const std::source_location& location = std::source_location::current()
) const;
-
+ /**
+ * @brief Create a formatted runtime error describing a parse failure.
+ */
[[nodiscard]] std::runtime_error make_error(
const std::string& message, const group_ptr& context = {},
const std::source_location& location = std::source_location::current()
diff --git a/include/reader.hpp b/include/reader.hpp
index 7a6b15d..52d3291 100644
--- a/include/reader.hpp
+++ b/include/reader.hpp
@@ -29,10 +29,13 @@
#include
#include
+/**
+ * @brief Byte and line location within the input stream.
+ */
struct position {
- std::streamoff offset;
- int line;
- int column;
+ std::streamoff offset; ///< absolute offset from the beginning of the file
+ int line; ///< zero-based line number
+ int column; ///< zero-based column number
};
enum class token_kind {
@@ -49,14 +52,14 @@ enum class token_kind {
special_character
};
-struct token {
+struct token final {
token_kind kind;
position pos;
std::string word;
- virtual ~token();
+ ~token();
- virtual void dump(
+ void dump(
std::ostream& os, const std::string& prefix, bool is_last
) const noexcept;
@@ -65,6 +68,13 @@ struct token {
using token_ptr = std::shared_ptr;
+/**
+ * @brief Lightweight tokenizer for QuasiLang source code.
+ *
+ * The reader reads from either a file or a memory buffer and produces
+ * tokens on demand via next_token(). Position information is tracked so
+ * callers can report meaningful diagnostics.
+ */
class reader {
public:
explicit reader(
@@ -74,11 +84,19 @@ class reader {
explicit reader(std::string& data) noexcept;
~reader();
-
+ /**
+ * @brief Read the next token from the input stream.
+ *
+ * @param out Token object to be filled with the parsed data.
+ */
void next_token(token& out);
void jump_to_position(position pos);
-
+ /**
+ * @brief Throw an exception with the current position information.
+ *
+ * Used by parsers to abort processing while preserving diagnostics.
+ */
void interrupt();
position get_position() const;
@@ -108,15 +126,33 @@ class reader {
void read_whitespace(std::string& into);
void read_keyword(std::string& into);
-
+ /**
+ * @brief Read a quoted string literal with escape handling.
+ *
+ * Supports common escape sequences and Unicode escapes of the
+ * form \\uXXXX. The resulting decoded text is stored in
+ * @p into without the surrounding quotes.
+ * @throw std::runtime_error on malformed input.
+ */
void read_string(std::string& into);
void read_comment(std::string& into);
-
+ /**
+ * @brief Parse an integer or floating point literal.
+ *
+ * Digits are consumed according to the QuasiLang grammar. If a
+ * fractional part or exponent is present the returned kind is
+ * token_kind::floating.
+ */
token_kind read_number(std::string& into);
void init_token(token& t) const noexcept;
-
+ /**
+ * @brief Helper to create formatted runtime errors.
+ *
+ * In debug builds the message includes context information such
+ * as the current position and originating source location.
+ */
[[nodiscard]] std::runtime_error make_error(
const std::string& message,
const std::source_location& location = std::source_location::current()
diff --git a/readme.md b/readme.md
index 08a9c57..77054e7 100644
--- a/readme.md
+++ b/readme.md
@@ -2,11 +2,12 @@
## _— the Hunchback Dragon of Compilers_
-[//]: # ([](https://github.com/YaRiabtsev/QuasiPiler/releases/latest))
-[//]: # ([](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/tests.yml))
-[//]: # ([](https://github.com/YaRiabtsev/QuasiPiler/actions/workflows/html.yml))
-[](https://codecov.io/gh/YaRiabtsev/QuasiPiler)
-[](https://github.com/YaRiabtsev/QuasiPiler/blob/master/license)
+[](https://github.com/ninjaro/QuasiPiler/releases/latest)
+[](https://github.com/ninjaro/QuasiPiler/actions/workflows/tests.yml)
+[](https://github.com/ninjaro/QuasiPiler/actions/workflows/html.yml)
+[](https://codecov.io/gh/ninjaro/QuasiPiler)
+[](https://app.codacy.com/gh/ninjaro/QuasiPiler/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
+[](https://github.com/ninjaro/QuasiPiler/blob/master/license)
> “A one-eyed transpiler is much more incomplete than a blind transpiler, for he knows what it is that’s lacking.”
> — Victor-Marie of Gugle Inc. (1998–2017)
@@ -37,73 +38,171 @@ This repo is my sanctuary under license — it begs mercy, not stars. I’ll bel
```
* ``: path to your QuasiCode file
-## QuasiLang Syntax
+## QuasiLang Syntax Guide
### Basics
-- **Comments**
- - Line comments begin with `//`.
- - Block comments are enclosed in `/*` and `*/`.
-- **Whitespace** is ignored except as a separator.
-- **Identifiers** use letters, digits and underscores and may not start with a digit.
-- **Literals**
- - Numbers support integer and floating point forms (with optional exponent).
- - Strings can use either single `'` or double `"` quotes and support common escape sequences.
+* **Comments**
-[//]: # (- **Separators and grouping**)
+ * Line comments: `// This is a comment`
+ * Block comments:
-[//]: # ( - `,` comma, `;` semicolon and `:` colon act as separators.)
+ ```qc
+ /*
+ This is a block comment
+ */
+ ```
-[//]: # ( - `()` parentheses, `[]` brackets and `{}` braces form groups.)
+* **Whitespace** is ignored except as a separator.
-[//]: # ( - Nested groups are used for lists, code blocks and expressions.)
+* **Identifiers** consist of letters, digits, and underscores, but cannot start with a digit.
-[//]: # ()
-[//]: # (### Expressions)
+* **Literals**
-[//]: # ()
-[//]: # (- Standard arithmetic and assignment operators are recognized: `+`, `-`, `*`, `/`, `%`, `=` and their compound forms (`+=`, `-=`, `*=`, `/=`, `%=`).)
+ * **Numbers:** Support integer and floating-point (with optional exponent).
+ * **Strings:** Can use single `'` or double `"` quotes. Common escape sequences (like `\n`, `\t`, `\\`, etc.) are supported.
-[//]: # (- Comparison and logical operators include `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!`.)
+* **Separators and Grouping**
-[//]: # (- Bitwise operators: `&`, `|`, `^`, `<<`, `>>` and their compound assignments.)
+ * Separators: `,` (comma), `;` (semicolon), `:` (colon)
+ * Grouping:
-[//]: # (- Increment and decrement operators `++` and `--` are supported in prefix and postfix form.)
+ * `()` for expressions and function parameters
+ * `[]` for lists and indexing
+ * `{}` for code blocks and objects
-[//]: # (- Member access uses `.` and indexing uses `[expr]`. Slice syntax `[start:end:step]` is available.)
+---
-[//]: # (- Function calls use the form `name(arg1, arg2)`.)
+### Expressions
+
+* **Arithmetic & Assignment:**
+ `+`, `-`, `*`, `/`, `%`, `=`, `+=`, `-=`, `*=`, `/=`, `%=`
+* **Comparison & Logic:**
+ `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!`
+* **Bitwise:**
+ `&`, `|`, `^`, `<<`, `>>`, and compound assignments (`&=`, `|=`, etc.)
+* **Increment/Decrement:**
+ `++`, `--` (prefix and postfix)
+* **Member Access:**
+ `obj.key` or `obj["key"]`
+* **Indexing & Slicing:**
+ `arr[2]`, `arr[1:4]`, `arr[::2]`
+* **Function Calls:**
+ `func(arg1, arg2)`
+
+---
### Statements and Declarations
-[//]: # (- **Variable assignment** follows `name = expression;`.)
+* **Variable Assignment:**
+ `name = expression;`
+* **Function Declaration:**
+ `name(param1, param2) { ... }`
+* **Control Flow:**
+
+ * **Conditional:**
+
+ ```qc
+ if (cond) { ... }
+ elif (other) { ... }
+ else { ... }
+ ```
+ * **Loops:**
+
+ * While: `while (cond) { ... }`
+ * For: `for (init; cond; step) { ... }`
+ * **Jump Statements:**
+
+ * `break;`
+ * `continue;`
+ * `return;` or `return expr;`
+ * `goto label_name;`
+ * **Exception Handling:**
+
+ ```qc
+ try {
+ // code
+ } catch (err) {
+ // handler
+ } finally {
+ // cleanup
+ }
+ ```
+* **Labels:**
+ Define with `label_name:` and use with `goto label_name;`
+
+---
-[//]: # (- **Function declarations** use `name(param1, param2) { ... }`.)
+### Data Structures
-[//]: # (- **Control flow**)
+* **Lists:**
+ `[item1, item2, ...]`
+* **Dictionaries/Objects:**
+ `{ "key": value, "other": 42 }`
-[//]: # ( - Conditional statements: `if (cond) { ... }`, optional `else` or `elif` blocks.)
+---
-[//]: # ( - Loops: `while (cond) { ... }` and `for(init; cond; step) { ... }`.)
+### Example Programs
-[//]: # ( - `break`, `continue`, `return` and `goto` appear as standalone keywords and may take an optional expression for `return`.)
+**Hello World**
-[//]: # ( - `try { ... } catch { ... }` for exception handling.)
+```qc
+// hello.qc
+greeting = "Hello, world!";
+print(greeting);
+```
-[//]: # (- **Labels** can be defined with `label_name:` and referenced via `goto label_name`.)
+**Basic Control Flow**
-### Data Structures
+```qc
+numbers = [1, 2, 3, 4, 5];
+sum = 0;
+
+for(i = 0; i < len(numbers); i++) {
+ sum += numbers[i];
+}
+
+if (sum > 15) {
+ print("Sum is greater than 15");
+} elif (sum > 10) {
+ print("Sum is greater than 10");
+} else {
+ print("Sum is", sum);
+}
+```
-[//]: # (- **Lists** use `[item1, item2, ...]`.)
+**Functions and Error Handling**
-[//]: # (- **Dictionaries/objects** use `{ "key": value }`.)
+```qc
+safe_div(a, b) {
+ try {
+ return a / b;
+ } catch (...) {
+ print("Error: Division by zero");
+ return null;
+ }
+}
+
+result = safe_div(10, 0);
+```
-## Examples
+**Labels and Goto (Rare, but Supported)**
```qc
-// todo: Example of a simple QuasiLang program
+counter = 0;
+start:
+ counter++;
+ if (counter < 3) {
+ goto start;
+ }
+print("Done!");
```
+---
+
+**See `include/` and `src/` for the implementation, and check `tests/` for more examples.**
+
+
## Documentation and Contributing
To build and run tests, enable debug mode, or generate coverage reports:
@@ -117,8 +216,8 @@ To build and run tests, enable debug mode, or generate coverage reports:
$ cmake --build build --target coverage
```
-For detailed documentation, see the [Documentation](https://yariabtsev.github.io/QuasiPiler/doc/) and for the latest
-coverage report, see [Coverage](https://yariabtsev.github.io/QuasiPiler/cov/).
+For detailed documentation, see the [Documentation](https://ninjaro.github.io/QuasiPiler/doc/) and for the latest
+coverage report, see [Coverage](https://ninjaro.github.io/QuasiPiler/cov/).
## Security Policy
diff --git a/tests/arithmetic_tests.cpp b/tests/arithmetic_tests.cpp
index 3b9c879..7e183ab 100644
--- a/tests/arithmetic_tests.cpp
+++ b/tests/arithmetic_tests.cpp
@@ -26,12 +26,12 @@
#include
TEST(ArithmeticTest, ParseBinary) {
- std::string input = "a+b;";
+ std::string input = "a+b";
reader r { input };
grouper g { r };
auto res = g.parse();
ASSERT_EQ(res->kind, group_kind::file);
- ASSERT_GE(res->size(), 1u);
+ ASSERT_EQ(res->size(), 1u);
auto* cmd = dynamic_cast(res->nodes[0].get());
ASSERT_NE(cmd, nullptr);
auto* bin = dynamic_cast(cmd->nodes[0].get());
@@ -40,12 +40,12 @@ TEST(ArithmeticTest, ParseBinary) {
}
TEST(ArithmeticTest, ParsePrefixUnary) {
- std::string input = "+a;";
+ std::string input = "+a";
reader r { input };
grouper g { r };
auto res = g.parse();
ASSERT_EQ(res->kind, group_kind::file);
- ASSERT_GE(res->size(), 1u);
+ ASSERT_EQ(res->size(), 1u);
auto* cmd = dynamic_cast(res->nodes[0].get());
ASSERT_NE(cmd, nullptr);
auto* un = dynamic_cast(cmd->nodes[0].get());
@@ -54,12 +54,12 @@ TEST(ArithmeticTest, ParsePrefixUnary) {
}
TEST(ArithmeticTest, ParsePostfixUnary) {
- std::string input = "a++;";
+ std::string input = "a++";
reader r { input };
grouper g { r };
auto res = g.parse();
ASSERT_EQ(res->kind, group_kind::file);
- ASSERT_GE(res->size(), 1u);
+ ASSERT_EQ(res->size(), 1u);
auto* cmd = dynamic_cast(res->nodes[0].get());
ASSERT_NE(cmd, nullptr);
auto* un = dynamic_cast(cmd->nodes[0].get());
@@ -68,12 +68,12 @@ TEST(ArithmeticTest, ParsePostfixUnary) {
}
TEST(ArithmeticTest, ParseNestedGroups) {
- std::string input = "++(a--);";
+ std::string input = "++(a--)";
reader r { input };
grouper g { r };
auto res = g.parse();
ASSERT_EQ(res->kind, group_kind::file);
- ASSERT_GE(res->size(), 1u);
+ ASSERT_EQ(res->size(), 1u);
auto* cmd = dynamic_cast(res->nodes[0].get());
ASSERT_NE(cmd, nullptr);
auto* pre = dynamic_cast(cmd->nodes[0].get());