diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..a5b6272 --- /dev/null +++ b/.clang-format @@ -0,0 +1,237 @@ +# find . -regex '.*\.\(cpp\|hpp\|cc\|cxx\)' -exec clang-format -style=file -i {} \; +--- +Language: Cpp +# BasedOnStyle: WebKit +AccessModifierOffset: -4 +AlignAfterOpenBracket: BlockIndent +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Right +AlignOperands: DontAlign +AlignTrailingComments: + Kind: Never + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + 
AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: All +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Custom +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: false +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: 
true +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: Inner +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 4 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Left +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Always +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: true +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never 
+SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... + diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..ea6cfa1 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,147 @@ +# Generated from CLion Inspection settings +--- +Checks: '-*, +bugprone-argument-comment, +bugprone-assert-side-effect, +bugprone-bad-signal-to-kill-thread, +bugprone-branch-clone, +bugprone-copy-constructor-init, +bugprone-dangling-handle, +bugprone-dynamic-static-initializers, +bugprone-fold-init-type, +bugprone-forward-declaration-namespace, +bugprone-forwarding-reference-overload, +bugprone-inaccurate-erase, +bugprone-incorrect-roundings, +bugprone-integer-division, +bugprone-lambda-function-name, +bugprone-macro-parentheses, +bugprone-macro-repeated-side-effects, +bugprone-misplaced-operator-in-strlen-in-alloc, +bugprone-misplaced-pointer-arithmetic-in-alloc, +bugprone-misplaced-widening-cast, +bugprone-move-forwarding-reference, +bugprone-multiple-statement-macro, +bugprone-no-escape, +bugprone-parent-virtual-call, +bugprone-posix-return, +bugprone-reserved-identifier, +bugprone-sizeof-container, +bugprone-sizeof-expression, +bugprone-spuriously-wake-up-functions, +bugprone-string-constructor, +bugprone-string-integer-assignment, +bugprone-string-literal-with-embedded-nul, +bugprone-suspicious-enum-usage, +bugprone-suspicious-include, +bugprone-suspicious-memset-usage, +bugprone-suspicious-missing-comma, +bugprone-suspicious-semicolon, 
+bugprone-suspicious-string-compare, +bugprone-suspicious-memory-comparison, +bugprone-suspicious-realloc-usage, +bugprone-swapped-arguments, +bugprone-terminating-continue, +bugprone-throw-keyword-missing, +bugprone-too-small-loop-variable, +bugprone-undefined-memory-manipulation, +bugprone-undelegated-constructor, +bugprone-unhandled-self-assignment, +bugprone-unused-raii, +bugprone-unused-return-value, +bugprone-use-after-move, +bugprone-virtual-near-miss, +cert-dcl21-cpp, +cert-dcl58-cpp, +cert-err34-c, +cert-err52-cpp, +cert-err60-cpp, +cert-flp30-c, +cert-msc50-cpp, +cert-msc51-cpp, +cert-str34-c, +cppcoreguidelines-interfaces-global-init, +cppcoreguidelines-narrowing-conversions, +cppcoreguidelines-pro-type-member-init, +cppcoreguidelines-pro-type-static-cast-downcast, +cppcoreguidelines-slicing, +google-default-arguments, +google-explicit-constructor, +google-runtime-operator, +hicpp-exception-baseclass, +hicpp-multiway-paths-covered, +misc-misplaced-const, +misc-new-delete-overloads, +#misc-no-recursion, +misc-non-copyable-objects, +misc-throw-by-value-catch-by-reference, +misc-unconventional-assign-operator, +misc-uniqueptr-reset-release, +modernize-avoid-bind, +modernize-concat-nested-namespaces, +modernize-deprecated-headers, +modernize-deprecated-ios-base-aliases, +modernize-loop-convert, +modernize-make-shared, +modernize-make-unique, +modernize-pass-by-value, +modernize-raw-string-literal, +modernize-redundant-void-arg, +modernize-replace-auto-ptr, +modernize-replace-disallow-copy-and-assign-macro, +modernize-replace-random-shuffle, +modernize-return-braced-init-list, +modernize-shrink-to-fit, +modernize-unary-static-assert, +modernize-use-auto, +modernize-use-bool-literals, +modernize-use-emplace, +modernize-use-equals-default, +modernize-use-equals-delete, +modernize-use-nodiscard, +modernize-use-noexcept, +modernize-use-nullptr, +modernize-use-override, +modernize-use-transparent-functors, +modernize-use-uncaught-exceptions, +mpi-buffer-deref, 
+mpi-type-mismatch, +openmp-use-default-none, +performance-faster-string-find, +performance-for-range-copy, +performance-implicit-conversion-in-loop, +performance-inefficient-algorithm, +performance-inefficient-string-concatenation, +performance-inefficient-vector-operation, +performance-move-const-arg, +performance-move-constructor-init, +performance-no-automatic-move, +performance-noexcept-move-constructor, +performance-trivially-destructible, +performance-type-promotion-in-math-fn, +performance-unnecessary-copy-initialization, +performance-unnecessary-value-param, +portability-simd-intrinsics, +readability-avoid-const-params-in-decls, +readability-const-return-type, +readability-container-size-empty, +readability-convert-member-functions-to-static, +readability-delete-null-pointer, +readability-deleted-default, +readability-inconsistent-declaration-parameter-name, +readability-make-member-function-const, +readability-misleading-indentation, +readability-misplaced-array-index, +readability-non-const-parameter, +readability-redundant-control-flow, +readability-redundant-declaration, +readability-redundant-function-ptr-dereference, +readability-redundant-smartptr-get, +readability-redundant-string-cstr, +readability-redundant-string-init, +readability-simplify-subscript-expr, +readability-static-accessed-through-instance, +readability-static-definition-in-anonymous-namespace, +readability-string-compare, +readability-uniqueptr-delete-release, +readability-use-anyofallof' \ No newline at end of file diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 0000000..a6b1ece --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,19 @@ +# Security Policy + +We take security seriously and appreciate reports of any vulnerabilities. + +## Reporting + +Please use GitHub's **Private Vulnerability Reporting** feature to submit +security issues. Alternatively, you may email +[yaroslav.riabtsev@rwth-aachen.de](mailto:yaroslav.riabtsev@rwth-aachen.de). 
+ +We’ll *totally* try to acknowledge your report within **5 business days** +(give or take... forever). As for critical issues — yeah, we *aim* to tackle +those within **30 days** of confirming they’re real, unless they’re +complicated, confusing, or we just forget. + +## Supported Versions + +At this time the project has no official releases. Security fixes will be +applied to the `master` branch. \ No newline at end of file diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..218d02f --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,46 @@ +name: CodeQL + +on: + pull_request: + branches: + - master + +permissions: + actions: read + contents: read + security-events: write + +jobs: + analyze: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + language: [ 'cpp' ] + steps: + - uses: actions/checkout@v4 + + - uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: manual + + - uses: MarkusJx/googletest-installer@v1.1 + + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install -y libcxxopts-dev + + - name: Build (Debug) + run: | + cmake -S . -B build-debug -DCMAKE_BUILD_TYPE=Debug + cmake --build build-debug --parallel $(nproc) + + - name: Build (Release) + run: | + cmake -S . 
-B build-release -DCMAKE_BUILD_TYPE=Release + cmake --build build-release --parallel $(nproc) + + - name: Run CodeQL Analysis + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/html.yml b/.github/workflows/html.yml new file mode 100644 index 0000000..1932420 --- /dev/null +++ b/.github/workflows/html.yml @@ -0,0 +1,72 @@ +name: Docs & Coverage + +on: + push: + branches: + - master + + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/configure-pages@v3 + - uses: MarkusJx/googletest-installer@v1.1 + + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install -y doxygen graphviz lcov llvm libcxxopts-dev + + - name: Generate Doxygen + run: doxygen Doxyfile + + - name: Build with coverage + run: | + cmake -B build -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DCOVERAGE=ON + cmake --build build --target coverage --parallel $(nproc) + + - name: Upload results to Codecov + uses: codecov/codecov-action@v5 + with: + fail_ci_if_error: true + disable_search: true + files: build/coverage.info + flags: unittests + name: codecov-umbrella + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + + - name: Clean build + run: rm -rf build/ + + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: ./ + destination: ./_site + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..bcac8fd --- /dev/null +++ 
b/.github/workflows/tests.yml @@ -0,0 +1,40 @@ +name: Checks + +on: + pull_request: + branches: + - master + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: MarkusJx/googletest-installer@v1.1 + + - name: Install Dependencies + run: | + sudo apt-get update + sudo apt-get install -y libcxxopts-dev + + - name: Build project + run: | + cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug + cmake --build build --parallel $(nproc) + + - name: Run unit tests + run: | + cd build + ctest -V --parallel $(nproc) + + code-style: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Clang-format + uses: jidicula/clang-format-action@v4.13.0 + with: + clang-format-version: '18' + check-path: '.' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 259148f..cf43da7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Prerequisites *.d +# Clion files +.idea/ + # Compiled Object files *.slo *.lo @@ -26,7 +29,20 @@ *.a *.lib +# Debug files +cmake-build-debug*/ + # Executables *.exe *.out *.app + +# QT compiled translation file +*.qm + +build/ +doc/ +cov/ + +CMakeLists.txt.user +CMakeLists.txt.user.* \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..68f20a1 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,182 @@ +cmake_minimum_required(VERSION 3.28) + +project(qpiler VERSION 1.0.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + add_compile_options(-Og -g -fno-omit-frame-pointer + -Werror +# -Wno-deprecated-declarations + + -Wall + -Wextra + -Wpedantic + + -Wcast-align + -Wcast-qual + -Wconversion + -Wctor-dtor-privacy + -Wenum-compare + -Wfloat-equal + -Wnon-virtual-dtor + -Wold-style-cast + -Woverloaded-virtual + -Wredundant-decls + -Wsign-conversion + -Wsign-promo + ) +elseif (CMAKE_BUILD_TYPE 
STREQUAL "Release") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -mtune=native -DNDEBUG") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s") +endif () + +add_library(qpiler_lib + src/reader.cpp + src/ast.cpp + src/grouper.cpp +) + +target_include_directories(qpiler_lib PUBLIC include) + +target_precompile_headers(qpiler_lib PRIVATE + include/reader.hpp + include/ast.hpp + include/grouper.hpp +) + +set_target_properties(qpiler_lib PROPERTIES UNITY_BUILD ON) + +add_executable(qpiler src/main.cpp) + +target_link_libraries(qpiler PRIVATE qpiler_lib) + +option(BUILD_TESTS "Build unit tests" ON) +option(COVERAGE "Enable coverage reporting" OFF) + +if (BUILD_TESTS) + find_package(GTest REQUIRED) + + if (UNIX) + find_package(Threads REQUIRED) + endif () + + add_executable(unit_tests + tests/main.cpp + + tests/reader_tests.cpp + tests/ast_tests.cpp + tests/grouper_tests.cpp + ) + + target_link_libraries(unit_tests + qpiler_lib + GTest::GTest + GTest::Main + Threads::Threads + ) + + enable_testing() + add_test(NAME unit_tests COMMAND unit_tests) + + set(SOURCE_COMPILERS_DIR "${CMAKE_SOURCE_DIR}/data") + set(TARGET_COMPILERS_DIR "${CMAKE_BINARY_DIR}/test_data") + + if (NOT EXISTS ${TARGET_COMPILERS_DIR}) + message(STATUS "Copying QPiler test files from ${SOURCE_COMPILERS_DIR} to ${TARGET_COMPILERS_DIR}") + if (UNIX) + set(COPY_COMMAND create_symlink) + else () + set(COPY_COMMAND copy_directory) + endif () + + execute_process( + COMMAND ${CMAKE_COMMAND} -E ${COPY_COMMAND} ${SOURCE_COMPILERS_DIR} ${TARGET_COMPILERS_DIR} + ) + else () + message(STATUS "Symlink or test data directory already exists") + endif () + + if (COVERAGE) + message(STATUS "Coverage: ON, compiler is ${CMAKE_CXX_COMPILER_ID}") + + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(COV_FLAGS --coverage -O0) + foreach (tgt IN ITEMS qpiler_lib unit_tests) + target_compile_options(${tgt} PUBLIC ${COV_FLAGS}) + target_link_libraries(${tgt} PUBLIC gcov) + endforeach () + + # custom target to run 
gcov/lcov + find_program(LCOV lcov) + find_program(GENHTML genhtml) + add_custom_target(coverage + COMMENT "Generating GCC/gcov coverage report" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} --build . --target unit_tests + COMMAND unit_tests + COMMAND ${LCOV} --capture --directory . --output-file coverage.info + COMMAND ${GENHTML} coverage.info --output-directory coverage_report + DEPENDS unit_tests + ) + + elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(COV_FLAGS + -fprofile-instr-generate + -fcoverage-mapping + -O0 + ) + set(PROFRAW_FILE "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.profraw") + set(PROFDATA_FILE "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.profdata") + set(COVERAGE_REPORT_DIR "${CMAKE_BINARY_DIR}/../cov") + + foreach (tgt IN ITEMS qpiler_lib unit_tests) + target_compile_options(${tgt} PUBLIC ${COV_FLAGS}) + target_link_options(${tgt} PUBLIC -fprofile-instr-generate) + endforeach () + + + find_program(LLVM_PROFDATA llvm-profdata) + find_program(LLVM_COV llvm-cov) + add_custom_target(coverage + BYPRODUCTS ${PROFRAW_FILE} ${PROFDATA_FILE} + COMMENT "Generating Clang/LLVM-cov coverage report" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} --build . 
--target unit_tests + COMMAND LLVM_PROFILE_FILE=${PROFRAW_FILE} $<TARGET_FILE:unit_tests> + COMMAND ${LLVM_PROFDATA} merge -sparse -output=${PROFDATA_FILE} ${PROFRAW_FILE} + COMMAND ${LLVM_COV} show $<TARGET_FILE:unit_tests> + -instr-profile=${PROFDATA_FILE} + -format=html + -output-dir=${COVERAGE_REPORT_DIR} + -ignore-filename-regex='.*/tests/.*' + -show-branches=count + -show-line-counts + -show-regions + -show-instantiations + -show-expansions + -use-color + -coverage-watermark=90,60 + COMMAND ${LLVM_COV} export $<TARGET_FILE:unit_tests> + -instr-profile=${PROFDATA_FILE} + -format=lcov + -ignore-filename-regex='.*/tests/.*' > ${CMAKE_BINARY_DIR}/coverage.info + DEPENDS unit_tests qpiler_lib + ) + else () + message(WARNING "Coverage is only supported with GCC or Clang") + endif () + endif () + +endif () + +option(ENABLE_ASAN "Enable AddressSanitizer" OFF) + +if (BUILD_TESTS AND ENABLE_ASAN) + message(STATUS "AddressSanitizer ENABLED for unit_tests") + target_compile_options(unit_tests PRIVATE -fsanitize=address -fno-omit-frame-pointer -g) + target_link_options(unit_tests PRIVATE -fsanitize=address) +endif() \ No newline at end of file diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..136dd23 --- /dev/null +++ b/Doxyfile @@ -0,0 +1,37 @@ +PROJECT_NAME = "QuasiPiler" +PROJECT_NUMBER = 1.0 +PROJECT_BRIEF = "-- the Hunchback Dragon of Compilers" + + +INPUT = include src tests +RECURSIVE = YES + +OUTPUT_DIRECTORY = +HTML_OUTPUT = doc + +GENERATE_HTML = YES +GENERATE_LATEX = NO + +EXTRACT_PRIVATE = YES + +EXTRACT_ALL = YES +EXTRACT_STATIC = YES +EXTRACT_PRIVATE = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +EXTRACT_ANON_NSPACES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES + +# QT_AUTOBRIEF = YES + +GROUP_MEMBERS = YES +DEFAULT_MEMBER_GROUP = "alphabetical" + +REFERENCED_BY_ALL = YES + +SHOW_INCLUDE_FILES = YES +SOURCE_BROWSER = YES + +MARKDOWN_SUPPORT = YES +OPTIMIZE_OUTPUT_FOR_C = YES \ No newline at end of file diff --git a/data/test00.qc b/data/test00.qc new file mode 100644 index
0000000..40cf9ab --- /dev/null +++ b/data/test00.qc @@ -0,0 +1,13 @@ ++a; + +-a; + +++a; + +a++; + +a--; + +--a; + +!a; diff --git a/data/test01.qc b/data/test01.qc new file mode 100644 index 0000000..ae3296c --- /dev/null +++ b/data/test01.qc @@ -0,0 +1,35 @@ +a+b; + +a-b; + +a*b; + +a/b; + +a%b; + +a^b; + +a&b; + +a|b; + +a<<b; + +a>>b; + +a&&b; + +a||b; + +a==b; + +a!=b; + +a<b; + +a>b; + +a<=b; + +a>=b; diff --git a/data/test02.qc b/data/test02.qc new file mode 100644 index 0000000..002f028 --- /dev/null +++ b/data/test02.qc @@ -0,0 +1,21 @@ +a=b; + +a+=b; + +a-=b; + +a*=b; + +a/=b; + +a%=b; + +a^=b; + +a<<=b; + +a>>=b; + +a|=b; + +a&=b; diff --git a/data/test03.qc b/data/test03.qc new file mode 100644 index 0000000..24f248a --- /dev/null +++ b/data/test03.qc @@ -0,0 +1,25 @@ +[1,2,3]; + +arr[idx]; + +arr[start:end]; + +arr[start:]; + +arr[:end]; + +arr[start:end:step]; + +arr[start::]; + +arr[:end:]; + +arr[::step]; + +arr[start:end:]; + +arr[start::step]; + +arr[:end:step]; + +arr{0, 1, 2}; diff --git a/data/test04.qc b/data/test04.qc new file mode 100644 index 0000000..39f097a --- /dev/null +++ b/data/test04.qc @@ -0,0 +1,13 @@ +{"key":1}; + +{'key':1}; + +{'key1':1, 'key2':2}; + +{'key1':1, "key2":2}; + +obj.key; + +obj['key']; + +obj{'key1', 'key2'}; \ No newline at end of file diff --git a/data/test05.qc b/data/test05.qc new file mode 100644 index 0000000..52d0ede --- /dev/null +++ b/data/test05.qc @@ -0,0 +1,15 @@ +if(a){b} + +if(a){b}else{c} + +if(a){b}elif(c){d}elif(e){f}else{g} + +try{a}catch(b){c}finally{d} + +while(a){b} + +while(a){b} + +for(i=0;i<10;i++){a} + +for(i=0;i<10;i++){a} \ No newline at end of file diff --git a/data/test06.qc b/data/test06.qc new file mode 100644 index 0000000..a547363 --- /dev/null +++ b/data/test06.qc @@ -0,0 +1,15 @@ +if(a) b; + +if(a) b; else c; + +if(a) b; elif(c) d; elif(e) f; else g; + +try a; catch(b) c; finally d; + +while(a) b; + +while(a) b; + +for(i=0;i<10;i++) a; + +for(i=0;i<10;i++) a; \ No newline at end of file diff --git
a/data/test07.qc b/data/test07.qc new file mode 100644 index 0000000..e57e80b --- /dev/null +++ b/data/test07.qc @@ -0,0 +1,16 @@ +(a + b != c && d || e); + +{ + a + b * c; + + a + b = c, c = d; + + a + b += c, c = a + b, d; + + {a + b * c; d + e - f, g + h << (i >> j)} + + return a + b - c * d / e % f ^ g << h >> i | j & k; +} + + + diff --git a/data/test08.qc b/data/test08.qc new file mode 100644 index 0000000..c18b077 --- /dev/null +++ b/data/test08.qc @@ -0,0 +1,11 @@ +main(); + +main(a); + +main(a, b); + +main(a+b, c); + +main(a, b, c) + d; + +a + main(a, b, c); \ No newline at end of file diff --git a/data/test09.qc b/data/test09.qc new file mode 100644 index 0000000..dca72d0 --- /dev/null +++ b/data/test09.qc @@ -0,0 +1,18 @@ +main1(){} + +main2(){return 1;} + +main3(a){return 2;} + +main4(a,b){return 3;} + +main5(a,b,c){return a+b*c;} + +main6(a,b){ +c = a + b; +return a+b*c; +} + +main7(a,b,c){ +return main7(a,b,c); +} \ No newline at end of file diff --git a/data/test10.qc b/data/test10.qc new file mode 100644 index 0000000..96a5723 --- /dev/null +++ b/data/test10.qc @@ -0,0 +1,7 @@ ++a*b-c/d; + +++a--; + +++(a--); + +b=c^=d+=e%=f>>=g||h&&i|j^k&l!=m>=n< + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef AST_HPP +#define AST_HPP + +#include +#include +#include +#include + +enum class token_kind { + eof, + open_bracket, + close_bracket, + separator, + keyword, + string, + comment, + whitespace, + integer, + floating, + special_character +}; + +struct token { + token_kind kind; + int line; + int column; + std::streamoff file_offset; + std::string word; + + virtual ~token(); + + virtual void dump(std::ostream& os, const std::string& prefix, bool is_last) + const noexcept; + + void dump(std::ostream& os) const noexcept; +}; + +using token_ptr = std::shared_ptr; + +struct ast_node { + size_t fixed_size { 1 }, full_size { 1 }; + virtual ~ast_node(); + + virtual ast_node const* first() const noexcept; + + virtual bool empty() const noexcept; + + virtual void dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full + ) const noexcept; + void dump(std::ostream& os, bool full) const noexcept; + void dump(std::ostream& os) const noexcept; + + virtual void placeholde(); +}; + +using ast_node_ptr = std::shared_ptr; + +struct token_node : ast_node { + token value; + bool empty() const noexcept override; + + void dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full + ) const noexcept override; +}; + +using token_node_ptr = std::shared_ptr; + +enum class group_kind { file, body, list, paren, command, item, key, halt }; + +struct group_node : ast_node { + size_t limit; + group_kind kind { group_kind::halt }; + std::vector nodes; + 
std::priority_queue> + weights; /// node_size -> node_index + + bool placeholder { false }; + void append(ast_node_ptr node); + bool empty() const noexcept override; + size_t size() const noexcept; + ast_node const* first() const noexcept override; + void dump( + std::ostream& os, const std::string& prefix, bool is_last, bool full + ) const noexcept override; + void placeholde() override; +}; + +using group_ptr = std::shared_ptr; +#endif // AST_HPP diff --git a/include/grouper.hpp b/include/grouper.hpp new file mode 100644 index 0000000..68825dc --- /dev/null +++ b/include/grouper.hpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef GROUPER_HPP +#define GROUPER_HPP + +#include "reader.hpp" + +class grouper { +public: + explicit grouper(reader& r, size_t limit = 64); + + group_ptr parse_group(group_kind kind = group_kind::file); + +private: + reader& src; + size_t limit; + + [[nodiscard]] token peek() const; + + [[nodiscard]] std::runtime_error make_error( + const std::string& message, + const std::source_location& location = std::source_location::current() + ) const; +}; + +#endif // GROUPER_HPP diff --git a/include/reader.hpp b/include/reader.hpp new file mode 100644 index 0000000..bad0dda --- /dev/null +++ b/include/reader.hpp @@ -0,0 +1,89 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef READER_HPP +#define READER_HPP + +#include +#include +#include + +#include "ast.hpp" + +class reader { +public: + explicit reader( + const std::filesystem::path& path, std::streamsize buffer_size = 4096 + ); + + explicit reader(std::string& data) noexcept; + + ~reader(); + + void next_token(token& out); + + void jump_to_position(std::streamoff position, int line, int column); + + void interrupt(); + +private: + std::ifstream ifs; + std::string buffer; + std::streamsize max_buffer_size {}; + std::streamoff file_offset {}; + int line { 0 }; + int column { 0 }; + size_t buffer_position { 0 }; + + bool is_valid() const noexcept; + + char peek_char() const noexcept; + + unsigned char peek_uchar() const noexcept; + + char get_char(); + + void advance_char(); + + void reload_buffer(); + + void read_whitespace(std::string& into); + + void read_keyword(std::string& into); + + void read_string(std::string& into); + + void read_comment(std::string& into); + + token_kind read_number(std::string& into); + + void init_token(token& t) const noexcept; + + [[nodiscard]] std::runtime_error make_error( + const std::string& message, + const std::source_location& location = std::source_location::current() + ) const; +}; + +#endif // READER_HPP \ No newline at end of file diff --git a/include/token.hpp b/include/token.hpp new file mode 100644 index 0000000..19a4b75 --- /dev/null +++ b/include/token.hpp @@ -0,0 +1,81 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above 
/**
 * @brief Kinds of lexical tokens.
 *
 * The enumerator order is significant: the dump code maps a kind to its
 * printable name by position.
 */
enum class token_kind {
    eof, ///< End of file or input
    open_bracket, ///< One of '(', '[', '{'
    close_bracket, ///< One of ')', ']', '}'
    separator, ///< ',', ';' or ':'
    keyword, ///< Identifier or reserved keyword
    string, ///< Quoted string literal
    comment, ///< Single or multiline comment
    whitespace, ///< Sequence of whitespace characters
    integer, ///< Integer number
    floating, ///< Floating point number
    special_character ///< Any other single character
};

/**
 * @brief Describes a single lexical token extracted by the reader.
 */
struct token {
    token_kind kind {}; ///< Token type
    int line { 0 }; ///< Line where the token starts
    int column { 0 }; ///< Column where the token starts
    std::streamoff file_offset {}; ///< Position in the file
    std::string word; ///< Raw text of the token

    virtual ~token();

    /**
     * @brief Dump token information as one line of a tree listing.
     *
     * @param os      Stream that receives the text.
     * @param prefix  Text printed in front of the branch marker.
     * @param is_last Selects the "last child" branch marker.
     *
     * Example:
     * @code
     * token t;
     * t.dump(std::cout, "", true);
     * @endcode
     */
    virtual void dump(std::ostream& os, const std::string& prefix, bool is_last)
        const noexcept;

    /// Convenience wrapper around dump(os, "", true)
    void dump(std::ostream& os) const noexcept;
};

/// Shared-ownership handle to a token.
using token_ptr = std::shared_ptr<token>;
+[![license](https://img.shields.io/github/license/YaRiabtsev/QuasiPiler?color=e6e6e6)](https://github.com/YaRiabtsev/QuasiPiler/blob/master/license) + +> “A one-eyed transpiler is much more incomplete than a blind transpiler, for he knows what it is that’s lacking.” +> — Victor-Marie of Gugle Inc. (1998–2017) + +This repo is my sanctuary under license — it begs mercy, not stars. I’ll bell when (or if) it works. +“Documentation and Contributing” is a friendly suggestion, not a Martin Luther pinboard. + +## Setup and Installation + +### Requirements + +* **C++20** compiler +* **cxxopts**: for command line options +* **GTest**: for unit tests +* **lcov**: for code coverage reports +* **doxygen** and **graphviz**: for generating documentation + +### Building the Application + +1. Build with CMake in Release mode: + ```bash + $ cmake -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=Release -B build -S . + $ cmake --build build + ``` +2. Run the Application: + ```bash + $ qpiler [options] <input> + ``` + * `<input>`: path to your QuasiCode file + +## QuasiLang Syntax + +### Basics + +- **Comments** + - Line comments begin with `//`. + - Block comments are enclosed in `/*` and `*/`. +- **Whitespace** is ignored except as a separator. +- **Identifiers** use letters, digits and underscores and may not start with a digit. +- **Literals** + - Numbers support integer and floating point forms (with optional exponent). + - Strings can use either single `'` or double `"` quotes and support common escape sequences. + +[//]: # (- **Separators and grouping**) + +[//]: # ( - `,` comma, `;` semicolon and `:` colon act as separators.) + +[//]: # ( - `()` parentheses, `[]` brackets and `{}` braces form groups.) + +[//]: # ( - Nested groups are used for lists, code blocks and expressions.) + +[//]: # () +[//]: # (### Expressions) + +[//]: # () +[//]: # (- Standard arithmetic and assignment operators are recognized: `+`, `-`, `*`, `/`, `%`, `=` and their compound forms (`+=`, `-=`, `*=`, `/=`, `%=`).)
+ +[//]: # (- Comparison and logical operators include `==`, `!=`, `<`, `<=`, `>`, `>=`, `&&`, `||`, `!`.) + +[//]: # (- Bitwise operators: `&`, `|`, `^`, `<<`, `>>` and their compound assignments.) + +[//]: # (- Increment and decrement operators `++` and `--` are supported in prefix and postfix form.) + +[//]: # (- Member access uses `.` and indexing uses `[expr]`. Slice syntax `[start:end:step]` is available.) + +[//]: # (- Function calls use the form `name(arg1, arg2)`.) + +### Statements and Declarations + +[//]: # (- **Variable assignment** follows `name = expression;`.) + +[//]: # (- **Function declarations** use `name(param1, param2) { ... }`.) + +[//]: # (- **Control flow**) + +[//]: # ( - Conditional statements: `if (cond) { ... }`, optional `else` or `elif` blocks.) + +[//]: # ( - Loops: `while (cond) { ... }` and `for(init; cond; step) { ... }`.) + +[//]: # ( - `break`, `continue`, `return` and `goto` appear as standalone keywords and may take an optional expression for `return`.) + +[//]: # ( - `try { ... } catch { ... }` for exception handling.) + +[//]: # (- **Labels** can be defined with `label_name:` and referenced via `goto label_name`.) + +### Data Structures + +[//]: # (- **Lists** use `[item1, item2, ...]`.) + +[//]: # (- **Dictionaries/objects** use `{ "key": value }`.) + +## Examples + +```qc +// todo: Example of a simple QuasiLang program +``` + +## Documentation and Contributing + +To build and run tests, enable debug mode, or generate coverage reports: + +1. **Build with Debug and Coverage:** + ```bash + $ cmake -B build -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DCOVERAGE=ON + ``` +2. **Generate Coverage Report and HTML:** + ```bash + $ cmake --build build --target coverage + ``` + +For detailed documentation, see the [Documentation](https://yariabtsev.github.io/QuasiPiler/doc/) and for the latest +coverage report, see [Coverage](https://yariabtsev.github.io/QuasiPiler/cov/).
+ +## Security Policy + +Please report any security issues using GitHub's private vulnerability reporting +or by emailing [yaroslav.riabtsev@rwth-aachen.de](mailto:yaroslav.riabtsev@rwth-aachen.de). +See the [security policy](.github/SECURITY.md) for full details. + +## License + +This project is open-source and available under the MIT License. \ No newline at end of file diff --git a/src/ast.cpp b/src/ast.cpp new file mode 100644 index 0000000..09bc284 --- /dev/null +++ b/src/ast.cpp @@ -0,0 +1,142 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ast.hpp" + +token::~token() = default; + +static const char* token_kind_name(const token_kind k) noexcept { + static constexpr const char* names[] + = { "eof", "open_bracket", "close_bracket", "separator", + "keyword", "string", "comment", "whitespace", + "integer", "floating", "special_character" }; + return names[static_cast(k)]; +} + +void token::dump( + std::ostream& os, const std::string& prefix, const bool is_last +) const noexcept { + os << prefix << (is_last ? "`-" : "|-") << "Token(" << token_kind_name(kind) + << ") <" << line << ":" << column << ">(\"" << word << "\")\n"; +} + +void token::dump(std::ostream& os) const noexcept { dump(os, "", true); } + +ast_node::~ast_node() = default; + +ast_node const* ast_node::first() const noexcept { return this; } + +bool ast_node::empty() const noexcept { return true; } + +void ast_node::dump( + std::ostream& os, const std::string& prefix, const bool is_last, bool +) const noexcept { + os << prefix << (is_last ? "`-" : "|-") << "Null\n"; +} + +void ast_node::dump(std::ostream& os, const bool full) const noexcept { + dump(os, "", true, full); +} + +void ast_node::dump(std::ostream& os) const noexcept { dump(os, true); } + +void ast_node::placeholde() { + throw std::runtime_error( + "cannot placeholde a base ast_node, use derived classes" + ); +} + +bool token_node::empty() const noexcept { return false; } + +void token_node::dump( + std::ostream& os, const std::string& prefix, const bool is_last, bool +) const noexcept { + os << prefix << (is_last ? "`-" : "|-") << "TokenNode\n"; + value.dump(os, prefix + (is_last ? 
" " : "| "), true); +} + +static const char* group_kind_name(const group_kind k) noexcept { + static constexpr const char* names[] + = { "file", "body", "list", "paren", "command", "item", "key", "halt" }; + return names[static_cast(k)]; +} + +void group_node::append(ast_node_ptr node) { + fixed_size += node->fixed_size; + full_size += node->full_size; + if (node->fixed_size > 1) { + weights.emplace(node->fixed_size, size()); + } + nodes.push_back(std::move(node)); + while (!weights.empty() && full_size > limit) { + auto [weight, index] = weights.top(); + weights.pop(); + fixed_size += 1 - weight; + nodes[index]->placeholde(); + } + if (full_size > limit) { + throw std::runtime_error("limit is too small for group node"); + } +} + +bool group_node::empty() const noexcept { return size() == 0; } + +size_t group_node::size() const noexcept { return nodes.size(); } + +ast_node const* group_node::first() const noexcept { + if (size() == 1) { + return nodes[0]->first(); + } + return ast_node::first(); +} + +void group_node::dump( + std::ostream& os, const std::string& prefix, const bool is_last, + const bool full +) const noexcept { + if (kind != group_kind::file) { + os << prefix << (is_last ? "`-" : "|-"); + } + os << "Group(" << group_kind_name(kind) << ")"; + if (placeholder) { + os << " "; + if (full) { + // extract squeezed + } + } + if (!full) { + os << " <" << fixed_size << "/" << full_size << " nested nodes>"; + } + os << "\n"; + const std::string child_prefix + = prefix + (kind != group_kind::file ? (is_last ? 
" " : "| ") : ""); + for (size_t i = 0; i < nodes.size(); ++i) { + nodes[i]->dump(os, child_prefix, i + 1 == nodes.size(), full); + } +} + +void group_node::placeholde() { + placeholder = true; + fixed_size = 1; +} diff --git a/src/grouper.cpp b/src/grouper.cpp new file mode 100644 index 0000000..6d45956 --- /dev/null +++ b/src/grouper.cpp @@ -0,0 +1,154 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "grouper.hpp" + +grouper::grouper(reader& r, const size_t limit) + : src(r) + , limit(limit) { + if (limit < 2) { + throw make_error("minimum limit is 2"); + } +} + +group_ptr grouper::parse_group(const group_kind kind) { + auto group = std::make_shared(); + group->limit = limit; + auto top = std::make_shared(); + top->limit = limit; + while (true) { + const token current = peek(); + if (current.kind == token_kind::separator) { + if (current.word == ":") { + top->kind = group_kind::key; + } else if (current.word == ",") { + top->kind = group_kind::item; + } else if (current.word == ";") { + top->kind = group_kind::command; + } else { + throw make_error( + "unexpected separator: " + current.word + ); // todo: top->dump() + } + if (top->kind == kind) { + if (group->empty()) { + return top; + } + try { + group->append(top); + } catch (const std::runtime_error&) { + throw make_error("group limit exceeded"); + } + throw make_error("wrong group kind"); // todo: group->dump() + } + try { + group->append(top); + } catch (const std::runtime_error&) { + throw make_error("group limit exceeded"); + } + top = std::make_shared(); + top->limit = limit; + } else if (current.kind == token_kind::open_bracket) { + group_kind sub_kind; + if (current.word == "{") { + sub_kind = group_kind::body; + } else if (current.word == "[") { + sub_kind = group_kind::list; + } else if (current.word == "(") { + sub_kind = group_kind::paren; + } else { + throw make_error( + "unexpected open bracket: " + current.word + ); // todo: top->dump() + } + try { + top->append(parse_group(sub_kind)); + } catch (const std::runtime_error&) { + throw make_error("group limit exceeded"); + } + } else if (current.kind == token_kind::close_bracket + || current.kind == token_kind::eof) { + try { + group->append(top); + } catch (const std::runtime_error&) { + throw make_error("group limit exceeded"); + } + top = std::make_shared(); + top->limit = limit; + if (current.kind == token_kind::eof) { + 
group->kind = group_kind::file; + } else if (current.word == "}") { + group->kind = group_kind::body; + } else if (current.word == "]") { + group->kind = group_kind::list; + } else if (current.word == ")") { + group->kind = group_kind::paren; + } else { + throw make_error( + "unexpected close bracket: " + current.word + ); // todo: group->dump() + } + if (group->kind == kind) { + return group; + } + throw make_error("wrong group kind"); + } else { + auto tk = std::make_shared(); + tk->value = current; + try { + top->append(tk); + } catch (const std::runtime_error&) { + throw make_error("group limit exceeded"); + } + } + } +} + +token grouper::peek() const { + token current; + do { + src.next_token(current); + } while (current.kind == token_kind::whitespace + || current.kind == token_kind::comment); + return current; +} + +std::runtime_error grouper::make_error( + const std::string& message, const std::source_location& location +) const { + std::ostringstream oss; + oss << "[Grouper-Error] " << message << ". 
" << std::endl; + // oss << "during parsing of group:" << std::endl; + // obj->dump(oss, "\t", true, false); + // oss << "\n"; + oss << "in file: " << location.file_name() << '(' << location.line() << ':' + << location.column() << ") `" << location.function_name() << "`" + << std::endl; + try { + src.interrupt(); + } catch (const std::runtime_error& e) { + oss << e.what(); + } + return std::runtime_error(oss.str()); +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..53aee34 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include + +#include "reader.hpp" + +int main(const int argc, char* argv[]) { + std::filesystem::path path; + try { + cxxopts::Options options( + "QuasiPiler", "the Hunchback Dragon of Compilers" + ); + options + .add_options()("i,input", "Input file", cxxopts::value(path))( + "h,help", "show help" + ); + options.parse_positional({ "input" }); + if (const auto result = options.parse(argc, argv); + result.count("help")) { + std::cout << options.help() << "\n"; + return 0; + } + if (path.empty() || !exists(path) || !is_regular_file(path)) { + std::cerr << "input file is required.\n"; + return 1; + } + } catch (const cxxopts::exceptions::exception& e) { + std::cerr << "error parsing options: " << e.what() << "\n"; + return 1; + } + + reader r(path); + + return 0; +} \ No newline at end of file diff --git a/src/reader.cpp b/src/reader.cpp new file mode 100644 index 0000000..dc39008 --- /dev/null +++ b/src/reader.cpp @@ -0,0 +1,382 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "reader.hpp" + +#include + +reader::reader( + const std::filesystem::path& path, const std::streamsize buffer_size +) + : max_buffer_size(buffer_size) { + ifs.open(path, std::ios::in | std::ios::binary); + if (!ifs.is_open()) { + throw std::invalid_argument("cannot open file: " + path.string()); + } + ifs.seekg(0, std::ios::beg); + file_offset = ifs.tellg(); + buffer.resize(static_cast(max_buffer_size)); + reload_buffer(); +} + +reader::reader(std::string& data) noexcept + : buffer(std::move(data)) { + if (!buffer.empty()) { + line = 0; + column = 0; + } +} + +reader::~reader() { + if (ifs.is_open()) { + ifs.close(); + } +} + +bool reader::is_valid() const noexcept { + return !buffer.empty() && buffer_position < buffer.size(); +} + +char reader::peek_char() const noexcept { return buffer[buffer_position]; } + +unsigned char reader::peek_uchar() const noexcept { + return static_cast(peek_char()); +} + +char reader::get_char() { + const char current_char = peek_char(); + advance_char(); + return current_char; +} + +void reader::advance_char() { + assert(!buffer.empty()); + ++buffer_position; + ++column; + if (buffer_position >= buffer.size()) { + reload_buffer(); + } +} + +void reader::reload_buffer() { + if (!ifs.is_open() || ifs.eof()) { + return; + } + file_offset = ifs.tellg(); + ifs.read(&buffer[0], max_buffer_size); + const auto got = ifs.gcount(); + buffer.resize(static_cast(got)); + buffer_position = 0; +} + +void reader::read_whitespace(std::string& into) { + into.clear(); + while (is_valid() && std::isspace(peek_uchar())) { + if (peek_char() == '\n') { + ++line; + column = -1; + } + into += get_char(); + } +} + +void reader::read_keyword(std::string& into) { + 
into.clear(); + do { + into += get_char(); + } while (is_valid() && (std::isalnum(peek_uchar()) || peek_char() == '_')); +} + +void reader::read_comment(std::string& into) { + assert(is_valid() && into.size() == 1 && into[0] == '/'); + into += get_char(); + const bool is_multiline = into.back() == '*'; + while (is_valid()) { + const char current_char = get_char(); + if (is_multiline && current_char == '/' && into.back() == '*' + && into.size() > 2) { + into += current_char; + return; + } + into += current_char; + if (current_char == '\n') { + ++line; + column = -1; + if (!is_multiline) { + column = 0; + break; + } + } + } + if (is_multiline) { + throw make_error("missing closing comment delimiter"); + } +} + +void reader::read_string(std::string& into) { + into.clear(); + const char quote = get_char(); + bool escaped = false; + while (is_valid()) { + const char current_char = peek_char(); + if (escaped) { + switch (current_char) { + case '"': + into += '"'; + break; + case '\'': + into += '\''; + break; + case '\\': + into += '\\'; + break; + case '/': + into += '/'; + break; + case 'b': + into += '\b'; + break; + case 'f': + into += '\f'; + break; + case 'n': + into += '\n'; + break; + case 'r': + into += '\r'; + break; + case 't': + into += '\t'; + break; + case 'u': { + std::string hex; + for (int i = 0; i < 4; ++i) { + advance_char(); + if (!is_valid() || !std::isxdigit(peek_uchar())) { + throw make_error("invalid Unicode escape"); + } + hex += peek_char(); + } + // const int codepoint = std::stoi(hex, nullptr, 16); + // std::wstring_convert, char32_t> + // converter; + // into += converter.to_bytes(static_cast(codepoint)); + const auto codepoint + = static_cast(std::stoul(hex, nullptr, 16)); + auto encode = [](const char32_t cp) { + std::string out; + if (cp <= 0x7F) { + out += static_cast(cp); + } else if (cp <= 0x7FF) { + out += static_cast(0xC0 | (cp >> 6)); + out += static_cast(0x80 | (cp & 0x3F)); + } else if (cp <= 0xFFFF) { + out += static_cast(0xE0 | 
(cp >> 12)); + out += static_cast(0x80 | ((cp >> 6) & 0x3F)); + out += static_cast(0x80 | (cp & 0x3F)); + } else { + out += static_cast(0xF0 | (cp >> 18)); + out += static_cast(0x80 | ((cp >> 12) & 0x3F)); + out += static_cast(0x80 | ((cp >> 6) & 0x3F)); + out += static_cast(0x80 | (cp & 0x3F)); + } + return out; + }; + into += encode(codepoint); + break; + } + default: + throw make_error("invalid escape sequence"); + } + escaped = false; + } else if (current_char == '\\') { + escaped = true; + } else if (current_char == quote) { + break; + } else { + into += current_char; + } + advance_char(); + } + if (!is_valid() || peek_char() != quote) { + throw make_error("missing closing quote"); + } + advance_char(); +} + +token_kind reader::read_number(std::string& into) { + into.clear(); + bool is_float = false; + if (is_valid() && peek_char() == '0') { + into += get_char(); + if (is_valid() && std::isdigit(peek_uchar())) { + throw make_error("leading zeros not allowed"); + } + } else if (is_valid() && std::isdigit(peek_uchar())) { + do { + into += get_char(); + } while (is_valid() && std::isdigit(peek_uchar())); + } else { + throw make_error("expected digit"); + } + + if (is_valid() && peek_char() == '.') { + is_float = true; + into += get_char(); + if (!is_valid() || !std::isdigit(peek_uchar())) { + throw make_error("digit expected after decimal"); + } + while (is_valid() && std::isdigit(peek_uchar())) { + into += get_char(); + } + } + + if (is_valid() && (peek_char() == 'e' || peek_char() == 'E')) { + is_float = true; + into += get_char(); + if (is_valid() && (peek_char() == '+' || peek_char() == '-')) { + into += get_char(); + } + if (!is_valid() || !std::isdigit(peek_uchar())) { + throw make_error("digit expected after exponent"); + } + while (is_valid() && std::isdigit(peek_uchar())) { + into += get_char(); + } + } + return is_float ? 
token_kind::floating : token_kind::integer; +} + +void reader::init_token(token& t) const noexcept { + t.word.clear(); + t.line = line; + t.column = column; + t.file_offset = file_offset + static_cast(buffer_position); +} + +std::runtime_error reader::make_error( + const std::string& message, const std::source_location& location +) const { + std::ostringstream oss; + oss << "[Reader-Error] " << message << ". "; +#ifndef NDEBUG + if (!ifs.is_open()) { + oss << "no file open. "; + } + if (!is_valid()) { + oss << "position is out of range. line: " << (line + 1) + << ", column: " << (column + 1) << " exceeds available input. "; + } else { + const char current_char = peek_char(); + oss << "character '" << current_char + << "' (ASCII: " << static_cast(current_char) + << ") was found at line " << (line + 1) << ", column " + << (column + 1) << ". "; + } + oss << "in file: " << location.file_name() << '(' << location.line() << ':' + << location.column() << ") `" << location.function_name() << "`"; + oss << std::endl << buffer; +#endif + return std::runtime_error(oss.str()); +} + +void reader::next_token(token& out) { + init_token(out); + out.kind = token_kind::special_character; + + if (!is_valid()) { + out.kind = token_kind::eof; + out.word.clear(); + return; + } + switch (const char current_char = peek_char()) { + case '(': + case '[': + case '{': + out.kind = token_kind::open_bracket; + out.word = std::string(1, get_char()); + break; + case ')': + case ']': + case '}': + out.kind = token_kind::close_bracket; + out.word = std::string(1, get_char()); + break; + case ',': + case ';': + case ':': + out.kind = token_kind::separator; + out.word = std::string(1, get_char()); + break; + case '/': + out.word = std::string(1, get_char()); + if (is_valid() && (peek_char() == '/' || peek_char() == '*')) { + read_comment(out.word); + out.kind = token_kind::comment; + } + break; + default: + if (std::isalpha(static_cast(current_char)) + || current_char == '_') { + 
read_keyword(out.word); + out.kind = token_kind::keyword; + } else if (std::isdigit(static_cast(current_char))) { + out.kind = read_number(out.word); + } else if (current_char == '"' || current_char == '\'') { + read_string(out.word); + out.kind = token_kind::string; + } else if (std::isspace(static_cast(current_char))) { + read_whitespace(out.word); + out.kind = token_kind::whitespace; + } else { + out.word = std::string(1, get_char()); + } + } +} + +void reader::jump_to_position( + const std::streamoff position, const int line, const int column +) { + if (position < 0) { + throw make_error("position is out of range"); + } + if (!ifs.is_open()) { + buffer_position = static_cast(position); + if (buffer_position > buffer.size()) { + throw make_error("position is out of range"); + } + } else { + ifs.seekg(position, std::ios::beg); + reload_buffer(); + } + this->line = line; + this->column = column; +} + +void reader::interrupt() { + if (ifs.is_open() && ifs.eof()) { + return; + } + throw make_error("interrupted"); +} diff --git a/tests/ast_tests.cpp b/tests/ast_tests.cpp new file mode 100644 index 0000000..190fb69 --- /dev/null +++ b/tests/ast_tests.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "grouper.hpp" +#include + +TEST(AstDump, ExampleAST) { + // std::ostringstream path_out; + // path_out << "test_data/sum.md"; + // std::ofstream out(path_out.str()); + + for (int i = 0; i < 12; ++i) { + try { + std::stringstream idx; + idx << std::setfill('0') << std::setw(2) << i; + std::ostringstream path_in; + path_in << "test_data/test" << idx.str() << ".qc"; + reader r(path_in.str()); + grouper g { r }; + auto res = g.parse_group(); + // out << "
test" << idx.str() + // << ".qc:\n\n ```\n"; + + std::ostringstream path_out; + path_out << "test_data/test" << idx.str() << ".dump"; + std::ofstream out(path_out.str()); + res->dump(out, "", true, true); + // out << "```\n
\n\n"; + } catch (const std::runtime_error& e) { + std::cout << "Error processing test case " << i << ": " << e.what() + << "\n\n"; + } + } +} diff --git a/tests/grouper_tests.cpp b/tests/grouper_tests.cpp new file mode 100644 index 0000000..78c5bfc --- /dev/null +++ b/tests/grouper_tests.cpp @@ -0,0 +1,112 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ast.hpp" +#include "grouper.hpp" +#include + +TEST(GrouperTest, ParsesSimpleBody) { + std::string input = "{a;b}"; + reader r { input }; + grouper g { r }; + + auto res = g.parse_group(); + ASSERT_EQ(res->kind, group_kind::file); + ASSERT_EQ(res->size(), 1u); + + auto* halt = dynamic_cast(res->nodes[0].get()); + ASSERT_NE(halt, nullptr); + EXPECT_EQ(halt->kind, group_kind::halt); + ASSERT_EQ(halt->size(), 1u); + + auto* body = dynamic_cast(halt->nodes[0].get()); + ASSERT_NE(body, nullptr); + EXPECT_EQ(body->kind, group_kind::body); + ASSERT_EQ(body->size(), 2u); + + auto* cmd = dynamic_cast(body->nodes[0].get()); + ASSERT_NE(cmd, nullptr); + EXPECT_EQ(cmd->kind, group_kind::command); + auto* a = dynamic_cast(cmd->nodes[0].get()); + ASSERT_NE(a, nullptr); + EXPECT_EQ(a->value.word, "a"); + + auto* trailing = dynamic_cast(body->nodes[1].get()); + ASSERT_NE(trailing, nullptr); + auto* b = dynamic_cast(trailing->nodes[0].get()); + ASSERT_NE(b, nullptr); + EXPECT_EQ(b->value.word, "b"); +} + +TEST(GrouperTest, ParsesNestedListBody) { + std::string input = "[a,{b;c}]"; + reader r { input }; + grouper g { r }; + + auto res = g.parse_group(); + ASSERT_EQ(res->kind, group_kind::file); + ASSERT_EQ(res->size(), 1u); + + auto* list = dynamic_cast(res->first()); + ASSERT_NE(list, nullptr); + EXPECT_EQ(list->kind, group_kind::list); + ASSERT_EQ(list->size(), 2u); + + auto* item = dynamic_cast(list->nodes[0].get()); + ASSERT_NE(item, nullptr); + EXPECT_EQ(item->kind, group_kind::item); + auto* a = dynamic_cast(item->nodes[0].get()); + ASSERT_NE(a, nullptr); + EXPECT_EQ(a->value.word, "a"); + + auto* next = dynamic_cast(list->nodes[1].get()); + ASSERT_NE(next, nullptr); + auto* body = dynamic_cast(next->nodes[0].get()); + ASSERT_NE(body, nullptr); + EXPECT_EQ(body->kind, group_kind::body); +} + +TEST(GrouperTest, MissingClosingThrows) { + std::string input = "[a"; + reader r { input }; + grouper g { r }; + EXPECT_THROW(g.parse_group(), std::runtime_error); +} + 
+TEST(GrouperTest, ConstructorEnforcesLimit) { + std::string input = "a"; + reader r { input }; + EXPECT_THROW(grouper(r, 1);, std::runtime_error); +} + +TEST(GrouperTest, LimitTooSmallThrows) { + for (auto [str, lim] : std::vector> { + { "{a;[b,c,d];e}", 14 }, + { "a,b,c,d,e,f", 12 }, + { "{[a,a,a,a,a],[b,b,b,b]}", 24 } }) { + reader r { str }; + grouper g { r, lim }; + EXPECT_THROW(g.parse_group(), std::runtime_error); + } +} diff --git a/tests/main.cpp b/tests/main.cpp new file mode 100644 index 0000000..f7caee1 --- /dev/null +++ b/tests/main.cpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/tests/reader_tests.cpp b/tests/reader_tests.cpp new file mode 100644 index 0000000..0c8919e --- /dev/null +++ b/tests/reader_tests.cpp @@ -0,0 +1,182 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2025 Yaroslav Riabtsev + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "reader.hpp" + +#include + +TEST(ReaderTest, Constructor) { + std::string str; + reader r { str }; +} + +TEST(ReaderTest, GoodWhiteSpaceToken) { + token t; + for (std::string str : { " ", "\t\t\t", " ", "\n\n\n", "\r\r\r", + " \t \n\t\t\t\r " }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::whitespace); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } +} + +TEST(ReaderTest, GoodIntegerToken) { + token t; + for (std::string& str : + std::vector { "0", "1", "73", "2147483647", "1234567890", + std::string(1024, '9') }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::integer); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } +} + +TEST(ReaderTest, GoodFloatingToken) { + token t; + for (std::string& str : std::vector { + "0.0", "0.0000123456789", "2.71828", "3.141592", "36.6", + "1234567890.0987654321", std::string(1022, '9') + ".0", + "0." + std::string(1022, '9'), + std::string(512, '9') + "." + std::string(511, '9') }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::floating); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } + for (std::string& str : std::vector { + "0e123", "1E456", "73e+789", "168E+012", "15e-345", "42E-678", + "1234567890.0987654321", std::string(1022, '9') + ".0", + "0." + std::string(1022, '9'), + std::string(512, '9') + "." + std::string(511, '9') }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::floating); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } + for (std::string& str : std::vector { + "0.1e123", "1.2E456", "73.84e+789", "168.861E+012", "15.25e-345", + "42.42E-678", "1234567890.0987654321", + std::string(1022, '9') + ".0", "0." + std::string(1022, '9'), + std::string(512, '9') + "." 
+ std::string(511, '9') }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::floating); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } +} + +TEST(ReaderTest, BadIntegerToken) { + std::string str = "0123"; + reader r { str }; + token t; + EXPECT_THROW(r.next_token(t), std::runtime_error); +} + +TEST(ReaderTest, BadFloatingToken) { + token t; + for (std::string& str : + std::vector { "123.", "123e", "123E", "123e+", "123e-", + "123E+", "123E-", "123eE", "123Ee" }) { + reader r { str }; + EXPECT_THROW(r.next_token(t), std::runtime_error); + } +} + +TEST(ReaderTest, KeyWordToken) { + token t; + for (std::string& str : std::vector { + "_abc123ABC_123cbaCBA___", "abc123ABC_123cbaCBA___", "keyword", + "a", "b1", "c42", "verybadvareabelename" }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::keyword); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } +} + +TEST(ReaderTest, StringToken) { + token t; + for (std::string& str : std::vector { + "\"Hello, world!\"", "'Hello, world!'", + "'Line 1\nLine2\tTabbed\rCarriage\bBackspace\fFormFeed'", + R"('Special !@#$%^&*()_+-=[]{};:\"\'\\|,<.>/?`~')", + R"('All ASCII chars:!"\'#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~')", + "\"Non-ASCII: üñîçødé, 中文, русский, العربية\"", + R"("The quick brown fox jumps over the lazy dog")", + R"("EXPECT_EQ(result->to_string(), \\\"EXPECT_EQ(result->to_string(),")", + R"("\\\\\\\"\\\\\\\");\\\"); isn't it a good test string?")", + R"("C:\\\\Projects\\\\JSONTest\\\\result.json")", + R"("First line\nSecond line\rBackspace\b happens here\nTabbed\/")", + R"("line:\tTabbed\fEnd of string")", + R"("\\/First line\\nSecond line\\rBackspace\\b happens here\\nTabbed")", + R"("line:\\tTabbed\\fEnd of string\\\"")", + R"("I hate Emoji! 
🤣🤫🔥\u1234")" }) { + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::string); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } +} + +TEST(ReaderTest, CommentToken) { + token t; + std::string multiline = "/*"; + for (std::string& str : std::vector { + "// \"Hello, world!\"", "// 'Hello, world!'", + "//'Line 1\\Line2\tTabbed\rCarriage\bBackspace\fFormFeed'", + R"(// 'Special !@#$%^&*()_+-=[]{};:\"\'\\|,<.>/?`~')", + R"(//'All ASCII chars:!"\'#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~')", + "//Non-ASCII: üñîçødé, 中文, русский, العربية\"", + R"(//"The quick brown fox jumps over the lazy dog")", + R"(//"EXPECT_EQ(result->to_string(), \\\"EXPECT_EQ(result->to_string(),")", + R"(//\\\\\\\"\\\\\\\");\\\"); isn't it a good test string?")", + R"(// C:\\\\Projects\\\\JSONTest\\\\result.json")", + R"(///////First line\nSecond line\rBackspace\b happens here\nTabbed\/")", + R"(//line:\tTabbed\fEnd of string")", + R"(//|\\/First line\\nSecond line\\rBackspace\\b happens here\\nTabbed")", + R"(///line:\\tTabbed\\fEnd of string\\\"")", + R"(//I hate Emoji! 🤣🤫🔥\u1234")" }) { + multiline += str + "\n"; + reader r { str }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::comment); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); + } + multiline += "*/"; + reader r { multiline }; + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::comment); + r.next_token(t); + EXPECT_EQ(t.kind, token_kind::eof); +}