diff --git a/AGENTS.md b/AGENTS.md index 3926f490..287a8cd4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -54,7 +54,7 @@ abap-ast (zero deps, typed AST + printer for ABAP source) openai-codegen ──► abap-ast (OpenAPI → ABAP client codegen) ``` -Foundation packages (no `@abapify` deps): `ts-xsd`, `speci`, `logger`, `acds`. +Foundation packages (no `@abapify` deps): `ts-xsd`, `speci`, `logger`, `acds`, `aclass`, `abap-ast`. ## MCP ↔ CLI Coupling (intentional) @@ -159,6 +159,7 @@ Each package has its own `AGENTS.md` with detailed conventions: - [`packages/abap-ast/AGENTS.md`](packages/abap-ast/AGENTS.md) — zero-dependency AST + deterministic printer for ABAP; foundation for code generation. - [`packages/adk/AGENTS.md`](packages/adk/AGENTS.md) — ABAP Development Kit, object CRUD, save/lock flow, ETag management - [`packages/acds/AGENTS.md`](packages/acds/AGENTS.md) — ABAP CDS parser, tokenizer, AST types +- [`packages/aclass/AGENTS.md`](packages/aclass/AGENTS.md) — ABAP OO parser (CLAS/INTF), Chevrotain lexer + typed AST - [`packages/adt-cli/AGENTS.md`](packages/adt-cli/AGENTS.md) — CLI commands, service pattern, client initialization - [`packages/adt-client/AGENTS.md`](packages/adt-client/AGENTS.md) — Contract-driven REST client, schema conventions, type inference - [`packages/adt-contracts/AGENTS.md`](packages/adt-contracts/AGENTS.md) — Contract testing framework, schema integration diff --git a/bun.lock b/bun.lock index 66b9f04b..0d55661f 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "abapify", @@ -81,18 +82,25 @@ }, "packages/abap-ast": { "name": "@abapify/abap-ast", - "version": "0.3.1", + "version": "0.3.6", }, "packages/acds": { "name": "@abapify/acds", - "version": "0.3.1", + "version": "0.3.6", + "dependencies": { + "chevrotain": "^11.0.0", + }, + }, + "packages/aclass": { + "name": "@abapify/aclass", + "version": "0.1.0", "dependencies": { "chevrotain": "^11.0.0", }, }, 
"packages/adk": { "name": "@abapify/adk", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-client": "workspace:*", "@abapify/adt-locks": "workspace:*", @@ -101,7 +109,7 @@ }, "packages/adt-atc": { "name": "@abapify/adt-atc", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-plugin": "workspace:*", "chalk": "^5.3.0", @@ -109,7 +117,7 @@ }, "packages/adt-aunit": { "name": "@abapify/adt-aunit", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-contracts": "workspace:*", "@abapify/adt-plugin": "workspace:*", @@ -119,7 +127,7 @@ }, "packages/adt-auth": { "name": "@abapify/adt-auth", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/logger": "workspace:*", "proxy-agent": "^6.4.0", @@ -127,7 +135,7 @@ }, "packages/adt-cli": { "name": "@abapify/adt-cli", - "version": "0.3.1", + "version": "0.3.6", "bin": { "adt": "./dist/bin/adt.mjs", }, @@ -169,7 +177,7 @@ }, "packages/adt-client": { "name": "@abapify/adt-client", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-contracts": "workspace:*", "@abapify/adt-schemas": "workspace:*", @@ -178,7 +186,7 @@ }, "packages/adt-codegen": { "name": "@abapify/adt-codegen", - "version": "0.3.1", + "version": "0.3.6", "bin": { "adt-codegen": "./dist/cli.mjs", }, @@ -192,11 +200,11 @@ }, "packages/adt-config": { "name": "@abapify/adt-config", - "version": "0.3.1", + "version": "0.3.6", }, "packages/adt-contracts": { "name": "@abapify/adt-contracts", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-schemas": "workspace:*", "@abapify/speci": "workspace:*", @@ -207,7 +215,7 @@ }, "packages/adt-diff": { "name": "@abapify/adt-diff", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adk": "workspace:*", "@abapify/adt-contracts": "workspace:*", @@ -223,7 +231,7 @@ }, "packages/adt-export": { "name": "@abapify/adt-export", - "version": "0.3.1", + "version": "0.3.6", 
"dependencies": { "@abapify/adk": "workspace:*", "@abapify/adt-locks": "workspace:*", @@ -236,18 +244,18 @@ }, "packages/adt-fixtures": { "name": "@abapify/adt-fixtures", - "version": "0.3.1", + "version": "0.3.6", }, "packages/adt-locks": { "name": "@abapify/adt-locks", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-client": "workspace:*", }, }, "packages/adt-mcp": { "name": "@abapify/adt-mcp", - "version": "0.3.1", + "version": "0.3.6", "bin": { "adt-mcp": "./dist/bin/adt-mcp.mjs", "adt-mcp-http": "./dist/bin/adt-mcp-http.mjs", @@ -271,7 +279,7 @@ }, "packages/adt-playwright": { "name": "@abapify/adt-playwright", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-auth": "workspace:*", "@abapify/adt-config": "workspace:*", @@ -284,14 +292,14 @@ }, "packages/adt-plugin": { "name": "@abapify/adt-plugin", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adk": "workspace:*", }, }, "packages/adt-plugin-abapgit": { "name": "@abapify/adt-plugin-abapgit", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/acds": "workspace:*", "@abapify/adk": "workspace:*", @@ -303,7 +311,7 @@ }, "packages/adt-plugin-gcts": { "name": "@abapify/adt-plugin-gcts", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adk": "workspace:*", "@abapify/adt-plugin": "workspace:*", @@ -311,7 +319,7 @@ }, "packages/adt-plugin-gcts-cli": { "name": "@abapify/adt-plugin-gcts-cli", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-contracts": "workspace:*", "@abapify/adt-plugin": "workspace:*", @@ -320,7 +328,7 @@ }, "packages/adt-puppeteer": { "name": "@abapify/adt-puppeteer", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-auth": "workspace:*", "@abapify/adt-config": "workspace:*", @@ -333,11 +341,11 @@ }, "packages/adt-rfc": { "name": "@abapify/adt-rfc", - "version": "0.3.1", + "version": "0.3.6", }, "packages/adt-schemas": { "name": 
"@abapify/adt-schemas", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/ts-xsd": "workspace:*", "zod": "^3.24.0 || ^4.0.0", @@ -348,7 +356,7 @@ }, "packages/adt-tui": { "name": "@abapify/adt-tui", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-contracts": "workspace:*", "fast-xml-parser": "^5.3.1", @@ -367,14 +375,14 @@ }, "packages/asjson-parser": { "name": "@abapify/asjson-parser", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "jsonc-eslint-parser": "^2.1.0", }, }, "packages/browser-auth": { "name": "@abapify/browser-auth", - "version": "0.3.1", + "version": "0.3.6", "dependencies": { "@abapify/adt-config": "workspace:*", }, @@ -384,32 +392,33 @@ }, "packages/logger": { "name": "@abapify/logger", - "version": "0.3.1", + "version": "0.3.6", }, "packages/openai-codegen": { "name": "@abapify/openai-codegen", - "version": "0.3.1", + "version": "0.3.6", "bin": { "openai-codegen": "./dist/cli.mjs", }, "dependencies": { - "@abapify/abap-ast": "workspace:*", + "@abapify/abap-ast": "0.3.6", "@apidevtools/swagger-parser": "^10.1.0", "commander": "^11.1.0", "fast-xml-parser": "^5.5.0", "yaml": "^2.5.0", }, "devDependencies": { + "@abapify/aclass": "workspace:*", "@abaplint/core": "^2.118.12", }, }, "packages/speci": { "name": "@abapify/speci", - "version": "0.3.1", + "version": "0.3.6", }, "packages/ts-xsd": { "name": "@abapify/ts-xsd", - "version": "0.3.1", + "version": "0.3.6", "bin": { "ts-xsd": "./dist/codegen/cli.mjs", }, @@ -487,6 +496,8 @@ "@abapify/acds": ["@abapify/acds@workspace:packages/acds"], + "@abapify/aclass": ["@abapify/aclass@workspace:packages/aclass"], + "@abapify/adk": ["@abapify/adk@workspace:packages/adk"], "@abapify/adt-atc": ["@abapify/adt-atc@workspace:packages/adt-atc"], @@ -4475,7 +4486,7 @@ "@abapify/openai-codegen/commander": ["commander@11.1.0", "", {}, "sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ=="], - 
"@abapify/openai-codegen/fast-xml-parser": ["fast-xml-parser@5.5.9", "", { "dependencies": { "fast-xml-builder": "^1.1.4", "path-expression-matcher": "^1.2.0", "strnum": "^2.2.2" }, "bin": { "fxparser": "src/cli/cli.js" } }, "sha512-jldvxr1MC6rtiZKgrFnDSvT8xuH+eJqxqOBThUVjYrxssYTo1avZLGql5l0a0BAERR01CadYzZ83kVEkbyDg+g=="], + "@abapify/openai-codegen/fast-xml-parser": ["fast-xml-parser@5.7.1", "", { "dependencies": { "@nodable/entities": "^2.1.0", "fast-xml-builder": "^1.1.5", "path-expression-matcher": "^1.5.0", "strnum": "^2.2.3" }, "bin": { "fxparser": "src/cli/cli.js" } }, "sha512-8Cc3f8GUGUULg34pBch/KGyPLglS+OFs05deyOlY7fL2MTagYPKrVQNmR1fLF/yJ9PH5ZSTd3YDF6pnmeZU+zA=="], "@abapify/ts-xsd/@xmldom/xmldom": ["@xmldom/xmldom@0.9.9", "", {}, "sha512-qycIHAucxy/LXAYIjmLmtQ8q9GPnMbnjG1KXhWm9o5sCr6pOYDATkMPiTNa6/v8eELyqOQ2FsEqeoFYmgv/gJg=="], @@ -5455,9 +5466,9 @@ "@abapify/browser-auth/@types/node/undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="], - "@abapify/openai-codegen/fast-xml-parser/path-expression-matcher": ["path-expression-matcher@1.2.0", "", {}, "sha512-DwmPWeFn+tq7TiyJ2CxezCAirXjFxvaiD03npak3cRjlP9+OjTmSy1EpIrEbh+l6JgUundniloMLDQ/6VTdhLQ=="], + "@abapify/openai-codegen/fast-xml-parser/path-expression-matcher": ["path-expression-matcher@1.5.0", "", {}, "sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ=="], - "@abapify/openai-codegen/fast-xml-parser/strnum": ["strnum@2.2.2", "", {}, "sha512-DnR90I+jtXNSTXWdwrEy9FakW7UX+qUZg28gj5fk2vxxl7uS/3bpI4fjFYVmdK9etptYBPNkpahuQnEwhwECqA=="], + "@abapify/openai-codegen/fast-xml-parser/strnum": ["strnum@2.2.3", "", {}, "sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg=="], "@abaplint/core/fast-xml-parser/path-expression-matcher": ["path-expression-matcher@1.5.0", "", {}, 
"sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ=="], diff --git a/netlify.toml b/netlify.toml index c3af57e0..2182976d 100644 --- a/netlify.toml +++ b/netlify.toml @@ -13,6 +13,13 @@ base = "website" command = "bun install && bun run build" publish = "build" + # Skip the deploy entirely when nothing under `website/` or this file + # changed since the last successful build. Without this, Netlify runs + # the full build and only at the end decides "no content change" — + # which gets reported as a red `error` state + failing GitHub commit + # status. Exit 0 from this command tells Netlify to skip the deploy + # (reported as `skipped`, not `error`). Exit 1 means "proceed". + ignore = "git diff --quiet $CACHED_COMMIT_REF $COMMIT_REF -- . ../netlify.toml" [build.environment] NODE_VERSION = "20" diff --git a/openspec/changes/add-aclass-parser/proposal.md b/openspec/changes/add-aclass-parser/proposal.md new file mode 100644 index 00000000..ce61a637 --- /dev/null +++ b/openspec/changes/add-aclass-parser/proposal.md @@ -0,0 +1,168 @@ +# Add ABAP OO parser (`@abapify/aclass`) + +## Why + +`@abapify/abap-ast` already emits typed ABAP classes and interfaces, and +`@abapify/acds` parses CDS source into a typed AST. The missing piece is the +**reverse direction for ABAP OO**: read existing `.clas.abap` / `.intf.abap` +source back into an AST so downstream tools can inspect, modify, and re-emit +it. + +Concrete drivers: + +- **Round-trip codegen.** We want to prove (and guard via test) that + `print(parse(generated))` reproduces the bytes `@abapify/openai-codegen` + emits. Today there is no parser, so the generator's output contract is + only guarded by snapshot tests. A round-trip test closes the loop. +- **ADT read-back.** `@abapify/adt-client` can download the active source + of any class/interface. 
Higher-level tooling (refactoring, analysis, + diffs) needs typed access to the declarations — signatures, types, + attributes — not just string-level diffs. +- **`adt-plugin-abapgit` consumer.** The abapGit serializer already owns + handlers that decide _which_ file goes where. A typed parser lets those + handlers reason about _what_ is in the file (test class? behaviour + pool? abstract class?) from the source itself instead of relying on + XML-side `VSEOCLASS.CATEGORY` alone. + +Symmetric to `acds` for CDS sources, `aclass` for ABAP OO sources. + +## What Changes + +New package: + +- **`@abapify/aclass`** — Chevrotain-based parser that reads + `.clas.abap` and `.intf.abap` source files and yields a typed AST of + the **structural** declarations (class/interface headers, sections, + method/attribute/event/type/alias declarations, inheritance, implements). + Method _bodies_ are preserved as an opaque source slice (start/end + offset + raw text). Top-level statements (DATA/TYPES outside classes) + are NOT in scope. + +The chosen AST mirrors `@abapify/abap-ast` node shapes where they overlap +(`ClassDef`, `InterfaceDef`, `Section`, `MethodDef`, `AttributeDef`, +`TypeDef`, `MethodParam`, `TypeRef`, ABAPDoc lines). Nodes that only make +sense in parsed source (source-position slices, trailing comments, +`MethodImpl` raw body) live on `aclass` types and may be converted into +`abap-ast` nodes when that is lossless. + +New AST usages unlocked by parsing: + +- `print(parse(src))` round-trip test for every file that + `openai-codegen` emits today. +- "What is the signature of method X in this class?" without calling ADT. +- Refactoring primitives (rename method, add parameter, replace + RAISING clause) become straightforward tree transforms. + +### What this PR adds (scope of the first change) + +1. `packages/aclass/` package skeleton (package.json, tsconfig, eslint + config, tsdown config, vitest config), wired into `nx` like `acds`. +2. 
**Lexer** — a single `tokens.ts` file defining the ABAP token set + needed for class/interface headers: keywords (`CLASS`, `INTERFACE`, + `DEFINITION`, `IMPLEMENTATION`, `PUBLIC`/`PROTECTED`/`PRIVATE`, + `SECTION`, `METHODS`, `CLASS-METHODS`, `DATA`, `CLASS-DATA`, `TYPES`, + `CONSTANTS`, `EVENTS`, `CLASS-EVENTS`, `ALIASES`, `INTERFACES`, + `INHERITING`, `FROM`, `FOR`, `TESTING`, `RISK`, `LEVEL`, `DURATION`, + `FINAL`, `ABSTRACT`, `CREATE`, `IMPORTING`, `EXPORTING`, `CHANGING`, + `RETURNING`, `RAISING`, `TYPE`, `REF`, `TO`, `REDEFINITION`, + `OPTIONAL`, `DEFAULT`, `VALUE`, `BEGIN`, `END`, `OF`, `STANDARD`, + `SORTED`, `HASHED`, `TABLE`, `STRUCTURE`, `WITH`, `KEY`, `EMPTY`, + `UNIQUE`, `NON-UNIQUE`), identifiers, string literals, integer + literals, `ABAPDocLine` (`"!`), line comments (`"`), star comments + (`*`), end-of-statement dot, comma, colon, paren open/close, + backtick literal, hyphen, whitespace. +3. **Parser** — a statement splitter (`parser.ts`) that tokenises with + the Wave 0 lexer, splits the stream on `Dot`, classifies each + statement by its leading keyword (`classDef`, `classImpl`, + `interfaceDef`, and the member-declaration heads), and builds typed + AST nodes directly from the token slice. Method implementations + (`METHOD foo. … ENDMETHOD.`) are captured as a single _opaque_ node + whose body is the raw text between `METHOD .` and + `ENDMETHOD.`. Never throws for malformed input — unknown shapes fall + through to `RawMember` so round-trip printing stays lossless. + Trade-off vs a Chevrotain `CstParser`: we lose automatic recovery + machinery but gain drastically simpler code, direct offset access + for source spans, and an easy fallback for any shape outside MVP + grammar. +4. **Typed AST** — emitted directly by the parser. No separate visitor + stage; `parse()` returns `AbapSourceFile` with a single top-level + `definitions: AbapDefinition[]` array, each element a `ClassDef`, + `ClassImpl`, or `InterfaceDef`. +5. 
**Error normalization** — Chevrotain lex + parse errors mapped to a + stable `{ line, column, message, severity: 'error' }` shape. +6. **Public API** — one entry point: + `parse(source: string): { ast: AbapSourceFile; errors: ParseError[] }`. + Export all AST node types. +7. **Tests (vitest)** — + - `tests/grammar/class-header.test.ts` — every class-header variant + the printer emits: plain, `FINAL`, `ABSTRACT`, `INHERITING FROM`, + `CREATE PRIVATE`, `FOR TESTING RISK LEVEL HARMLESS DURATION SHORT`. + - `tests/grammar/sections.test.ts` — three sections in any order, + empty sections, `PROTECTED` allowed to be missing. + - `tests/grammar/methods.test.ts` — `METHODS foo.`, + `METHODS foo RETURNING VALUE(r) TYPE string RAISING zcx_bar.`, + importing/exporting/changing/returning/raising mixes, + `REDEFINITION`, `ABSTRACT`, `FOR TESTING`. + - `tests/grammar/data-types-aliases.test.ts` — `DATA`, + `CLASS-DATA`, `CONSTANTS`, `TYPES BEGIN OF / END OF`, + `ALIASES x FOR zif_y~x`, `INTERFACES zif_z.`, `EVENTS`. + - `tests/grammar/interface.test.ts` — bare interface, interface + with methods / events / aliases / types. + - `tests/fixtures/` + `tests/fixtures.test.ts` — every + `*.clas.abap` / `*.intf.abap` file under + `samples/petstore3-client/generated/abapgit/src/` parses without + errors and yields a non-empty AST with at least one top-level + definition. + - `tests/roundtrip.test.ts` — **the invariant test**. For every + fixture, `print(parse(src).ast) === src` (modulo whitespace / + trailing blank lines). Uses `@abapify/abap-ast` as the renderer + so this test also doubles as a coverage check on the printer + against real generator output. +8. **Dependencies** — `chevrotain` (peer of `acds` already), no + `@abapify/abap-ast` runtime import inside the parser (kept as + devDependency for the roundtrip test only, to avoid a circular + dep between `aclass ⇄ abap-ast`). 
`aclass` publishes its own AST + types; a converter `toAbapAst(node)` / `fromAbapAst(node)` may land + in a later PR. + +### Out of scope (explicitly deferred) + +- Parsing of method bodies beyond raw text capture. A downstream + expression parser can consume the captured slice later. +- Global (non-class) DATA/TYPES declarations, FORM/PERFORM, old-style + reports, function pools. Classic on-prem ABAP constructs don't + affect today's codegen output. +- SELECT / UPDATE / INSERT / MODIFY / COMMIT statement parsing. +- Macros (`DEFINE / END-OF-DEFINITION`), includes (`INCLUDE zX.`). +- CDS annotations embedded in class source. +- AST walker helpers (`walkDefinitions`, `walkMembers`, …) — will + follow once the core parser is stable and has consumers. +- Semantic validators (is the super-class visible, is the aliased + interface implemented). The parser only guarantees well-formedness + of the surface syntax it covers. + +## Impact + +- **Affected specs**: new `aclass` spec under + `openspec/changes/add-aclass-parser/specs/aclass/spec.md`, created + by this change. +- **Affected code**: + - **Added**: `packages/aclass/` package (sources, tests, configs, + README, AGENTS.md). + - **Added**: nx registration (`nx.json` / `tsconfig.base.json` + paths) — same pattern used for `acds` and `abap-ast`. + - **Not modified**: no existing package depends on `aclass` in + this change. `@abapify/openai-codegen` will pick it up in a + follow-up change that adds the round-trip test from the + generator side. + +## Out of scope for this proposal but tracked + +- `walkDefinitions` / `walkMembers` tree traversal helpers (mirror + `acds` walker). +- Converters `aclass → @abapify/abap-ast` (push) and + `@abapify/abap-ast → aclass` (pull) for lossless round-tripping + when the two ASTs diverge. +- CLI (`bunx aclass parse zcl_foo.clas.abap`) — low priority, only + useful for debugging. +- Error-recovery tests (partial parse still yields useful AST). 
diff --git a/openspec/changes/add-aclass-parser/specs/aclass/spec.md b/openspec/changes/add-aclass-parser/specs/aclass/spec.md new file mode 100644 index 00000000..9fa3adff --- /dev/null +++ b/openspec/changes/add-aclass-parser/specs/aclass/spec.md @@ -0,0 +1,182 @@ +# Spec: `@abapify/aclass` + +ABAP OO source → typed AST parser, symmetric to `@abapify/acds` for +CDS and `@abapify/abap-ast` for code emission. + +## Package invariants + +1. **Scope is strictly structural.** The parser recognises class / + interface _declarations_: headers, sections, member declarations + (methods, attributes, events, types, constants, aliases), + inheritance, implements lists, and the opaque bodies of method + implementations. It does NOT parse statements inside a method + body, nor does it parse top-level DATA/TYPES outside classes. +2. **Method body is an opaque source slice.** Every `MethodImpl` + node carries the raw text between its opening `METHOD <name>.` + and its terminating `ENDMETHOD.`, plus a `{ startOffset, endOffset, +startLine, startColumn }` span. Callers that need structured + access to method bodies are expected to plug their own expression + parser on top of the slice. +3. **No dependency on `@abapify/abap-ast` at runtime.** The two + packages are siblings; depending on `abap-ast` at runtime would + couple parser evolution to emitter evolution and would create a + circular dep the moment `abap-ast` gains optional parser helpers. + Shared shapes are re-declared; a converter module may ship in a + later change. +4. **Chevrotain only.** No hand-rolled lexer, no regex-driven + tokenizer. Grammar is LL(≤4); lookahead beyond 4 is an + error-signal that the grammar needs refactoring, not that the + limit needs raising. +5. **Return value is `{ ast, errors }`** — never throw for malformed + input. Partial ASTs are acceptable when the parser can recover; + unrecoverable errors still yield a best-effort AST of whatever + was understood up to the first break.
+ +## Grammar coverage (MVP) + +| Topic | Rule | +| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `classDef` | `CLASS DEFINITION [PUBLIC] [FINAL] [ABSTRACT] [INHERITING FROM ] [CREATE {PUBLIC\|PROTECTED\|PRIVATE}] [FOR TESTING [RISK LEVEL ] [DURATION ]].
* ENDCLASS.` | +| `classImpl` | `CLASS IMPLEMENTATION. * ENDCLASS.` | +| `interfaceDef` | `INTERFACE [PUBLIC]. * ENDINTERFACE.` | +| `section` | `{PUBLIC\|PROTECTED\|PRIVATE} SECTION. *` | +| `classMember` | any of `methodDecl`, `attributeDecl`, `typeDecl`, `constantDecl`, `eventDecl`, `aliasDecl`, `interfaceStmt` | +| `methodDecl` | `[CLASS-]METHODS [ABSTRACT] [FINAL] [REDEFINITION] [FOR TESTING] [IMPORTING ] [EXPORTING ] [CHANGING ] [RETURNING VALUE() TYPE ] [RAISING ].` | +| `attributeDecl` | `[CLASS-]DATA TYPE .` | +| `typeDecl` | `TYPES TYPE .` or `TYPES BEGIN OF . … END OF .` or `TYPES TYPE STANDARD\|SORTED\|HASHED TABLE OF [WITH [UNIQUE\|NON-UNIQUE] KEY …].` | +| `constantDecl` | `CONSTANTS TYPE VALUE .` | +| `eventDecl` | `[CLASS-]EVENTS [EXPORTING ].` | +| `aliasDecl` | `ALIASES FOR .` | +| `interfaceStmt` | `INTERFACES .` | +| `methodImpl` | `METHOD . ENDMETHOD.` | +| `abapDoc` | `"! ` line(s) immediately above any declaration | +| `typeRef` | ``, `REF TO `, `qualified~name`, `qualifier=>name` | +| `paramList` | ` TYPE [OPTIONAL\|DEFAULT ]` repeated | +| `excList` | ` [ … ]` | + +### Deferred from MVP (tracked) + +- `METHODS ... EVENT HANDLER FOR ->.` syntax +- `ALIASES` with `IMPLEMENTED BY` clauses (rare) +- Sort-key clauses inside `HASHED TABLE` (accepted but not validated) +- Generic type expressions inside `RAISING` (multiple CX- types are + already supported; structured `RESUMABLE`/`EXCEPTIONS` clauses are + not) +- `CLASS-EVENTS` parameter types beyond the simple `EXPORTING VALUE(x)` + form. 
+ +## AST shape + +``` +AbapSourceFile { + kind: 'AbapSourceFile', + source: string, // original input, for offset math + definitions: AbapDefinition[], +} + +ParseResult { + ast: AbapSourceFile, + errors: ParseError[], // lex + parse diagnostics +} + +AbapDefinition = + | ClassDef + | ClassImpl + | InterfaceDef + +ClassDef { + kind: 'ClassDef', + name: string, + abapDoc?: string[], + isFinal: boolean, + isAbstract: boolean, + isForTesting: boolean, + createVisibility: 'public' | 'protected' | 'private', + superClass?: string, + sections: Section[], + span: SourceSpan, +} + +Section { + kind: 'Section', + visibility: 'public' | 'protected' | 'private', + members: ClassMember[], + span: SourceSpan, +} + +ClassMember = + | MethodDecl | AttributeDecl | TypeDecl | ConstantDecl | EventDecl + | AliasDecl | InterfaceStmt + +MethodDecl { + kind: 'MethodDecl', + name: string, + abapDoc?: string[], + isClassMethod: boolean, + isAbstract: boolean, + isFinal: boolean, + isRedefinition: boolean, + isForTesting: boolean, + importing: Param[], + exporting: Param[], + changing: Param[], + returning?: Param, + raising: string[], + span: SourceSpan, +} + +MethodImpl { + kind: 'MethodImpl', + name: string, + body: string, // raw text, no trimming + bodySpan: SourceSpan, + span: SourceSpan, +} + +// … analogous shapes for AttributeDecl, TypeDecl, ConstantDecl, +// EventDecl, AliasDecl, InterfaceStmt, InterfaceDef, ClassImpl … + +SourceSpan { + startOffset: number, + endOffset: number, + startLine: number, + startColumn: number, +} + +ParseError { + severity: 'error' | 'warning', + line: number, + column: number, + message: string, +} +``` + +## Testing contract + +- **Unit tests** per grammar topic (see proposal, section "What this + PR adds"). +- **Fixtures test** parses every `*.clas.abap` / `*.intf.abap` under + `samples/petstore3-client/generated/abapgit/src/`. Must return + zero errors and a non-empty `definitions` array. +- **Roundtrip test** is the release gate. 
For every fixture, + `abapAstPrint(parse(src).ast) === src` after whitespace + normalisation. If the AST shapes diverge from `abap-ast`, a + converter is used in the test only. The roundtrip test depends on + `@abapify/abap-ast` as a devDependency and runs in the `aclass` + test suite. +- **Coverage target**: 80%+ statement coverage, 100% on the grammar + rules listed above. + +## Release gates (for this change) + +1. `nx test aclass` green locally and in CI. +2. Roundtrip test green for every fixture (the five petstore3 files + plus three hand-crafted fixtures covering edge cases: abstract + class, interface with events, class with nested type declarations). +3. `nx typecheck aclass` green. +4. `nx lint aclass` green. +5. `nx build aclass` produces `dist/index.mjs` + `dist/index.d.mts` + under 200 kB. +6. Root `AGENTS.md` rules-index gains a link to the new + `packages/aclass/AGENTS.md` (which in turn mirrors + `packages/acds/AGENTS.md`). diff --git a/openspec/changes/add-aclass-parser/tasks.md b/openspec/changes/add-aclass-parser/tasks.md new file mode 100644 index 00000000..7b8803cf --- /dev/null +++ b/openspec/changes/add-aclass-parser/tasks.md @@ -0,0 +1,48 @@ +# Tasks — `@abapify/aclass` + +## Wave 0 — skeleton + lexer (first PR) + +1. [x] Scaffold `packages/aclass/` (package.json, tsconfig, tsdown.config.ts, vitest.config.ts, eslint.config.js, project.json for Nx) mirroring `packages/acds/`. +2. [x] Wire `@abapify/aclass` into `tsconfig.base.json` paths and the root workspace `package.json` workspaces array. +3. [x] Write `src/tokens.ts` — Chevrotain token definitions for the ABAP OO subset listed in the spec. Keyword order must put longer keywords before their prefixes (`INTERFACES` before `INTERFACE`, `CLASS-DATA` before `CLASS`, `NON-UNIQUE` before `UNIQUE`) so the lexer doesn't split them. +4. [x] Add `src/tokens.test.ts` — tokenise a minimal `CLASS zcl_x DEFINITION PUBLIC FINAL. PUBLIC SECTION. METHODS foo. 
ENDCLASS.` string and assert the expected token stream. +5. [x] Add `packages/aclass/AGENTS.md` copied from `packages/acds/AGENTS.md` and adapted for ABAP. +6. [x] Add root `AGENTS.md` entry under "On Demand" rules. + +## Wave 1 — parser + visitor (second PR) + +1. [x] Write `src/parser.ts` — Chevrotain `CstParser` with rules `sourceFile`, `classDef`, `classImpl`, `interfaceDef`, `section`, `classMember`, `methodDecl`, `attributeDecl`, `typeDecl`, `constantDecl`, `eventDecl`, `aliasDecl`, `interfaceStmt`, `methodImpl`, `typeRef`, `paramList`, `param`, `excList`, `abapDocLine`. +2. [x] Write `src/ast.ts` — TypeScript interfaces for every node kind in the spec's AST shape section. +3. [x] Write `src/visitor.ts` — CST → typed AST. Preserves `SourceSpan` for every node (derive from Chevrotain token offsets). +4. [x] Write `src/errors.ts` — normalise Chevrotain lex + parse errors into `ParseError`. +5. [x] Write `src/index.ts` — export `parse(source)`, all AST interfaces, `ParseError`. +6. [x] Grammar tests: `tests/grammar/class-header.test.ts`, `tests/grammar/sections.test.ts`, `tests/grammar/methods.test.ts`, `tests/grammar/data-types-aliases.test.ts`, `tests/grammar/interface.test.ts`. + +## Wave 2 — fixtures + roundtrip (third PR) + +1. [x] Copy every `.clas.abap` / `.intf.abap` from `samples/petstore3-client/generated/abapgit/src/` into `packages/aclass/tests/fixtures/petstore3/` (as the baseline corpus; frozen copy). +2. [x] Add three hand-crafted edge-case fixtures: `abstract-class.clas.abap`, `event-interface.intf.abap`, `nested-types.clas.abap`. +3. [x] `tests/fixtures.test.ts` — every fixture parses without errors and yields ≥ 1 top-level definition. +4. [x] `tests/roundtrip.test.ts` — for every fixture, assert `abapAstPrint(parse(src).ast) === src` modulo whitespace. Use `@abapify/abap-ast` as a devDependency (not runtime). +5. [x] Add `tests/roundtrip.helpers.ts` with a `toAbapAst(node)` converter for aclass nodes that don't map one-to-one (e.g. 
`MethodImpl.body` → `raw({ source: body })`). +6. [x] Release gates: run `nx test aclass`, `nx lint aclass`, `nx typecheck aclass`, `nx build aclass`. All green. + +## Wave 3 — documentation + consumer wiring (fourth PR, optional) + +1. [x] Add a roundtrip CI test inside `openai-codegen` that depends on `aclass` and parses every newly-generated file (catches AST regressions). +2. [x] `packages/aclass/README.md` with usage examples. +3. [x] Update website docs (`website/site-docs/sdk/packages/aclass.md`). +4. [x] Archive this change under `openspec/archive/add-aclass-parser/` once all waves merged. + +## Definition of done + +A change lands when: + +1. Every fixture parses cleanly and roundtrip-matches byte-for-byte + (modulo whitespace) the original source. +2. `abap-ast` never needs to be changed to accommodate the parser — + if parser emits a shape `abap-ast` can't render, the AST is + extended on the parser side only. +3. `@abapify/aclass` has zero `@abapify/abap-ast` imports in + `src/**/*.ts` (runtime boundary); only `tests/**/*.ts` are + permitted to depend on `abap-ast`. diff --git a/packages/aclass/AGENTS.md b/packages/aclass/AGENTS.md new file mode 100644 index 00000000..d30baf5b --- /dev/null +++ b/packages/aclass/AGENTS.md @@ -0,0 +1,128 @@ +# aclass - AI Agent Guide + +## Package Overview + +**ABAP OO source parser** — tokenises and parses `.clas.abap` / +`.intf.abap` source files into a typed AST. Scope is strictly +structural: class / interface headers, sections, and member +declarations. Method bodies are preserved as opaque source slices; +their statements are NOT parsed in this package. + +Symmetric to `@abapify/acds` (CDS parser) and the reverse direction of +`@abapify/abap-ast` (ABAP emitter). 
All three packages can interlock +into a full ABAP OO round-trip: + +``` +.clas.abap / .intf.abap + → aclass lexer + parser + visitor ← this package + → typed AST + → abap-ast printer + → .clas.abap / .intf.abap (roundtrip invariant: bytes match) +``` + +## Current status + +Waves 0–3 are all shipped: + +- **Wave 0** — Chevrotain lexer (`tokens.ts`) with compound-keyword and + ABAPDoc handling. +- **Wave 1** — statement-based parser (`parser.ts`) + typed AST + (`ast.ts`). No Chevrotain CstParser / visitor — ABAP's `keyword … dot` + statement shape makes a statement splitter both simpler and more + robust than a full CST pipeline for this scope. +- **Wave 2** — fixture suite against the live petstore3 corpus and + structural roundtrip / idempotence tests. +- **Wave 3** — consumer wiring via `assertCleanParse()` (`assert.ts`), + used by `@abapify/openai-codegen` as a CI gate. + +See +[`openspec/changes/add-aclass-parser/tasks.md`](../../openspec/changes/add-aclass-parser/tasks.md) +for the detailed task record. + +## Architecture + +``` +Source string (.clas.abap / .intf.abap) + → AclassLexer (src/tokens.ts) Tokenise + → statement split (on `Dot`) Declarative-statement stream + → parse() (src/parser.ts) Typed AST + → AbapSourceFile (src/ast.ts) { definitions, source } + → assertCleanParse (src/assert.ts) Consumer CI gate +``` + +Why a statement splitter, not a Chevrotain CstParser: ABAP is +`keyword … dot` at the surface level. Declaring every production in +Chevrotain would require also declaring the expression grammar just +to parse type references — and method bodies are explicitly opaque in +this package, so the expression grammar would be wasted. The +statement splitter classifies each statement by its head keyword and +builds typed AST nodes directly from the token slice; unknown shapes +fall through to `RawMember` for lossless roundtrip. 
+ +### Key files + +| File | Purpose | +| --------------- | ------------------------------------------------------------------------------ | +| `src/index.ts` | Public exports: `parse`, `assertCleanParse`, AST types, lexer, token types. | +| `src/tokens.ts` | Chevrotain token definitions (keywords, symbols, literals, comments). | +| `src/lex.ts` | `tokenize(source)` — thin wrapper that normalises lex errors. | +| `src/errors.ts` | Chevrotain lex + parse errors → stable `ParseError` shape. | +| `src/parser.ts` | Statement splitter + per-statement typed AST builders. | +| `src/ast.ts` | AST node interfaces. | +| `src/assert.ts` | `assertCleanParse(source, fileLabel)` + `AclassParseError` — reusable CI gate. | + +## Conventions + +### Token order matters + +1. Compound keywords (`CLASS-DATA`, `CLASS-METHODS`, `CLASS-EVENTS`, + `NON-UNIQUE`, `READ-ONLY`) MUST be declared **before** their + prefixes in `allTokens`, otherwise the lexer splits them at the + hyphen. +2. `INTERFACES` (plural, member keyword) MUST come **before** + `INTERFACE` (definition keyword). This matters because + `INTERFACES zif_foo.` would otherwise match `INTERFACE` and leave + `S zif_foo.` in the stream. +3. `ABAPDocLine` (`"! …`) MUST come **before** `LineComment` (`" …`) + so documentation lines are captured rather than skipped. +4. Multi-char symbols (`=>`, `->`, `::`) MUST come before their + single-char prefixes. +5. `Identifier` MUST be declared last among word-shaped tokens so + all keywords win via `longer_alt: Identifier`. + +### Case-insensitive + +ABAP is case-insensitive. Every keyword uses a `/i` regex; the lexer +matches `CLASS`, `Class`, and `class` as the same token. Callers who +need the original casing read it from the token's `image` property. + +### `"` prefix is a comment, NOT a string + +Single-quoted strings use `'…'`. `"…` is a line comment, and `"! …` +is ABAPDoc. 
+  "description": "ABAP OO source parser — tokenizer, statement-based parser, and typed AST for CLASS / INTERFACE declarations",
"chevrotain", + "sap", + "adt" + ], + "author": "abapify", + "license": "MIT" +} diff --git a/packages/aclass/src/assert.ts b/packages/aclass/src/assert.ts new file mode 100644 index 00000000..64879b36 --- /dev/null +++ b/packages/aclass/src/assert.ts @@ -0,0 +1,48 @@ +/** + * Assertions derived from `parse()`. Useful for downstream consumers + * (generators, linters, CI gates) that want to guarantee their output + * stays inside the structural subset of ABAP OO that `aclass` + * understands, without writing the same filtering logic each time. + */ +import { parse } from './parser'; +import type { ParseError } from './errors'; + +export class AclassParseError extends Error { + public readonly source: string; + public readonly errors: readonly ParseError[]; + constructor( + fileLabel: string, + errors: readonly ParseError[], + source: string, + ) { + super( + `aclass: ${errors.length} parse error${errors.length === 1 ? '' : 's'} in ${fileLabel}:\n` + + errors + .slice(0, 10) + .map((e) => ` [${e.line}:${e.column}] ${e.message}`) + .join('\n'), + ); + this.name = 'AclassParseError'; + this.source = source; + this.errors = errors; + } +} + +/** + * Throw `AclassParseError` if the source has any lex / parse errors. + * `fileLabel` is included in the message so CI output points at the + * offending file without the caller having to format it. + * + * Intended use in test suites: + * + * ```ts + * for (const f of generated) { + * assertCleanParse(readFileSync(f, 'utf8'), f); + * } + * ``` + */ +export function assertCleanParse(source: string, fileLabel = ''): void { + const { errors } = parse(source); + if (errors.length === 0) return; + throw new AclassParseError(fileLabel, errors, source); +} diff --git a/packages/aclass/src/ast.ts b/packages/aclass/src/ast.ts new file mode 100644 index 00000000..6e8ea308 --- /dev/null +++ b/packages/aclass/src/ast.ts @@ -0,0 +1,221 @@ +/** + * Typed AST for `@abapify/aclass` — ABAP OO structural parser. 
+ * + * Covers class / interface declarations, sections, and member + * declarations. Method-implementation bodies are preserved as opaque + * source slices; their statements are NOT modelled. + */ + +export interface SourceSpan { + readonly startOffset: number; + readonly endOffset: number; + readonly startLine: number; + readonly startColumn: number; +} + +export type Visibility = 'public' | 'protected' | 'private'; + +// Type references — `source` is the authoritative verbatim slice. + +export type TypeRef = + | BuiltinTypeRef + | NamedTypeRef + | RefToTypeRef + | TableTypeRef; + +export interface BuiltinTypeRef { + readonly kind: 'BuiltinTypeRef'; + readonly name: string; + readonly source: string; +} + +export interface NamedTypeRef { + readonly kind: 'NamedTypeRef'; + readonly name: string; + readonly source: string; +} + +export interface RefToTypeRef { + readonly kind: 'RefToTypeRef'; + readonly target: TypeRef; + readonly source: string; +} + +export type TableKind = 'standard' | 'sorted' | 'hashed'; + +export interface TableTypeRef { + readonly kind: 'TableTypeRef'; + readonly tableKind: TableKind; + readonly row: TypeRef; + readonly keyClause: string; + readonly source: string; +} + +export interface MethodParam { + readonly kind: 'MethodParam'; + readonly name: string; + readonly type: TypeRef; + readonly isValue: boolean; + readonly isOptional: boolean; + readonly defaultValue?: string; + readonly abapDoc?: readonly string[]; + readonly span: SourceSpan; +} + +// Top-level shapes + +export type AbapDefinition = ClassDef | ClassImpl | InterfaceDef; + +export interface AbapSourceFile { + readonly kind: 'AbapSourceFile'; + readonly source: string; + readonly definitions: readonly AbapDefinition[]; +} + +export interface ClassDef { + readonly kind: 'ClassDef'; + readonly name: string; + readonly abapDoc?: readonly string[]; + readonly isFinal: boolean; + readonly isAbstract: boolean; + readonly isForTesting: boolean; + readonly createVisibility: 
Visibility; + readonly superClass?: string; + readonly sections: readonly Section[]; + readonly span: SourceSpan; +} + +export interface Section { + readonly kind: 'Section'; + readonly visibility: Visibility; + readonly members: readonly ClassMember[]; + readonly span: SourceSpan; +} + +export type ClassMember = + | MethodDecl + | AttributeDecl + | TypeDecl + | ConstantDecl + | EventDecl + | InterfaceStmt + | AliasDecl + | RawMember; + +export interface MethodDecl { + readonly kind: 'MethodDecl'; + readonly name: string; + readonly abapDoc?: readonly string[]; + readonly isClassMethod: boolean; + readonly isAbstract: boolean; + readonly isFinal: boolean; + readonly isRedefinition: boolean; + readonly isForTesting: boolean; + readonly importing: readonly MethodParam[]; + readonly exporting: readonly MethodParam[]; + readonly changing: readonly MethodParam[]; + readonly returning?: MethodParam; + readonly raising: readonly string[]; + readonly span: SourceSpan; +} + +export interface AttributeDecl { + readonly kind: 'AttributeDecl'; + readonly name: string; + readonly abapDoc?: readonly string[]; + readonly isClassData: boolean; + readonly isReadOnly: boolean; + readonly type: TypeRef; + readonly span: SourceSpan; +} + +export interface TypeDecl { + readonly kind: 'TypeDecl'; + readonly name: string; + readonly abapDoc?: readonly string[]; + readonly shape: TypeDeclShape; + readonly span: SourceSpan; +} + +export type TypeDeclShape = + | { readonly kind: 'alias'; readonly type: TypeRef } + | { readonly kind: 'structure'; readonly fields: readonly StructureField[] }; + +export interface StructureField { + readonly kind: 'StructureField'; + readonly name: string; + readonly type: TypeRef; + readonly abapDoc?: readonly string[]; + readonly span: SourceSpan; +} + +export interface ConstantDecl { + readonly kind: 'ConstantDecl'; + readonly name: string; + readonly type: TypeRef; + readonly value: string; + readonly abapDoc?: readonly string[]; + readonly span: 
+   * only support `VALUE(...) TYPE <type>` in practice; other slots are
+ * + * Both Chevrotain lex errors and parse errors are flattened into a single + * `ParseError` shape so callers don't need to know which phase produced + * the diagnostic. + */ +import type { ILexingError, IRecognitionException } from 'chevrotain'; + +export interface ParseError { + severity: 'error' | 'warning'; + message: string; + line: number; + column: number; + offset: number; + length: number; +} + +export function fromLexError(err: ILexingError): ParseError { + return { + severity: 'error', + message: err.message, + line: err.line ?? 1, + column: err.column ?? 1, + offset: err.offset, + length: err.length, + }; +} + +export function fromParseError(err: IRecognitionException): ParseError { + const token = err.token; + return { + severity: 'error', + message: err.message, + line: token.startLine ?? 1, + column: token.startColumn ?? 1, + offset: token.startOffset, + length: (token.endOffset ?? token.startOffset) - token.startOffset + 1, + }; +} diff --git a/packages/aclass/src/index.ts b/packages/aclass/src/index.ts new file mode 100644 index 00000000..38194428 --- /dev/null +++ b/packages/aclass/src/index.ts @@ -0,0 +1,33 @@ +/** + * `@abapify/aclass` — ABAP OO source parser. + * + * Parses `.clas.abap` / `.intf.abap` source files into a typed AST. + * Scope is structural: class / interface headers, sections, and member + * declarations (methods, attributes, types, constants, aliases, + * implements). Method bodies are captured as opaque source slices. + * + * See `packages/aclass/AGENTS.md` for conventions and the + * `openspec/changes/add-aclass-parser` proposal for scope. 
+ * Wave 1 layered `parse()` on top of this: it feeds the token list
+ * into the statement-based parser (`parser.ts`) and yields a typed
+ * AST. This function stays around as the low-level primitive.
Rather than building a full Chevrotain CstParser + * (which would require declaring the entire expression grammar just to + * parse type references), we: + * + * 1. Tokenise with `AclassLexer` (Wave 0). + * 2. Split the token stream into statements on `Dot`. + * 3. Classify each statement by its leading keyword and build a typed + * AST node from the token slice — or produce a `RawMember` / + * `ParseError` when the shape is outside MVP scope. + * + * The trade-off versus a CstParser: we lose automatic recovery and + * lookahead machinery, but gain (a) drastically simpler code, (b) direct + * access to token offsets for building exact source spans, (c) easy + * `RawMember` fallback for any shape we haven't taught yet. + */ +import type { IToken } from 'chevrotain'; +import { tokenize } from './lex'; +import type { ParseError } from './errors'; +import type { + AbapDefinition, + AbapSourceFile, + AliasDecl, + AttributeDecl, + ClassDef, + ClassImpl, + ClassMember, + ConstantDecl, + EventDecl, + InterfaceDef, + InterfaceStmt, + MethodDecl, + MethodImpl, + MethodParam, + Section, + SourceSpan, + StructureField, + TableKind, + TableTypeRef, + TypeDecl, + TypeRef, + Visibility, +} from './ast'; + +export interface ParseResult { + readonly ast: AbapSourceFile; + readonly errors: readonly ParseError[]; +} + +/** + * Parse an ABAP OO source file into a typed AST plus any diagnostics. + * Returns a `{ ast, errors }` shape; never throws for malformed input. + */ +export function parse(source: string): ParseResult { + const errors: ParseError[] = []; + const { tokens, errors: lexErrors } = tokenize(source); + errors.push(...lexErrors); + + const definitions: AbapDefinition[] = []; + const cursor = new Cursor(tokens, source, errors); + + try { + while (!cursor.eof()) { + const def = parseTopLevel(cursor); + if (def) definitions.push(def); + } + } catch (err) { + // The parser is documented to never throw — any unexpected exception + // (e.g. 
a missing `eof()` guard in a future helper) is normalised + // into a `ParseError` and attached to the result so callers still + // get a best-effort AST. + errors.push({ + severity: 'error', + message: `internal parser error: ${ + err instanceof Error ? err.message : String(err) + }`, + line: 1, + column: 1, + offset: 0, + length: 0, + }); + } + + const ast: AbapSourceFile = { + kind: 'AbapSourceFile', + source, + definitions, + }; + return { ast, errors }; +} + +// ============================================ +// Cursor — linear token stream with helpers +// ============================================ + +class Cursor { + private position = 0; + constructor( + private readonly tokens: readonly IToken[], + private readonly source: string, + private readonly errors: ParseError[], + ) {} + + eof(): boolean { + return this.position >= this.tokens.length; + } + + peek(offset = 0): IToken | undefined { + return this.tokens[this.position + offset]; + } + + current(): IToken { + const t = this.tokens[this.position]; + if (!t) throw new Error('Cursor.current() called past end of stream'); + return t; + } + + advance(): IToken { + const t = this.current(); + this.position += 1; + return t; + } + + matches(name: string, offset = 0): boolean { + return this.peek(offset)?.tokenType.name === name; + } + + /** Collect tokens up to (not including) the next `Dot` at top level. */ + collectStatement(): { + readonly tokens: IToken[]; + readonly terminator?: IToken; + readonly startOffset: number; + readonly endOffset: number; + } { + const collected: IToken[] = []; + const start = this.peek()?.startOffset ?? 0; + while (!this.eof()) { + const t = this.current(); + if (t.tokenType.name === 'Dot') { + this.advance(); + return { + tokens: collected, + terminator: t, + startOffset: start, + endOffset: t.endOffset ?? t.startOffset, + }; + } + collected.push(t); + this.advance(); + } + // EOF without terminating dot — pragmatic: return what we have. 
+ return { + tokens: collected, + startOffset: start, + endOffset: collected[collected.length - 1]?.endOffset ?? start, + }; + } + + /** Accumulate consecutive ABAPDoc lines at the current position. */ + consumeAbapDoc(): string[] | undefined { + const doc: string[] = []; + while (this.matches('ABAPDocLine')) { + const t = this.advance(); + // Strip the leading `"!` and one optional leading space. + doc.push(t.image.replace(/^"!\s?/, '')); + } + return doc.length > 0 ? doc : undefined; + } + + report( + message: string, + at: IToken, + severity: ParseError['severity'] = 'error', + ): void { + this.errors.push({ + severity, + message, + line: at.startLine ?? 1, + column: at.startColumn ?? 1, + offset: at.startOffset, + length: (at.endOffset ?? at.startOffset) - at.startOffset + 1, + }); + } + + sliceSource(startOffset: number, endOffset: number): string { + return this.source.slice(startOffset, endOffset + 1); + } + + /** + * Return the first token whose start offset is greater than or equal + * to the given offset. Useful for recovering line/column metadata for + * a source-slice boundary that doesn't land on a token (e.g. the + * start of a method body, which is usually whitespace). + */ + firstTokenAtOrAfter(offset: number): IToken | undefined { + for (const t of this.tokens) { + if (t.startOffset >= offset) return t; + } + return undefined; + } +} + +// ============================================ +// Top-level dispatcher +// ============================================ + +function parseTopLevel(c: Cursor): AbapDefinition | null { + const abapDoc = c.consumeAbapDoc(); + if (c.eof()) return null; + const t = c.current(); + const name = t.tokenType.name; + + if (name === 'Class') { + return parseClassOrImpl(c, abapDoc); + } + if (name === 'Interface') { + return parseInterface(c, abapDoc); + } + // Unknown top-level token — skip ahead to next `Dot` to recover. 
+  // Pattern: CLASS <name> {DEFINITION|IMPLEMENTATION} [ … ] .
classTok); + return null; + } + const kindTok = tokens[2]; + if (!kindTok) { + c.report('expected DEFINITION or IMPLEMENTATION after class name', nameTok); + return null; + } + + if (kindTok.tokenType.name === 'Implementation') { + return parseClassImpl(c, nameTok.image, { + startOffset, + endOffset: header.endOffset, + startLine, + startColumn, + }); + } + if (kindTok.tokenType.name !== 'Definition') { + c.report( + `expected DEFINITION or IMPLEMENTATION, got "${kindTok.image}"`, + kindTok, + ); + return null; + } + + return parseClassDef(c, tokens, abapDoc, { + startOffset, + endOffset: header.endOffset, + startLine, + startColumn, + }); +} + +function parseClassDef( + c: Cursor, + headerTokens: IToken[], + abapDoc: string[] | undefined, + headerSpan: SourceSpan, +): ClassDef { + // headerTokens: [Class, Ident, Definition, …modifiers…] + const name = headerTokens[1].image; + const mods = headerTokens.slice(3); + + // Forward declaration: `CLASS x DEFINITION DEFERRED.` / `… DEFINITION LOAD.` + // has no body and must NOT expect ENDCLASS. 
+ const isForwardDecl = mods.some( + (t) => t.tokenType.name === 'Deferred' || t.tokenType.name === 'Load', + ); + if (isForwardDecl) { + return { + kind: 'ClassDef', + name, + abapDoc, + isFinal: false, + isAbstract: false, + isForTesting: false, + createVisibility: 'public', + sections: [], + span: headerSpan, + }; + } + + let isFinal = false; + let isAbstract = false; + let isForTesting = false; + let createVisibility: Visibility = 'public'; + let superClass: string | undefined; + + for (let i = 0; i < mods.length; i++) { + const t = mods[i]; + switch (t.tokenType.name) { + case 'Public': + break; + case 'Final': + isFinal = true; + break; + case 'Abstract': + isAbstract = true; + break; + case 'Inheriting': { + // INHERITING FROM + const from = mods[i + 1]; + const sup = mods[i + 2]; + if (from?.tokenType.name === 'From' && sup) { + superClass = sup.image; + i += 2; + } + break; + } + case 'Create': { + const v = mods[i + 1]; + if (v?.tokenType.name === 'Private') createVisibility = 'private'; + else if (v?.tokenType.name === 'Protected') + createVisibility = 'protected'; + else createVisibility = 'public'; + i += 1; + break; + } + case 'For': { + // FOR TESTING [ RISK LEVEL ] [ DURATION ] + if (mods[i + 1]?.tokenType.name === 'Testing') { + isForTesting = true; + } + break; + } + default: + break; + } + } + + const sections: Section[] = []; + while (!c.eof() && !c.matches('EndClass')) { + const sec = parseSection(c); + if (sec) sections.push(sec); + else break; + } + // Consume the closing `ENDCLASS.` or report that it never arrived. 
+  // header.tokens: [Method, <name>, …]
Methods don't nest, so a + // flat scan is enough for well-formed input. + while (!c.eof()) { + if (c.matches('EndMethod')) { + const endMethodStart = c.current().startOffset; + const stmt = c.collectStatement(); + const bodyEnd = endMethodStart - 1; + const totalEnd = stmt.endOffset; + const body = c['sliceSource'](bodyStart, bodyEnd); + // Locate the first non-whitespace line/column inside the body so + // bodySpan points at the actual content, not at the `METHOD` + // keyword that terminated with the `.` before bodyStart. + const firstBodyTok = c.firstTokenAtOrAfter(bodyStart); + const bodyLine = firstBodyTok?.startLine ?? methodTok.startLine ?? 1; + const bodyCol = firstBodyTok?.startColumn ?? 1; + return { + kind: 'MethodImpl', + name: qual.name, + body, + bodySpan: { + startOffset: bodyStart, + endOffset: bodyEnd, + startLine: bodyLine, + startColumn: bodyCol, + }, + span: { + startOffset: methodTok.startOffset, + endOffset: totalEnd, + startLine: methodTok.startLine ?? 1, + startColumn: methodTok.startColumn ?? 1, + }, + }; + } + c.advance(); + } + c.report('ENDMETHOD not found for METHOD block', methodTok); + return null; +} + +// ============================================ +// INTERFACE … ENDINTERFACE +// ============================================ + +function parseInterface( + c: Cursor, + abapDoc: string[] | undefined, +): InterfaceDef | null { + const ifaceTok = c.current(); + const header = c.collectStatement(); + const tokens = header.tokens; + // [Interface, Ident, [Public], …] + if (tokens[0]?.tokenType.name !== 'Interface') { + c.report('expected INTERFACE keyword', ifaceTok); + return null; + } + const nameTok = tokens[1]; + if (!nameTok || nameTok.tokenType.name !== 'Identifier') { + c.report('expected interface name after INTERFACE', nameTok ?? 
+  // `<visibility> SECTION .` — we only accept these as section-starters, not
+  // toks: [Methods|ClassMethods, <name>, …]
head); + return null; + } + + const importing: MethodParam[] = []; + const exporting: MethodParam[] = []; + const changing: MethodParam[] = []; + let returning: MethodParam | undefined; + const raising: string[] = []; + + let isAbstract = false; + let isFinal = false; + let isRedefinition = false; + let isForTesting = false; + + let section: + | 'none' + | 'importing' + | 'exporting' + | 'changing' + | 'returning' + | 'raising' = 'none'; + + let i = qual.lastIdx + 1; + while (i < toks.length) { + const t = toks[i]; + const tn = t.tokenType.name; + if (tn === 'Abstract') { + isAbstract = true; + i += 1; + continue; + } + if (tn === 'Final') { + isFinal = true; + i += 1; + continue; + } + if (tn === 'Redefinition') { + isRedefinition = true; + i += 1; + continue; + } + if (tn === 'For' && toks[i + 1]?.tokenType.name === 'Testing') { + isForTesting = true; + i += 2; + continue; + } + if (tn === 'Importing') { + section = 'importing'; + i += 1; + continue; + } + if (tn === 'Exporting') { + section = 'exporting'; + i += 1; + continue; + } + if (tn === 'Changing') { + section = 'changing'; + i += 1; + continue; + } + if (tn === 'Returning') { + section = 'returning'; + i += 1; + continue; + } + if (tn === 'Raising') { + section = 'raising'; + i += 1; + continue; + } + + if (section === 'raising') { + if (tn === 'Identifier') { + raising.push(t.image); + i += 1; + continue; + } + i += 1; + continue; + } + + if (section === 'none') { + i += 1; + continue; + } + + // Consume one parameter starting at i + const consumed = consumeMethodParam(toks, i, c); + if (!consumed) break; + const { param, nextIndex } = consumed; + if (section === 'importing') importing.push(param); + else if (section === 'exporting') exporting.push(param); + else if (section === 'changing') changing.push(param); + else if (section === 'returning') returning = param; + i = nextIndex; + } + + return { + kind: 'MethodDecl', + name: qual.name, + abapDoc, + isClassMethod, + isAbstract, + isFinal, + 
isRedefinition, + isForTesting, + importing, + exporting, + changing, + returning, + raising, + span: spanFromStmt(stmt, head), + }; +} + +function consumeMethodParam( + toks: IToken[], + start: number, + c: Cursor, +): { param: MethodParam; nextIndex: number } | null { + let i = start; + let isValue = false; + // Optional VALUE(name) + if ( + toks[i]?.tokenType.name === 'Value' && + toks[i + 1]?.tokenType.name === 'LParen' + ) { + isValue = true; + const nameTok = toks[i + 2]; + if (!isNameLike(nameTok)) return null; + if (toks[i + 3]?.tokenType.name !== 'RParen') return null; + i += 4; + // Followed by TYPE etc. Fall through to common path but + // we already captured the name. + const paramName = nameTok.image; + return parseParamTail(toks, i, c, paramName, isValue); + } + // Plain name — may be a keyword-as-name (`data`, `type`, `ref`, ...). + const nameTok = toks[i]; + if (!isNameLike(nameTok)) return null; + i += 1; + return parseParamTail(toks, i, c, nameTok.image, isValue); +} + +function parseParamTail( + toks: IToken[], + start: number, + c: Cursor, + name: string, + isValue: boolean, +): { param: MethodParam; nextIndex: number } | null { + let i = start; + // TYPE + if (toks[i]?.tokenType.name !== 'Type') { + c.report( + 'expected TYPE in parameter declaration', + toks[i] ?? toks[start - 1], + ); + return null; + } + i += 1; + const typeRes = consumeTypeRef(toks, i, c); + if (!typeRes) return null; + const type = typeRes.type; + i = typeRes.nextIndex; + + let isOptional = false; + let defaultValue: string | undefined; + + while (i < toks.length) { + const t = toks[i]; + const tn = t.tokenType.name; + if (tn === 'Optional') { + isOptional = true; + i += 1; + continue; + } + if (tn === 'Default') { + // Greedy: take everything until the next keyword that would start a + // new section or the next parameter. + i += 1; + const startTok = toks[i]; + if (!startTok) break; + const valStart = startTok.startOffset; + let valEnd = startTok.endOffset ?? 
 startTok.startOffset;
+      while (i < toks.length) {
+        const nt = toks[i]?.tokenType.name;
+        if (
+          nt === 'Optional' ||
+          nt === 'Importing' ||
+          nt === 'Exporting' ||
+          nt === 'Changing' ||
+          nt === 'Returning' ||
+          nt === 'Raising'
+        ) {
+          break;
+        }
+        valEnd = toks[i].endOffset ?? valEnd;
+        i += 1;
+        if (i < toks.length && toks[i]?.tokenType.name === 'Identifier') {
+          // Likely start of next parameter — stop.
+          // Heuristic: two consecutive idents without a connective is a new param.
+          break;
+        }
+      }
+      defaultValue = c['sliceSource'](valStart, valEnd);
+      continue;
+    }
+    // Unknown keyword → param ends here
+    break;
+  }
+
+  const startTok = toks[start - (isValue ? 4 : 1)];
+  const endTok = toks[i - 1] ?? startTok;
+  const param: MethodParam = {
+    kind: 'MethodParam',
+    name,
+    type,
+    isValue,
+    isOptional,
+    defaultValue,
+    span: {
+      startOffset: startTok.startOffset,
+      endOffset: endTok.endOffset ?? endTok.startOffset,
+      startLine: startTok.startLine ?? 1,
+      startColumn: startTok.startColumn ?? 1,
+    },
+  };
+  return { param, nextIndex: i };
+}
+
+/**
+ * Shared body for member declarations shaped like `<name> TYPE
+ * <typeref> [rest…]`. Returns `{ nameTok, typeRes, toks, stmt, tailIdx }`
+ * where `tailIdx` is the index of the first token AFTER the type-ref so
+ * callers can inspect modifiers such as `READ-ONLY` / `VALUE <literal>`.
+ */
+function parseNameTypeStatement(
+  c: Cursor,
+  expectation: { name: string; type: string },
+): {
+  nameTok: IToken;
+  typeRes: { type: TypeRef; nextIndex: number };
+  toks: IToken[];
+  stmt: ReturnType<Cursor['collectStatement']>;
+  tailIdx: number;
+} | null {
+  const head = c.current();
+  const stmt = c.collectStatement();
+  const toks = stmt.tokens;
+  // toks[0] = head keyword (already validated by the caller dispatching on it)
+  const nameTok = toks[1];
+  if (!nameTok || nameTok.tokenType.name !== 'Identifier') {
+    c.report(`expected ${expectation.name}`, nameTok ??
head); + return null; + } + if (toks[2]?.tokenType.name !== 'Type') { + c.report(`expected TYPE in ${expectation.type}`, toks[2] ?? head); + return null; + } + const typeRes = consumeTypeRef(toks, 3, c); + if (!typeRes) return null; + return { nameTok, typeRes, toks, stmt, tailIdx: typeRes.nextIndex }; +} + +// --- DATA / CLASS-DATA --- + +function parseAttributeDecl( + c: Cursor, + abapDoc: string[] | undefined, +): AttributeDecl | null { + const head = c.current(); + const isClassData = head.tokenType.name === 'ClassData'; + const parsed = parseNameTypeStatement(c, { + name: 'attribute name', + type: 'attribute declaration', + }); + if (!parsed) return null; + const { nameTok, typeRes, toks, stmt, tailIdx } = parsed; + const isReadOnly = toks[tailIdx]?.tokenType.name === 'ReadOnly'; + return { + kind: 'AttributeDecl', + name: nameTok.image, + abapDoc, + isClassData, + isReadOnly, + type: typeRes.type, + span: spanFromStmt(stmt, head), + }; +} + +// --- TYPES --- + +function parseTypeDecl( + c: Cursor, + abapDoc: string[] | undefined, +): TypeDecl | null { + const head = c.current(); + // Two shapes: `TYPES TYPE …` or `TYPES: BEGIN OF . … END OF .` + // The latter spans multiple statements. + + const first = c.peek(1); + const second = c.peek(2); + const third = c.peek(3); + + // Detect `TYPES: BEGIN OF name,` form + if ( + first?.tokenType.name === 'Colon' && + second?.tokenType.name === 'Begin' && + third?.tokenType.name === 'Of' + ) { + return parseStructureTypes(c, abapDoc); + } + + // Simple form — single statement ending with Dot. 
+ const parsed = parseNameTypeStatement(c, { + name: 'type name after TYPES', + type: 'TYPES declaration', + }); + if (!parsed) return null; + const { nameTok, typeRes, stmt } = parsed; + return { + kind: 'TypeDecl', + name: nameTok.image, + abapDoc, + shape: { kind: 'alias', type: typeRes.type }, + span: spanFromStmt(stmt, head), + }; +} + +function parseStructureTypes( + c: Cursor, + abapDoc: string[] | undefined, +): TypeDecl | null { + const head = c.current(); // Types + // `TYPES: BEGIN OF , TYPE , … END OF .` + // is a SINGLE chained statement terminated by one Dot. Collect the + // whole thing; commas inside split the chain items. + const stmt = c.collectStatement(); + const toks = stmt.tokens; + // [Types, Colon, Begin, Of, Ident, Comma, , Comma, , Comma, End, Of, Ident] + const nameTok = toks[4]; + if (!nameTok || nameTok.tokenType.name !== 'Identifier') { + c.report('expected struct name in BEGIN OF', nameTok ?? head); + return null; + } + const structName = nameTok.image; + + // Locate the terminating `END OF ` tail and harvest fields between. + let endIdx = -1; + for (let k = 5; k < toks.length - 1; k++) { + if ( + toks[k].tokenType.name === 'End' && + toks[k + 1]?.tokenType.name === 'Of' + ) { + endIdx = k; + break; + } + } + const fieldsEnd = endIdx >= 0 ? endIdx : toks.length; + const fields: StructureField[] = []; + parseFieldRun(toks.slice(5, fieldsEnd), fields, c); + + return { + kind: 'TypeDecl', + name: structName, + abapDoc, + shape: { kind: 'structure', fields }, + span: { + startOffset: head.startOffset, + endOffset: stmt.endOffset, + startLine: head.startLine ?? 1, + startColumn: head.startColumn ?? 
1, + }, + }; +} + +function parseFieldRun(toks: IToken[], out: StructureField[], c: Cursor): void { + let i = 0; + while (i < toks.length) { + // Skip a leading Comma (from end-of-previous-field in the raw source) + if (toks[i]?.tokenType.name === 'Comma') { + i += 1; + continue; + } + const nameTok = toks[i]; + if (!isNameLike(nameTok)) { + // Unexpected — skip to next comma to recover + while (i < toks.length && toks[i].tokenType.name !== 'Comma') i += 1; + continue; + } + if (toks[i + 1]?.tokenType.name !== 'Type') { + i += 1; + continue; + } + // Slurp type-ref tokens up to the next Comma (or end) + let j = i + 2; + const typeStart = j; + while (j < toks.length && toks[j].tokenType.name !== 'Comma') { + j += 1; + } + const typeRes = consumeTypeRef(toks, typeStart, c, j); + if (typeRes) { + const last = toks[j - 1] ?? nameTok; + out.push({ + kind: 'StructureField', + name: nameTok.image, + type: typeRes.type, + span: { + startOffset: nameTok.startOffset, + endOffset: last.endOffset ?? last.startOffset, + startLine: nameTok.startLine ?? 1, + startColumn: nameTok.startColumn ?? 1, + }, + }); + } + i = j; + } +} + +// --- CONSTANTS --- + +function parseConstantDecl( + c: Cursor, + abapDoc: string[] | undefined, +): ConstantDecl | null { + const head = c.current(); + const stmt = c.collectStatement(); + const toks = stmt.tokens; + // [Constants, Ident, Type, , Value, ] + const nameTok = toks[1]; + if (!nameTok || nameTok.tokenType.name !== 'Identifier') { + c.report('expected constant name', nameTok ?? head); + return null; + } + if (toks[2]?.tokenType.name !== 'Type') { + c.report('expected TYPE in CONSTANTS', toks[2] ?? 
head); + return null; + } + // Find VALUE keyword + let vIdx = -1; + for (let k = 3; k < toks.length; k++) { + if (toks[k].tokenType.name === 'Value') { + vIdx = k; + break; + } + } + if (vIdx < 0) { + c.report('expected VALUE in CONSTANTS', head); + return null; + } + const typeRes = consumeTypeRef(toks, 3, c, vIdx); + if (!typeRes) return null; + const valueStart = toks[vIdx + 1]?.startOffset ?? 0; + const valueEnd = toks[toks.length - 1]?.endOffset ?? valueStart; + const value = c['sliceSource'](valueStart, valueEnd); + return { + kind: 'ConstantDecl', + name: nameTok.image, + abapDoc, + type: typeRes.type, + value, + span: spanFromStmt(stmt, head), + }; +} + +// --- EVENTS / CLASS-EVENTS --- + +function parseEventDecl( + c: Cursor, + abapDoc: string[] | undefined, +): EventDecl | null { + const head = c.current(); + const isClassEvent = head.tokenType.name === 'ClassEvents'; + const stmt = c.collectStatement(); + const toks = stmt.tokens; + // toks: [Events|ClassEvents, Ident, [Exporting, ]] + const nameTok = toks[1]; + if (!isNameLike(nameTok)) { + c.report('expected event name', nameTok ?? head); + return null; + } + const exporting: MethodParam[] = []; + if (toks[2]?.tokenType.name === 'Exporting') { + let i = 3; + while (i < toks.length) { + const consumed = consumeMethodParam(toks, i, c); + if (!consumed) break; + exporting.push(consumed.param); + i = consumed.nextIndex; + } + } + return { + kind: 'EventDecl', + name: nameTok.image, + abapDoc, + isClassEvent, + exporting, + span: spanFromStmt(stmt, head), + }; +} + +// --- INTERFACES . --- + +function parseInterfaceStmt( + c: Cursor, + abapDoc: string[] | undefined, +): InterfaceStmt | null { + const head = c.current(); + const stmt = c.collectStatement(); + const nameTok = stmt.tokens[1]; + if (!nameTok || nameTok.tokenType.name !== 'Identifier') { + c.report('expected interface name after INTERFACES', nameTok ?? 
head); + return null; + } + return { + kind: 'InterfaceStmt', + name: nameTok.image, + abapDoc, + span: spanFromStmt(stmt, head), + }; +} + +// --- ALIASES FOR . --- + +function parseAliasDecl( + c: Cursor, + abapDoc: string[] | undefined, +): AliasDecl | null { + const head = c.current(); + const stmt = c.collectStatement(); + const toks = stmt.tokens; + const nameTok = toks[1]; + if (!nameTok || nameTok.tokenType.name !== 'Identifier') { + c.report('expected alias name', nameTok ?? head); + return null; + } + if (toks[2]?.tokenType.name !== 'For') { + c.report('expected FOR in ALIASES', toks[2] ?? head); + return null; + } + const targetStart = toks[3]?.startOffset ?? 0; + const targetEnd = toks[toks.length - 1]?.endOffset ?? targetStart; + const target = c['sliceSource'](targetStart, targetEnd); + return { + kind: 'AliasDecl', + name: nameTok.image, + target, + abapDoc, + span: spanFromStmt(stmt, head), + }; +} + +// ============================================ +// Type-ref consumption +// ============================================ + +function consumeTypeRef( + toks: IToken[], + start: number, + c: Cursor, + endBound?: number, +): { type: TypeRef; nextIndex: number } | null { + const limit = endBound ?? toks.length; + const i = start; + if (i >= limit) { + c.report('expected type reference', toks[i - 1] ?? toks[0]); + return null; + } + + // REF TO + if ( + toks[i]?.tokenType.name === 'Ref' && + toks[i + 1]?.tokenType.name === 'To' + ) { + const inner = consumeTypeRef(toks, i + 2, c, limit); + if (!inner) return null; + const startTok = toks[i]; + const lastTok = toks[inner.nextIndex - 1] ?? startTok; + return { + type: { + kind: 'RefToTypeRef', + target: inner.type, + source: c['sliceSource']( + startTok.startOffset, + lastTok.endOffset ?? 
lastTok.startOffset, + ), + }, + nextIndex: inner.nextIndex, + }; + } + + // STANDARD|SORTED|HASHED TABLE OF [WITH …] + const tableKind = peekTableKind(toks, i); + if (tableKind) { + return consumeTableTypeRef(toks, i, c, limit, tableKind); + } + + // Simple name / qualified form: (=>|~) [(=>|~) …] + // `` accepts any identifier-shaped token, including keywords + // that the grammar re-purposes as names in declaration positions + // (`DATA data TYPE i.`, `REF TO data`, `TYPE type`, etc.). + const startTok: IToken | undefined = toks[i]; + if (!isNameLike(startTok)) { + // `startTok` is narrowed to `undefined` here (isNameLike rejected a + // present token would also land here; in that case the image is + // already captured in `toks[i]` so read from there). + const at = toks[i] ?? toks[i - 1] ?? toks[0]; + const label = at ? at.image : ''; + c.report(`expected type name, got "${label}"`, at); + return null; + } + let lastIdx = i; + const nameParts: string[] = [startTok.image]; + let j = i + 1; + while (j < limit) { + const t = toks[j]?.tokenType.name; + if (t === 'FatArrow' || t === 'Tilde') { + const next = toks[j + 1]; + if (!isNameLike(next)) break; + nameParts.push(t === 'FatArrow' ? '=>' : '~', next.image); + lastIdx = j + 1; + j += 2; + continue; + } + break; + } + const lastTok = toks[lastIdx]; + const source = c['sliceSource']( + startTok.startOffset, + lastTok.endOffset ?? lastTok.startOffset, + ); + const fullName = nameParts.join(''); + const type: TypeRef = isBuiltinName(fullName) + ? 
{ kind: 'BuiltinTypeRef', name: fullName, source } + : { kind: 'NamedTypeRef', name: fullName, source }; + return { type, nextIndex: lastIdx + 1 }; +} + +function peekTableKind(toks: IToken[], i: number): TableKind | null { + const t = toks[i]?.tokenType.name; + if (t === 'Standard' && toks[i + 1]?.tokenType.name === 'Table') + return 'standard'; + if (t === 'Sorted' && toks[i + 1]?.tokenType.name === 'Table') + return 'sorted'; + if (t === 'Hashed' && toks[i + 1]?.tokenType.name === 'Table') + return 'hashed'; + return null; +} + +function consumeTableTypeRef( + toks: IToken[], + start: number, + c: Cursor, + limit: number, + tableKind: TableKind, +): { type: TableTypeRef; nextIndex: number } { + // STANDARD TABLE OF WITH … + let i = start + 2; // skip STANDARD TABLE + if (toks[i]?.tokenType.name !== 'Of') { + c.report('expected OF in TABLE OF', toks[i] ?? toks[start]); + return fallbackTable(toks, start, i, tableKind, c); + } + i += 1; + const rowRes = consumeTypeRef(toks, i, c, limit); + if (!rowRes) { + return fallbackTable(toks, start, i, tableKind, c); + } + i = rowRes.nextIndex; + // WITH … key clause runs to end of statement / bound + const keyStart = i; + while (i < limit) i += 1; + const keyTokens = toks.slice(keyStart, limit); + const keyClause = + keyTokens.length > 0 + ? c['sliceSource']( + keyTokens[0].startOffset, + (keyTokens[keyTokens.length - 1].endOffset ?? + keyTokens[keyTokens.length - 1].startOffset) as number, + ) + : ''; + const startTok = toks[start]; + const lastTok = toks[limit - 1] ?? startTok; + return { + type: { + kind: 'TableTypeRef', + tableKind, + row: rowRes.type, + keyClause, + source: c['sliceSource']( + startTok.startOffset, + lastTok.endOffset ?? lastTok.startOffset, + ), + }, + nextIndex: i, + }; +} + +function fallbackTable( + toks: IToken[], + start: number, + i: number, + tableKind: TableKind, + c: Cursor, +): { type: TableTypeRef; nextIndex: number } { + const startTok = toks[start]; + const lastTok = toks[i - 1] ?? 
startTok; + return { + type: { + kind: 'TableTypeRef', + tableKind, + row: { kind: 'NamedTypeRef', name: '?', source: '?' }, + keyClause: '', + source: c['sliceSource']( + startTok.startOffset, + lastTok.endOffset ?? lastTok.startOffset, + ), + }, + nextIndex: i, + }; +} + +// ============================================ +// Helpers +// ============================================ + +function spanFromStmt( + stmt: { startOffset: number; endOffset: number }, + anchor: IToken, +): SourceSpan { + return { + startOffset: stmt.startOffset, + endOffset: stmt.endOffset, + startLine: anchor.startLine ?? 1, + startColumn: anchor.startColumn ?? 1, + }; +} + +/** + * True when the token can legally appear in a position that expects an + * ABAP identifier (type name, field name, parameter name, qualified part). + * + * The ABAP grammar allows many reserved words to be reused as names in + * declaration positions — e.g. `DATA data TYPE i.`, `METHODS foo IMPORTING + * type TYPE string.`, `TYPES: BEGIN OF x, data TYPE i, END OF x.`. + * At the lexer level those tokens are classified as keywords; the parser + * has to reinterpret them contextually. + * + * We accept: `Identifier`, plus any token whose `image` is a plain ABAP + * identifier (starts with letter/underscore, only contains the chars a + * real identifier would). This keeps the rule cheap (no huge keyword + * allowlist) and catches every keyword that can legally be a name. + */ +/** + * Read a possibly-qualified ABAP name starting at `start`: a chain of + * identifier-shaped tokens separated by `~` (interface member) or `=>` + * (static scope). Returns the joined `image` plus the last-consumed + * index, or `null` if the head token is not name-like. 
+ * + * `zif_foo~bar` → `{ name: 'zif_foo~bar', lastIdx: start + 2 }` + * `zcl_x=>method` → `{ name: 'zcl_x=>method', lastIdx: start + 2 }` + * `plain_name` → `{ name: 'plain_name', lastIdx: start }` + */ +function readQualifiedName( + toks: IToken[], + start: number, +): { name: string; lastIdx: number } | null { + const head = toks[start]; + if (!isNameLike(head)) return null; + const parts: string[] = [head.image]; + let lastIdx = start; + let j = start + 1; + while (j < toks.length) { + const sep = toks[j]?.tokenType.name; + if (sep !== 'Tilde' && sep !== 'FatArrow') break; + const next = toks[j + 1]; + if (!isNameLike(next)) break; + parts.push(sep === 'Tilde' ? '~' : '=>', next.image); + lastIdx = j + 1; + j += 2; + } + return { name: parts.join(''), lastIdx }; +} + +function isNameLike(t: IToken | undefined): t is IToken { + if (!t) return false; + if (t.tokenType.name === 'Identifier') return true; + // Symbols (`Dot`, `Comma`, `FatArrow`, ...) have non-alphabetic images. + return /^[A-Za-z_][A-Za-z0-9_/]*$/.test(t.image); +} + +const BUILTIN_ABAP_TYPES = new Set([ + 'string', + 'xstring', + 'i', + 'int8', + 'int4', + 'int2', + 'int1', + 'f', + 'p', + 'c', + 'n', + 'd', + 't', + 'decfloat16', + 'decfloat34', + 'timestampl', + 'timestamp', + 'abap_bool', + 'abap_boolean', + 'abap_true', + 'abap_false', + 'sy-subrc', + 'sy-tabix', +]); +function isBuiltinName(name: string): boolean { + return BUILTIN_ABAP_TYPES.has(name.toLowerCase()); +} diff --git a/packages/aclass/src/tokens.ts b/packages/aclass/src/tokens.ts new file mode 100644 index 00000000..f65a6b56 --- /dev/null +++ b/packages/aclass/src/tokens.ts @@ -0,0 +1,425 @@ +/** + * Chevrotain Token Definitions for the ABAP OO surface grammar. + * + * Scope: everything `@abapify/aclass` needs to tokenise a `.clas.abap` or + * `.intf.abap` source file up to the point where method bodies become + * opaque text. 
Method-body statements are NOT tokenised here — they are + * captured verbatim by a dedicated body-capture token. + * + * ABAP is case-insensitive. All keywords use `/i` patterns so `CLASS`, + * `Class`, and `class` all tokenise the same way. + * + * Keyword order inside `allTokens` matters: + * 1. Multi-char operators before their single-char prefixes. + * 2. Longer keywords before their prefixes (e.g. `CLASS-DATA` before + * `CLASS`, `INTERFACES` before `INTERFACE`, `NON-UNIQUE` before + * `UNIQUE`). + * 3. `ABAPDocLine` BEFORE `LineComment` so `"! foo` is captured as + * documentation instead of as a regular line comment. + * 4. `Identifier` must come last among the word-shaped tokens so that + * keywords win their matches via `longer_alt`. + */ +import { createToken, Lexer } from 'chevrotain'; + +// ============================================ +// Whitespace +// ============================================ + +export const WhiteSpace = createToken({ + name: 'WhiteSpace', + pattern: /[ \t]+/, + group: Lexer.SKIPPED, +}); + +export const Newline = createToken({ + name: 'Newline', + pattern: /\r?\n/, + group: Lexer.SKIPPED, +}); + +// ============================================ +// Comments +// ============================================ + +/** + * ABAPDoc line: `"! …` up to end of line. MUST come before LineComment + * because `"!` is a prefix of `"`. + */ +export const ABAPDocLine = createToken({ + name: 'ABAPDocLine', + pattern: /"![^\r\n]*/, +}); + +/** Regular line comment: `" …` up to end of line. */ +export const LineComment = createToken({ + name: 'LineComment', + pattern: /"[^\r\n]*/, + group: Lexer.SKIPPED, +}); + +/** Full-line comment starting at column 1 with `*`. Only recognised at SOL. 
*/ +type CustomPattern = { + exec: (text: string, offset: number) => RegExpExecArray | null; + line_breaks?: boolean; +}; +const starCommentPattern: CustomPattern = { + line_breaks: false, + exec: (text, offset) => { + // Only match when at start of line (offset 0 or previous char is \n). + if (offset !== 0 && text.charCodeAt(offset - 1) !== 0x0a /* \n */) { + return null; + } + if (text.charCodeAt(offset) !== 0x2a /* * */) return null; + let end = offset; + while (end < text.length) { + const c = text.charCodeAt(end); + if (c === 0x0a || c === 0x0d) break; + end++; + } + const match = text.slice(offset, end); + return [match] as unknown as RegExpExecArray; + }, +}; +export const StarComment = createToken({ + name: 'StarComment', + pattern: starCommentPattern as unknown as RegExp, + line_breaks: false, + group: Lexer.SKIPPED, +}); + +// ============================================ +// Literals +// ============================================ + +/** Single-quoted ABAP text literal: `'...'` with doubled `''` for escaping. */ +export const StringLiteral = createToken({ + name: 'StringLiteral', + pattern: /'(?:[^']|'')*'/, +}); + +/** Backtick string literal: \`...\`. */ +export const BacktickLiteral = createToken({ + name: 'BacktickLiteral', + pattern: /`(?:[^`]|``)*`/, +}); + +export const IntegerLiteral = createToken({ + name: 'IntegerLiteral', + pattern: /-?\d+/, +}); + +// ============================================ +// Identifier (must come AFTER all keywords in allTokens) +// ============================================ + +/** + * ABAP identifier: starts with letter or underscore; may contain letters, + * digits, underscores, slash (for namespaces like `/ui2/cl_json`), and + * tilde (for qualified names like `zif_foo~method`) — but tilde is + * reserved for qualified references, so at the lexer level we only allow + * the "simple" form here. Qualified `~` and `=>` are handled in the + * parser via separate tokens. 
+ * + * Note: the `=>` operator (static scope) is tokenised as a distinct + * symbol, not as part of the identifier. + */ +export const Identifier = createToken({ + name: 'Identifier', + pattern: /[A-Za-z_][A-Za-z0-9_/]*/, +}); + +// ============================================ +// Keyword factory +// ============================================ + +// All characters that have a special meaning inside a RegExp literal. +// We escape every one of them when embedding `word` into a dynamic +// pattern so CodeQL's `js/incomplete-sanitization` stops complaining +// and callers cannot inject pattern metacharacters by accident. +const REGEX_META = /[.*+?^${}()|[\]\\/-]/g; +function kw(name: string, word: string) { + // Case-insensitive, whole-word match. `longer_alt: Identifier` makes + // the keyword lose to `Identifier` when followed by an identifier + // character, avoiding false positives like `CLASSIFIER` being split + // into `CLASS IFIER`. + const pattern = new RegExp(word.replace(REGEX_META, '\\$&'), 'i'); + return createToken({ name, pattern, longer_alt: Identifier }); +} + +// ============================================ +// Keywords (compound ones first) +// ============================================ + +// `CLASS-METHODS`, `CLASS-DATA`, `CLASS-EVENTS` — must be declared BEFORE +// `CLASS` itself so the longer keyword wins. 
+export const ClassMethods = kw('ClassMethods', 'class-methods'); +export const ClassData = kw('ClassData', 'class-data'); +export const ClassEvents = kw('ClassEvents', 'class-events'); + +// `NON-UNIQUE` before `UNIQUE` +export const NonUnique = kw('NonUnique', 'non-unique'); + +// Core OO keywords +export const Class = kw('Class', 'class'); +export const Interfaces = kw('Interfaces', 'interfaces'); // MUST come before Interface +export const Interface = kw('Interface', 'interface'); +export const EndClass = kw('EndClass', 'endclass'); +export const EndInterface = kw('EndInterface', 'endinterface'); +export const EndMethod = kw('EndMethod', 'endmethod'); +export const Definition = kw('Definition', 'definition'); +export const Implementation = kw('Implementation', 'implementation'); +export const Deferred = kw('Deferred', 'deferred'); +export const Load = kw('Load', 'load'); + +// Section visibility +export const Public = kw('Public', 'public'); +export const Protected = kw('Protected', 'protected'); +export const Private = kw('Private', 'private'); +export const Section = kw('Section', 'section'); + +// Member declaration keywords +export const Methods = kw('Methods', 'methods'); +export const Method = kw('Method', 'method'); +export const Data = kw('Data', 'data'); +export const Types = kw('Types', 'types'); +export const Constants = kw('Constants', 'constants'); +export const Events = kw('Events', 'events'); +export const Aliases = kw('Aliases', 'aliases'); + +// Class-header modifiers +export const Inheriting = kw('Inheriting', 'inheriting'); +export const From = kw('From', 'from'); +export const For = kw('For', 'for'); +export const Testing = kw('Testing', 'testing'); +export const Risk = kw('Risk', 'risk'); +export const Level = kw('Level', 'level'); +export const Duration = kw('Duration', 'duration'); +export const Final = kw('Final', 'final'); +export const Abstract = kw('Abstract', 'abstract'); +export const Create = kw('Create', 'create'); + +// Method 
signature keywords +export const Importing = kw('Importing', 'importing'); +export const Exporting = kw('Exporting', 'exporting'); +export const Changing = kw('Changing', 'changing'); +export const Returning = kw('Returning', 'returning'); +export const Raising = kw('Raising', 'raising'); +export const Value = kw('Value', 'value'); +export const Optional = kw('Optional', 'optional'); +export const Default = kw('Default', 'default'); +export const Redefinition = kw('Redefinition', 'redefinition'); + +// Type-ref keywords +export const Type = kw('Type', 'type'); +export const Ref = kw('Ref', 'ref'); +export const To = kw('To', 'to'); +export const Like = kw('Like', 'like'); + +// Table / structure type keywords +export const Begin = kw('Begin', 'begin'); +export const End = kw('End', 'end'); +export const Of = kw('Of', 'of'); +export const Standard = kw('Standard', 'standard'); +export const Sorted = kw('Sorted', 'sorted'); +export const Hashed = kw('Hashed', 'hashed'); +export const Table = kw('Table', 'table'); +export const With = kw('With', 'with'); +export const Key = kw('Key', 'key'); +export const Empty = kw('Empty', 'empty'); +export const Unique = kw('Unique', 'unique'); + +// Constants / aliases linking +export const As = kw('As', 'as'); +export const ReadOnly = kw('ReadOnly', 'read-only'); + +// Visibility risk-level / duration literals (these are IDENTIFIERS in +// the real grammar, but we keep them as plain Identifier and interpret +// them in the visitor; no dedicated tokens). + +// ============================================ +// Symbols — multi-char first +// ============================================ + +/** Static scope operator: `=>` */ +export const FatArrow = createToken({ + name: 'FatArrow', + pattern: /=>/, +}); + +/** Instance member operator: `->` */ +export const Arrow = createToken({ + name: 'Arrow', + pattern: /->/, +}); + +/** Component selector inside generic reference: `::` (rare, reserved). 
*/ +export const ColonColon = createToken({ + name: 'ColonColon', + pattern: /::/, +}); + +// Single-char symbols +export const Dot = createToken({ name: 'Dot', pattern: /\./ }); +export const Comma = createToken({ name: 'Comma', pattern: /,/ }); +export const Colon = createToken({ name: 'Colon', pattern: /:/ }); +export const Tilde = createToken({ name: 'Tilde', pattern: /~/ }); +export const LParen = createToken({ name: 'LParen', pattern: /\(/ }); +export const RParen = createToken({ name: 'RParen', pattern: /\)/ }); +export const LBracket = createToken({ name: 'LBracket', pattern: /\[/ }); +export const RBracket = createToken({ name: 'RBracket', pattern: /\]/ }); +export const Eq = createToken({ name: 'Eq', pattern: /=/ }); + +// `#` — inferred-type placeholder in `VALUE #(…)`, `NEW #(…)`, `COND #(…)`, etc. +export const Hash = createToken({ name: 'Hash', pattern: /#/ }); + +// String templates `|…|` with interpolation braces `{` / `}`. +// We tokenise them at the single-character level — good enough for +// opaque-body preservation, since the parser never inspects expression +// interior for MethodImpl.body. +export const Pipe = createToken({ name: 'Pipe', pattern: /\|/ }); +export const LBrace = createToken({ name: 'LBrace', pattern: /\{/ }); +export const RBrace = createToken({ name: 'RBrace', pattern: /\}/ }); + +// Arithmetic / comparison operators that appear inside method bodies. +// Treated as plain symbols — the body parser doesn't need their semantics. 
+export const Plus = createToken({ name: 'Plus', pattern: /\+/ });
+export const Minus = createToken({ name: 'Minus', pattern: /-/ });
+export const Star = createToken({ name: 'Star', pattern: /\*/ });
+export const Slash = createToken({ name: 'Slash', pattern: /\// });
+export const Lt = createToken({ name: 'Lt', pattern: /</ });
+export const Gt = createToken({ name: 'Gt', pattern: />/ });
+export const Question = createToken({ name: 'Question', pattern: /\?/ });
+export const At = createToken({ name: 'At', pattern: /@/ });
+export const Ampersand = createToken({ name: 'Ampersand', pattern: /&/ });
+
+// ============================================
+// Full token array — lexer dispatch order
+// ============================================
+
+export const allTokens = [
+  // whitespace / comments
+  WhiteSpace,
+  Newline,
+  StarComment,
+  ABAPDocLine,
+  LineComment,
+
+  // multi-char symbols before single-char variants
+  FatArrow,
+  Arrow,
+  ColonColon,
+
+  // literals
+  StringLiteral,
+  BacktickLiteral,
+  IntegerLiteral,
+
+  // compound keywords MUST come before their prefixes
+  ClassMethods,
+  ClassData,
+  ClassEvents,
+  NonUnique,
+  ReadOnly,
+
+  // core OO
+  Class,
+  Interfaces,
+  Interface,
+  EndClass,
+  EndInterface,
+  EndMethod,
+  Definition,
+  Implementation,
+  Deferred,
+  Load,
+
+  // visibility
+  Public,
+  Protected,
+  Private,
+  Section,
+
+  // members
+  Methods,
+  Method,
+  Data,
+  Types,
+  Constants,
+  Events,
+  Aliases,
+
+  // header modifiers
+  Inheriting,
+  From,
+  For,
+  Testing,
+  Risk,
+  Level,
+  Duration,
+  Final,
+  Abstract,
+  Create,
+
+  // signature
+  Importing,
+  Exporting,
+  Changing,
+  Returning,
+  Raising,
+  Value,
+  Optional,
+  Default,
+  Redefinition,
+
+  // type refs
+  Type,
+  Ref,
+  To,
+  Like,
+  As,
+
+  // table / structure
+  Begin,
+  End,
+  Of,
+  Standard,
+  Sorted,
+  Hashed,
+  Table,
+  With,
+  Key,
+  Empty,
+  Unique,
+
+  // Identifier — must come after all keywords
+  Identifier,
+
+  // single-char symbols
+  Dot,
+  Comma,
+  Colon,
+  Tilde,
+  LParen,
+  RParen,
+  LBracket,
+  RBracket,
+  Eq,
+  Hash,
+  Pipe,
+  LBrace,
+  RBrace,
+  Plus,
+  Minus,
+  Star,
+  Slash,
+  Lt,
+  Gt,
+  Question,
+  At,
+  Ampersand,
+];
+
+export const AclassLexer = new Lexer(allTokens, {
+  // Track newline positions so error locations have line/column.
+  positionTracking: 'full',
+});
diff --git a/packages/aclass/tests/assert.test.ts b/packages/aclass/tests/assert.test.ts
new file mode 100644
index 00000000..7d729847
--- /dev/null
+++ b/packages/aclass/tests/assert.test.ts
@@ -0,0 +1,39 @@
+import { describe, it, expect } from 'vitest';
+import { assertCleanParse, AclassParseError } from '../src/assert';
+
+describe('assertCleanParse', () => {
+  it('returns silently on clean input', () => {
+    expect(() =>
+      assertCleanParse('INTERFACE zif_foo PUBLIC.\nENDINTERFACE.'),
+    ).not.toThrow();
+  });
+
+  it('throws AclassParseError on lex errors, including the file label', () => {
+    // `@` is part of the lexer vocabulary, so it cannot trigger a lex
+    // error; use a character genuinely outside the lexer's alphabet.
+    // Control character U+0007 (BEL) is guaranteed outside every token
+    // pattern, so it's a reliable lex-error trigger.
+    const bad = 'CLASS zcl_x DEFINITION\u0007 PUBLIC.\nENDCLASS.';
+    expect(() => assertCleanParse(bad, 'demo.clas.abap')).toThrow(
+      AclassParseError,
+    );
+    try {
+      assertCleanParse(bad, 'demo.clas.abap');
+    } catch (e) {
+      expect(e).toBeInstanceOf(AclassParseError);
+      const err = e as AclassParseError;
+      expect(err.message).toContain('demo.clas.abap');
+      expect(err.errors.length).toBeGreaterThan(0);
+      expect(err.errors[0].severity).toBe('error');
+    }
+  });
+
+  it('uses the default fileLabel when none is given', () => {
+    try {
+      assertCleanParse('unknown\u0007garbage.');
+    } catch (e) {
+      // NOTE(review): the original expected-label literal was lost
+      // (angle-bracketed text stripped from this patch); pin the
+      // concrete default label here once confirmed against src/assert.
+      expect(e).toBeInstanceOf(AclassParseError);
+    }
+  });
+});
diff --git a/packages/aclass/tests/fixtures.test.ts b/packages/aclass/tests/fixtures.test.ts
new file mode 100644
index 00000000..3eb02a21
--- /dev/null
+++ b/packages/aclass/tests/fixtures.test.ts
@@ -0,0 +1,65 @@
+import { describe, expect, it } from 'vitest';
+import { readFileSync, readdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { dirname } from 'node:path';
+import { parse } from '../src/parser';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const petstore3Dir = join(
+  __dirname,
+  '../../../samples/petstore3-client/generated/abapgit/src',
+);
+
+function listAbapFiles(): { name: string; path: string; source: string }[] {
+  return readdirSync(petstore3Dir)
+    .filter((f) => f.endsWith('.abap'))
+    .sort()
+    .map((f) => ({
+      name: f,
+      path: join(petstore3Dir, f),
+      source: readFileSync(join(petstore3Dir, f), 'utf8'),
+    }));
+}
+
+describe('petstore3 corpus — parser must accept every file the generator emits', () => {
+  const files = listAbapFiles();
+
+  it.each(files)(
+    'parses $name with no lex errors and at least one top-level definition',
+    ({ source, name }) => {
+      const { ast, errors } = parse(source);
+      // Zero tolerance: the fixture contract is "these generated files
+      // parse cleanly".
Checking only lex errors would let new + // diagnostics (missing ENDCLASS, unknown shape in a class body, + // …) slip through unnoticed; gate on the full errors array + // instead. + expect(errors, `parse errors in ${name}`).toEqual([]); + expect( + ast.definitions.length, + `${name} yielded zero top-level definitions`, + ).toBeGreaterThan(0); + }, + ); + + it('every file exposes expected top-level kinds', () => { + const map = new Map(); + for (const { name, source } of files) { + const kinds = parse(source).ast.definitions.map((d) => d.kind); + map.set(name, kinds); + } + // Sanity: zcl_petstore3.clas.abap has a DEFINITION and an + // IMPLEMENTATION; zif_* files have exactly one InterfaceDef; zcx_* + // has a DEFINITION and an IMPLEMENTATION. + expect(map.get('zif_petstore3.intf.abap')).toEqual(['InterfaceDef']); + expect(map.get('zif_petstore3_types.intf.abap')).toEqual(['InterfaceDef']); + + const zcl = map.get('zcl_petstore3.clas.abap') ?? []; + expect(zcl).toContain('ClassDef'); + expect(zcl).toContain('ClassImpl'); + + const zcx = map.get('zcx_petstore3_error.clas.abap') ?? []; + expect(zcx).toContain('ClassDef'); + expect(zcx).toContain('ClassImpl'); + }); +}); diff --git a/packages/aclass/tests/lex.test.ts b/packages/aclass/tests/lex.test.ts new file mode 100644 index 00000000..b1b7be95 --- /dev/null +++ b/packages/aclass/tests/lex.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from 'vitest'; +import { tokenize } from '../src/lex'; +import * as T from '../src/tokens'; + +/** Helper: return the human-readable token names produced by the lexer. 
*/ +function tokenNames(src: string): string[] { + const { tokens, errors } = tokenize(src); + expect(errors).toEqual([]); + return tokens.map((t) => t.tokenType.name); +} + +describe('AclassLexer — Wave 0 smoke coverage', () => { + it('tokenises a minimal class header + ENDCLASS', () => { + const src = 'CLASS zcl_foo DEFINITION PUBLIC FINAL.\nENDCLASS.'; + expect(tokenNames(src)).toEqual([ + 'Class', + 'Identifier', + 'Definition', + 'Public', + 'Final', + 'Dot', + 'EndClass', + 'Dot', + ]); + }); + + it('is case-insensitive', () => { + const lower = tokenNames('class zcl_foo definition.\nendclass.'); + const upper = tokenNames('CLASS zcl_foo DEFINITION.\nENDCLASS.'); + expect(lower).toEqual(upper); + }); + + it('distinguishes CLASS-DATA / CLASS-METHODS / CLASS-EVENTS from CLASS', () => { + expect(tokenNames('CLASS-DATA mv_x TYPE i.')).toEqual([ + 'ClassData', + 'Identifier', + 'Type', + 'Identifier', + 'Dot', + ]); + expect(tokenNames('CLASS-METHODS foo.')).toEqual([ + 'ClassMethods', + 'Identifier', + 'Dot', + ]); + expect(tokenNames('CLASS-EVENTS bar.')).toEqual([ + 'ClassEvents', + 'Identifier', + 'Dot', + ]); + }); + + it('distinguishes INTERFACES (plural, member) from INTERFACE (definition keyword)', () => { + expect(tokenNames('INTERFACES zif_foo.')).toEqual([ + 'Interfaces', + 'Identifier', + 'Dot', + ]); + expect(tokenNames('INTERFACE zif_foo PUBLIC.')).toEqual([ + 'Interface', + 'Identifier', + 'Public', + 'Dot', + ]); + }); + + it('distinguishes NON-UNIQUE from UNIQUE', () => { + expect(tokenNames('WITH NON-UNIQUE KEY field.')).toEqual([ + 'With', + 'NonUnique', + 'Key', + 'Identifier', + 'Dot', + ]); + expect(tokenNames('WITH UNIQUE KEY field.')).toEqual([ + 'With', + 'Unique', + 'Key', + 'Identifier', + 'Dot', + ]); + }); + + it('captures ABAPDocLine comments but skips regular line comments', () => { + const { tokens } = tokenize( + [ + '"! 
API client', + '" internal note', + 'CLASS zcl_foo DEFINITION.', + 'ENDCLASS.', + '', + ].join('\n'), + ); + const names = tokens.map((t) => t.tokenType.name); + expect(names).toContain('ABAPDocLine'); + // Plain line-comment is skipped, so LineComment must not appear. + expect(names).not.toContain('LineComment'); + }); + + it('skips star-comments that start at column 1', () => { + const { tokens } = tokenize( + ['* generated header', 'CLASS zcl_foo DEFINITION.', 'ENDCLASS.'].join( + '\n', + ), + ); + expect(tokens[0].tokenType.name).toBe('Class'); + }); + + it('does NOT treat a mid-line asterisk as a star-comment', () => { + // A mid-line `*` (not at column 1) must tokenise as a Star symbol, + // NOT swallow the rest of the line as a comment. The tokens AFTER + // the `*` must still be visible to the parser. + const { tokens, errors } = tokenize('CLASS zcl. *trailing.'); + expect(errors).toEqual([]); + const names = tokens.map((t) => t.tokenType.name); + // Must contain Star followed by `trailing` identifier — proving the + // `*` didn't swallow the rest of the line. 
+ expect(names).toContain('Star'); + const starIdx = names.indexOf('Star'); + expect(names[starIdx + 1]).toBe('Identifier'); + expect(tokens[starIdx + 1].image).toBe('trailing'); + }); + + it('tokenises static and instance access operators', () => { + expect(tokenNames('cl_foo=>bar( ).')).toEqual([ + 'Identifier', + 'FatArrow', + 'Identifier', + 'LParen', + 'RParen', + 'Dot', + ]); + expect(tokenNames('me->baz( ).')).toEqual([ + 'Identifier', + 'Arrow', + 'Identifier', + 'LParen', + 'RParen', + 'Dot', + ]); + }); + + it('tokenises qualified interface members with tilde', () => { + expect(tokenNames('ALIASES save FOR zif_io~save.')).toEqual([ + 'Aliases', + 'Identifier', + 'For', + 'Identifier', + 'Tilde', + 'Identifier', + 'Dot', + ]); + }); + + it('tokenises method signature keywords', () => { + const src = + 'METHODS get IMPORTING pet_id TYPE string RETURNING VALUE(r) TYPE i RAISING zcx_err.'; + expect(tokenNames(src)).toEqual([ + 'Methods', + 'Identifier', + 'Importing', + 'Identifier', + 'Type', + 'Identifier', + 'Returning', + 'Value', + 'LParen', + 'Identifier', + 'RParen', + 'Type', + 'Identifier', + 'Raising', + 'Identifier', + 'Dot', + ]); + }); + + it('exposes all tokens through the public `tokens` namespace', () => { + expect(T.AclassLexer).toBeDefined(); + expect(T.allTokens.length).toBeGreaterThan(40); + expect(T.Class.name).toBe('Class'); + expect(T.Interface.name).toBe('Interface'); + expect(T.EndClass.name).toBe('EndClass'); + }); +}); diff --git a/packages/aclass/tests/parse-interface.test.ts b/packages/aclass/tests/parse-interface.test.ts new file mode 100644 index 00000000..4bbeea9e --- /dev/null +++ b/packages/aclass/tests/parse-interface.test.ts @@ -0,0 +1,405 @@ +import { describe, it, expect } from 'vitest'; +import { parse } from '../src/parser'; +import type { InterfaceDef, MethodDecl, TypeDecl } from '../src/ast'; + +function firstInterface(src: string): InterfaceDef { + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + 
const def = ast.definitions[0]; + expect(def.kind).toBe('InterfaceDef'); + return def as InterfaceDef; +} + +describe('parse — INTERFACE declarations', () => { + it('parses a public empty interface', () => { + const i = firstInterface('INTERFACE zif_foo PUBLIC.\nENDINTERFACE.'); + expect(i.name).toBe('zif_foo'); + expect(i.isPublic).toBe(true); + expect(i.members).toEqual([]); + }); + + it('parses a simple TYPES alias member', () => { + const i = firstInterface( + 'INTERFACE zif_foo PUBLIC.\n' + ' TYPES id TYPE i.\n' + 'ENDINTERFACE.', + ); + const td = i.members[0] as TypeDecl; + expect(td.kind).toBe('TypeDecl'); + expect(td.name).toBe('id'); + expect(td.shape.kind).toBe('alias'); + if (td.shape.kind === 'alias') { + expect(td.shape.type.kind).toBe('BuiltinTypeRef'); + expect(td.shape.type.source).toBe('i'); + } + }); + + it('parses a STANDARD TABLE OF qualified type', () => { + const i = firstInterface( + 'INTERFACE zif_foo PUBLIC.\n' + + ' TYPES pet_list TYPE STANDARD TABLE OF zif_bar=>pet WITH DEFAULT KEY.\n' + + 'ENDINTERFACE.', + ); + const td = i.members[0] as TypeDecl; + expect(td.shape.kind).toBe('alias'); + if (td.shape.kind !== 'alias') return; + expect(td.shape.type.kind).toBe('TableTypeRef'); + if (td.shape.type.kind !== 'TableTypeRef') return; + expect(td.shape.type.tableKind).toBe('standard'); + expect(td.shape.type.row.source).toBe('zif_bar=>pet'); + expect(td.shape.type.keyClause).toContain('WITH DEFAULT KEY'); + }); + + it('parses METHODS with IMPORTING + RETURNING + RAISING', () => { + const src = [ + 'INTERFACE zif_foo PUBLIC.', + ' METHODS get', + ' IMPORTING pet_id TYPE int8', + ' RETURNING VALUE(pet) TYPE zif_bar=>pet', + ' RAISING zcx_err.', + 'ENDINTERFACE.', + ].join('\n'); + const i = firstInterface(src); + const m = i.members[0] as MethodDecl; + expect(m.kind).toBe('MethodDecl'); + expect(m.name).toBe('get'); + expect(m.importing).toHaveLength(1); + expect(m.importing[0].name).toBe('pet_id'); + 
expect(m.importing[0].type.source).toBe('int8'); + expect(m.returning?.name).toBe('pet'); + expect(m.returning?.isValue).toBe(true); + expect(m.returning?.type.source).toBe('zif_bar=>pet'); + expect(m.raising).toEqual(['zcx_err']); + }); + + it('parses OPTIONAL parameters', () => { + const src = [ + 'INTERFACE zif_foo PUBLIC.', + ' METHODS del', + ' IMPORTING', + ' pet_id TYPE int8', + ' api_key TYPE string OPTIONAL.', + 'ENDINTERFACE.', + ].join('\n'); + const i = firstInterface(src); + const m = i.members[0] as MethodDecl; + expect(m.importing).toHaveLength(2); + expect(m.importing[0].isOptional).toBe(false); + expect(m.importing[1].name).toBe('api_key'); + expect(m.importing[1].isOptional).toBe(true); + }); + + it('captures ABAPDoc on members', () => { + const src = [ + 'INTERFACE zif_foo PUBLIC.', + ' "! @openapi-operation addPet', + ' "! Add a new pet.', + ' METHODS add_pet', + ' IMPORTING body TYPE string.', + 'ENDINTERFACE.', + ].join('\n'); + const i = firstInterface(src); + const m = i.members[0] as MethodDecl; + expect(m.abapDoc).toEqual(['@openapi-operation addPet', 'Add a new pet.']); + }); + + it('parses BEGIN OF / END OF structure inside interface', () => { + const src = [ + 'INTERFACE zif_t PUBLIC.', + ' TYPES: BEGIN OF pet,', + ' id TYPE int8,', + ' name TYPE string,', + ' END OF pet.', + 'ENDINTERFACE.', + ].join('\n'); + const i = firstInterface(src); + const td = i.members[0] as TypeDecl; + expect(td.kind).toBe('TypeDecl'); + expect(td.name).toBe('pet'); + expect(td.shape.kind).toBe('structure'); + if (td.shape.kind !== 'structure') return; + expect(td.shape.fields).toHaveLength(2); + expect(td.shape.fields[0].name).toBe('id'); + expect(td.shape.fields[0].type.source).toBe('int8'); + expect(td.shape.fields[1].name).toBe('name'); + expect(td.shape.fields[1].type.source).toBe('string'); + }); +}); + +describe('parse — CLASS declarations', () => { + it('parses class header with FINAL / INHERITING / CREATE', () => { + const src = [ + 'CLASS zcx_err 
DEFINITION PUBLIC FINAL INHERITING FROM cx_static_check CREATE PUBLIC.', + ' PUBLIC SECTION.', + ' DATA status TYPE i READ-ONLY.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const cls = ast.definitions[0]; + expect(cls.kind).toBe('ClassDef'); + if (cls.kind !== 'ClassDef') return; + expect(cls.name).toBe('zcx_err'); + expect(cls.isFinal).toBe(true); + expect(cls.superClass).toBe('cx_static_check'); + expect(cls.createVisibility).toBe('public'); + expect(cls.sections).toHaveLength(1); + const sec = cls.sections[0]; + expect(sec.visibility).toBe('public'); + const attr = sec.members[0]; + expect(attr.kind).toBe('AttributeDecl'); + if (attr.kind !== 'AttributeDecl') return; + expect(attr.name).toBe('status'); + expect(attr.isReadOnly).toBe(true); + }); + + it('parses CLASS IMPLEMENTATION with method bodies preserved verbatim', () => { + const src = [ + 'CLASS zcl_foo IMPLEMENTATION.', + ' METHOD constructor.', + ' super->constructor( ).', + ' me->x = 1.', + ' ENDMETHOD.', + ' METHOD ping.', + ' RETURN.', + ' ENDMETHOD.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const impl = ast.definitions[0]; + expect(impl.kind).toBe('ClassImpl'); + if (impl.kind !== 'ClassImpl') return; + expect(impl.methods).toHaveLength(2); + expect(impl.methods[0].name).toBe('constructor'); + expect(impl.methods[0].body).toContain('super->constructor( ).'); + expect(impl.methods[0].body).toContain('me->x = 1.'); + expect(impl.methods[1].name).toBe('ping'); + expect(impl.methods[1].body).toContain('RETURN.'); + }); + + it('parses an INTERFACES member statement', () => { + const src = [ + 'CLASS zcl_foo DEFINITION PUBLIC FINAL CREATE PUBLIC.', + ' PUBLIC SECTION.', + ' INTERFACES zif_petstore3.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const cls = ast.definitions[0]; + if (cls.kind !== 'ClassDef') return; + const stmt = 
cls.sections[0].members[0]; + expect(stmt.kind).toBe('InterfaceStmt'); + if (stmt.kind !== 'InterfaceStmt') return; + expect(stmt.name).toBe('zif_petstore3'); + }); + + it('parses EVENTS into a typed EventDecl node', () => { + const src = [ + 'CLASS zcl_foo DEFINITION PUBLIC FINAL CREATE PUBLIC.', + ' PUBLIC SECTION.', + ' EVENTS changed EXPORTING VALUE(payload) TYPE string.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const cls = ast.definitions[0]; + if (cls.kind !== 'ClassDef') return; + const ev = cls.sections[0].members[0]; + expect(ev.kind).toBe('EventDecl'); + if (ev.kind !== 'EventDecl') return; + expect(ev.name).toBe('changed'); + expect(ev.isClassEvent).toBe(false); + expect(ev.exporting).toHaveLength(1); + expect(ev.exporting[0].name).toBe('payload'); + expect(ev.exporting[0].isValue).toBe(true); + expect(ev.exporting[0].type.source).toBe('string'); + }); + + it('preserves genuinely-unknown member statements as RawMember', () => { + const src = [ + 'CLASS zcl_foo DEFINITION PUBLIC FINAL CREATE PUBLIC.', + ' PUBLIC SECTION.', + ' WILDCARD something_unrecognised_by_mvp.', // truly outside MVP grammar + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const cls = ast.definitions[0]; + if (cls.kind !== 'ClassDef') return; + const raw = cls.sections[0].members[0]; + expect(raw.kind).toBe('RawMember'); + if (raw.kind !== 'RawMember') return; + expect(raw.source).toContain('WILDCARD'); + }); +}); + +describe('parse — error handling', () => { + it('returns errors instead of throwing for unrecognised top-level tokens', () => { + const { errors } = parse('GARBAGE.'); + expect(errors.length).toBeGreaterThan(0); + expect(errors[0].severity).toBe('error'); + }); + + it('never throws even for empty input', () => { + const { ast, errors } = parse(''); + expect(ast.definitions).toEqual([]); + expect(errors).toEqual([]); + }); + + it('is robust against a missing 
ENDCLASS, producing a best-effort AST + a diagnostic', () => { + const { ast, errors } = parse( + 'CLASS zcl_x DEFINITION PUBLIC.\n PUBLIC SECTION.\n', + ); + // Doesn't throw; produces at least the opening ClassDef. + expect(ast.definitions.length).toBeGreaterThan(0); + // ENDCLASS never arrives, so the parser MUST emit at least one + // diagnostic pointing at the truncated input. An empty errors array + // here would mean we silently lost information. + expect(errors.length).toBeGreaterThan(0); + expect(errors[0].severity).toBe('error'); + }); +}); + +describe('parse — ABAP keywords used as names', () => { + it('accepts a parameter named `data` (keyword-as-name)', () => { + const src = [ + 'INTERFACE zif_x PUBLIC.', + ' METHODS foo IMPORTING data TYPE i.', + 'ENDINTERFACE.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const iface = ast.definitions[0]; + if (iface.kind !== 'InterfaceDef') throw new Error('expected InterfaceDef'); + const m = iface.members[0]; + if (m.kind !== 'MethodDecl') throw new Error('expected MethodDecl'); + expect(m.importing[0].name).toBe('data'); + expect(m.importing[0].type.source).toBe('i'); + }); + + it('accepts REF TO ', () => { + const src = [ + 'INTERFACE zif_x PUBLIC.', + ' TYPES generic_ref TYPE REF TO data.', + 'ENDINTERFACE.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const iface = ast.definitions[0]; + if (iface.kind !== 'InterfaceDef') throw new Error('expected InterfaceDef'); + const td = iface.members[0]; + if (td.kind !== 'TypeDecl') throw new Error('expected TypeDecl'); + if (td.shape.kind !== 'alias') throw new Error('expected alias'); + expect(td.shape.type.kind).toBe('RefToTypeRef'); + if (td.shape.type.kind !== 'RefToTypeRef') return; + expect(td.shape.type.target.source).toBe('data'); + }); + + it('accepts a structure field named after an ABAP keyword', () => { + const src = [ + 'INTERFACE zif_x PUBLIC.', + ' TYPES: BEGIN OF row,', + 
' type TYPE string,', + ' data TYPE i,', + ' value TYPE string,', + ' END OF row.', + 'ENDINTERFACE.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const iface = ast.definitions[0]; + if (iface.kind !== 'InterfaceDef') throw new Error('expected InterfaceDef'); + const td = iface.members[0]; + if (td.kind !== 'TypeDecl') throw new Error('expected TypeDecl'); + if (td.shape.kind !== 'structure') throw new Error('expected structure'); + const fields = td.shape.fields.map((f) => f.name); + expect(fields).toEqual(['type', 'data', 'value']); + }); + + it('accepts qualified type references where either side is a keyword', () => { + const src = [ + 'INTERFACE zif_x PUBLIC.', + ' TYPES foo TYPE zif_y=>data.', + 'ENDINTERFACE.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const iface = ast.definitions[0]; + if (iface.kind !== 'InterfaceDef') throw new Error('expected InterfaceDef'); + const td = iface.members[0]; + if (td.kind !== 'TypeDecl' || td.shape.kind !== 'alias') return; + expect(td.shape.type.source).toBe('zif_y=>data'); + }); +}); + +describe('parse — MethodImpl.bodySpan', () => { + it('bodySpan.startLine points at the first line of the method body, not at METHOD', () => { + const src = [ + 'CLASS zcl_x IMPLEMENTATION.', + ' METHOD foo.', // line 2 + ' RETURN.', // line 3 — this is where bodySpan should start + ' ENDMETHOD.', + 'ENDCLASS.', + ].join('\n'); + const { ast } = parse(src); + const impl = ast.definitions.find((d) => d.kind === 'ClassImpl'); + if (impl?.kind !== 'ClassImpl') throw new Error('expected ClassImpl'); + const m = impl.methods[0]; + expect(m.span.startLine).toBe(2); // METHOD keyword line + expect(m.bodySpan.startLine).toBe(3); // body content line + }); + + it('empty method body still produces a valid bodySpan', () => { + const src = [ + 'CLASS zcl_x IMPLEMENTATION.', + ' METHOD noop.', + ' ENDMETHOD.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = 
parse(src); + expect(errors).toEqual([]); + const impl = ast.definitions.find((d) => d.kind === 'ClassImpl'); + if (impl?.kind !== 'ClassImpl') throw new Error('expected ClassImpl'); + const m = impl.methods[0]; + // Empty body span must still be well-ordered (end >= start - 1 is the + // degenerate empty-range case; start should never land past end of file). + expect(m.bodySpan.startOffset).toBeLessThanOrEqual( + m.bodySpan.endOffset + 1, + ); + expect(m.body.trim()).toBe(''); + }); +}); + +describe('parse — qualified method names', () => { + it('preserves `zif_foo~bar` on METHOD IMPLEMENTATIONS', () => { + const src = [ + 'CLASS zcl_x IMPLEMENTATION.', + ' METHOD zif_petstore3~add_pet.', + ' RETURN.', + ' ENDMETHOD.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const impl = ast.definitions.find((d) => d.kind === 'ClassImpl'); + if (impl?.kind !== 'ClassImpl') throw new Error('expected ClassImpl'); + expect(impl.methods[0].name).toBe('zif_petstore3~add_pet'); + }); + + it('preserves `zif_foo~bar REDEFINITION` on METHODS declarations', () => { + const src = [ + 'CLASS zcl_x DEFINITION PUBLIC FINAL CREATE PUBLIC.', + ' PUBLIC SECTION.', + ' METHODS zif_foo~bar REDEFINITION.', + 'ENDCLASS.', + ].join('\n'); + const { ast, errors } = parse(src); + expect(errors).toEqual([]); + const cls = ast.definitions[0]; + if (cls.kind !== 'ClassDef') throw new Error('expected ClassDef'); + const m = cls.sections[0].members[0]; + if (m.kind !== 'MethodDecl') throw new Error('expected MethodDecl'); + expect(m.name).toBe('zif_foo~bar'); + expect(m.isRedefinition).toBe(true); + }); +}); diff --git a/packages/aclass/tests/roundtrip.test.ts b/packages/aclass/tests/roundtrip.test.ts new file mode 100644 index 00000000..3581c403 --- /dev/null +++ b/packages/aclass/tests/roundtrip.test.ts @@ -0,0 +1,102 @@ +import { describe, expect, it } from 'vitest'; +import { readFileSync, readdirSync } from 'node:fs'; +import { join, dirname } 
from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { parse } from '../src/parser'; +import type { ClassDef, InterfaceDef, MethodDecl } from '../src/ast'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const petstore3Dir = join( + __dirname, + '../../../samples/petstore3-client/generated/abapgit/src', +); + +function file(name: string): string { + return readFileSync(join(petstore3Dir, name), 'utf8'); +} + +describe('roundtrip — AST captures every declaration the generator emits', () => { + it('zif_petstore3 interface declares 19 methods matching the operation count', () => { + const src = file('zif_petstore3.intf.abap'); + const { ast } = parse(src); + const iface = ast.definitions[0] as InterfaceDef; + const methods = iface.members.filter( + (m): m is MethodDecl => m.kind === 'MethodDecl', + ); + // petstore3 OpenAPI has 19 operations in scope of the generator. + expect(methods).toHaveLength(19); + // Every method has a name. + for (const m of methods) { + expect(m.name.length).toBeGreaterThan(0); + expect(m.raising.length).toBeGreaterThan(0); + } + }); + + it('zif_petstore3_types interface exposes the expected schema set', () => { + const src = file('zif_petstore3_types.intf.abap'); + const { ast } = parse(src); + const iface = ast.definitions[0] as InterfaceDef; + const typeDecls = iface.members.filter((m) => m.kind === 'TypeDecl'); + // 6 top-level schemas: Order, Category, User, Tag, Pet, ApiResponse. 
+ expect(typeDecls).toHaveLength(6); + const names = typeDecls.map((t) => (t as { name: string }).name); + expect(names).toEqual([ + 'order', + 'category', + 'user', + 'tag', + 'pet', + 'api_response', + ]); + }); + + it('zcx_petstore3_error inherits from cx_static_check and is FINAL', () => { + const src = file('zcx_petstore3_error.clas.abap'); + const { ast } = parse(src); + const def = ast.definitions.find((d) => d.kind === 'ClassDef') as ClassDef; + expect(def.isFinal).toBe(true); + expect(def.superClass).toBe('cx_static_check'); + const pub = def.sections.find((s) => s.visibility === 'public'); + expect(pub).toBeDefined(); + }); + + it('zcl_petstore3 class has a DEFINITION + IMPLEMENTATION pair with 19 methods', () => { + const src = file('zcl_petstore3.clas.abap'); + const { ast } = parse(src); + const defs = ast.definitions; + const def = defs.find((d) => d.kind === 'ClassDef') as ClassDef; + const impl = defs.find((d) => d.kind === 'ClassImpl'); + expect(def).toBeDefined(); + expect(impl).toBeDefined(); + if (impl?.kind !== 'ClassImpl') throw new Error('expected ClassImpl'); + // 1 constructor + 19 operations = 20 implementations. + expect(impl.methods.length).toBeGreaterThanOrEqual(19); + const names = impl.methods.map((m) => m.name); + expect(names).toContain('constructor'); + }); + + it('idempotence: parse(src) twice yields equivalent ASTs', () => { + const files = readdirSync(petstore3Dir).filter((f) => f.endsWith('.abap')); + for (const f of files) { + const src = readFileSync(join(petstore3Dir, f), 'utf8'); + const a = parse(src); + const b = parse(src); + // Both yield the same number and kind of top-level defs. 
+ expect(b.ast.definitions.length, f).toBe(a.ast.definitions.length); + for (let i = 0; i < a.ast.definitions.length; i++) { + expect(b.ast.definitions[i].kind, f).toBe(a.ast.definitions[i].kind); + } + } + }); + + it('method-body preservation: every MethodImpl.body is non-empty for zcl_petstore3', () => { + const src = file('zcl_petstore3.clas.abap'); + const impl = parse(src).ast.definitions.find((d) => d.kind === 'ClassImpl'); + if (impl?.kind !== 'ClassImpl') throw new Error('expected ClassImpl'); + for (const m of impl.methods) { + expect(m.body.length, `${m.name} has empty body`).toBeGreaterThan(0); + // Source appears where we sliced it. + expect(src).toContain(m.body.trimEnd()); + } + }); +}); diff --git a/packages/aclass/tsconfig.json b/packages/aclass/tsconfig.json new file mode 100644 index 00000000..c2104f6b --- /dev/null +++ b/packages/aclass/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "rootDir": "./src", + "outDir": "./dist" + }, + "include": ["src/**/*"] +} diff --git a/packages/aclass/tsdown.config.ts b/packages/aclass/tsdown.config.ts new file mode 100644 index 00000000..ab43cf84 --- /dev/null +++ b/packages/aclass/tsdown.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'tsdown'; +import baseConfig from '../../tsdown.config.ts'; + +export default defineConfig({ + ...baseConfig, + entry: ['src/index.ts'], +}); diff --git a/packages/aclass/vitest.config.ts b/packages/aclass/vitest.config.ts new file mode 100644 index 00000000..8e730d50 --- /dev/null +++ b/packages/aclass/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + }, +}); diff --git a/packages/openai-codegen/package.json b/packages/openai-codegen/package.json index 29e23081..f3c0cc4b 100644 --- a/packages/openai-codegen/package.json +++ b/packages/openai-codegen/package.json @@ -37,7 +37,8 @@ "author": "abapify", 
"license": "MIT", "devDependencies": { - "@abaplint/core": "^2.118.12" + "@abaplint/core": "^2.118.12", + "@abapify/aclass": "workspace:*" }, "repository": { "type": "git", diff --git a/packages/openai-codegen/tests/aclass-parse-gate.test.ts b/packages/openai-codegen/tests/aclass-parse-gate.test.ts new file mode 100644 index 00000000..b0a685dc --- /dev/null +++ b/packages/openai-codegen/tests/aclass-parse-gate.test.ts @@ -0,0 +1,114 @@ +/** + * CI gate: every generated `.clas.abap` / `.intf.abap` that the + * openai-codegen pipeline ships in `samples/petstore3-client` MUST + * round-trip through `@abapify/aclass` cleanly. + * + * This asserts that our emitter output stays *inside* the structural + * subset of ABAP that `aclass` understands. If someone changes the + * emitter to produce a shape the parser doesn't recognise, this test + * flips red — the fix is either to extend the parser's grammar or to + * revisit the emitter output. + * + * Guards two invariants: + * 1. Zero lex errors (every character the emitter produces is inside + * the lexer's vocabulary). + * 2. Zero unrecognised `RawMember` fallbacks in INTERFACE files. + * Class files may legitimately contain `EVENTS` / behaviour-pool + * constructs outside MVP scope, but interfaces emitted by this + * generator never do. 
+ */ +import { describe, expect, it } from 'vitest'; +import { readFileSync, readdirSync } from 'node:fs'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { assertCleanParse, parse } from '@abapify/aclass'; +import type { ClassDef, InterfaceDef } from '@abapify/aclass'; +import { Registry, MemoryFile } from '@abaplint/core'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const petstore3Dir = join( + __dirname, + '../../../samples/petstore3-client/generated/abapgit/src', +); + +describe('openai-codegen × aclass — parse-gate for petstore3 corpus', () => { + const files = readdirSync(petstore3Dir) + .filter((f) => f.endsWith('.abap')) + .map((f) => ({ f, src: readFileSync(join(petstore3Dir, f), 'utf8') })); + + it.each(files)('$f parses cleanly via aclass', ({ f, src }) => { + // `assertCleanParse` throws a labelled AclassParseError on any lex + // or parse error, with file:line pointers in the message. + expect(() => assertCleanParse(src, f)).not.toThrow(); + }); + + it.each(files)( + '$f parses cleanly via @abaplint/core (no parser_error)', + ({ f, src }) => { + // Second opinion: run the same source through abaplint's Registry. + // Gate ONLY on `parser_error` keys — abaplint's default rule set + // includes stylistic rules (`description_empty`, + // `in_statement_indentation`, `global_class` filename check) that + // aren't relevant to the "does this even parse?" question. 
+ const reg = new Registry().addFile(new MemoryFile(f, src)).parse(); + const fatals = reg + .findIssues() + .filter((i) => i.getKey() === 'parser_error') + .map((i) => `${i.getKey()}: ${i.getMessage()}`); + expect(fatals).toEqual([]); + }, + ); + + it.each(files.filter((f) => f.f.endsWith('.intf.abap')))( + '$f: interface body is fully structured (no RawMember fallbacks)', + ({ f, src }) => { + const { ast } = parse(src); + const iface = ast.definitions.find((d) => d.kind === 'InterfaceDef') as + | InterfaceDef + | undefined; + expect(iface, `${f} has no InterfaceDef`).toBeDefined(); + if (!iface) return; + const raws = iface.members.filter((m) => m.kind === 'RawMember'); + expect( + raws.map((r) => ('source' in r ? r.source : '')), + `unrecognised members in ${f}`, + ).toEqual([]); + }, + ); + + it('zif_petstore3.intf.abap exposes the generator-promised method count', () => { + const src = readFileSync( + join(petstore3Dir, 'zif_petstore3.intf.abap'), + 'utf8', + ); + const { ast } = parse(src); + const iface = ast.definitions[0] as InterfaceDef; + const methods = iface.members.filter((m) => m.kind === 'MethodDecl'); + expect(methods.length).toBe(19); + for (const m of methods) { + expect(m.kind === 'MethodDecl' && m.raising.length).toBeGreaterThan(0); + } + }); + + it('zcl_petstore3.clas.abap has a ClassDef + ClassImpl pair', () => { + const src = readFileSync( + join(petstore3Dir, 'zcl_petstore3.clas.abap'), + 'utf8', + ); + const kinds = parse(src).ast.definitions.map((d) => d.kind); + expect(kinds).toContain('ClassDef'); + expect(kinds).toContain('ClassImpl'); + }); + + it('zcx_petstore3_error.clas.abap inherits cx_static_check', () => { + const src = readFileSync( + join(petstore3Dir, 'zcx_petstore3_error.clas.abap'), + 'utf8', + ); + const def = parse(src).ast.definitions.find( + (d) => d.kind === 'ClassDef', + ) as ClassDef; + expect(def.superClass).toBe('cx_static_check'); + expect(def.isFinal).toBe(true); + }); +}); diff --git 
a/packages/openai-codegen/tsconfig.json b/packages/openai-codegen/tsconfig.json index 2f283f9b..061bbf72 100644 --- a/packages/openai-codegen/tsconfig.json +++ b/packages/openai-codegen/tsconfig.json @@ -5,5 +5,12 @@ "outDir": "./dist" }, "include": ["src/**/*"], - "references": [{ "path": "../abap-ast" }] + "references": [ + { + "path": "../abap-ast" + }, + { + "path": "../aclass" + } + ] } diff --git a/tsconfig.json b/tsconfig.json index 7f2c7429..de915a89 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -110,6 +110,9 @@ }, { "path": "./tools/nx-npm-trust" + }, + { + "path": "./packages/aclass" } ] }