diff --git a/__tests__/systemverilog.test.ts b/__tests__/systemverilog.test.ts
new file mode 100644
index 000000000..47ea1e2e1
--- /dev/null
+++ b/__tests__/systemverilog.test.ts
@@ -0,0 +1,826 @@
+/**
+ * SystemVerilog / Verilog extraction tests.
+ *
+ * Locks in the design-hierarchy mapping (module/instantiation), subroutine-body
+ * call capture, enum-member emission, and package/import handling.
+ */
+import { describe, it, expect, beforeAll, afterEach } from 'vitest';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { extractFromSource } from '../src/extraction';
+import { detectLanguage, initGrammars, loadAllGrammars } from '../src/extraction/grammars';
+import { matchReference } from '../src/resolution/name-matcher';
+import { CodeGraph } from '../src';
+import { DatabaseConnection } from '../src/db';
+import type { Node } from '../src/types';
+import type { ResolutionContext, UnresolvedRef } from '../src/resolution/types';
+
+// Grammars are loaded once for the whole file; every suite below parses SV source.
+beforeAll(async () => {
+  await initGrammars();
+  await loadAllGrammars();
+});
+
+describe('SystemVerilog language detection', () => {
+  it('maps the HDL extensions to systemverilog', () => {
+    expect(detectLanguage('rtl/fifo.sv')).toBe('systemverilog');
+    expect(detectLanguage('pkg/types.svh')).toBe('systemverilog');
+    expect(detectLanguage('legacy/core.v')).toBe('systemverilog');
+    expect(detectLanguage('inc/defs.vh')).toBe('systemverilog');
+  });
+});
+
+describe('SystemVerilog extraction', () => {
+  it('emits module nodes and instantiation edges (instantiates + calls) to the module type', () => {
+    const code = `
+module leaf(input a, output y);
+ assign y = ~a;
+endmodule
+module top(input x, output z);
+ leaf u_leaf(.a(x), .y(z));
+endmodule
+`;
+    const r = extractFromSource('top.sv', code);
+    const modules = r.nodes.filter((n) => n.kind === 'module').map((n) => n.name);
+    expect(modules).toEqual(expect.arrayContaining(['leaf', 'top']));
+
+    // The instantiation binds to the module TYPE (leaf), not the u_leaf label.
+    const refs = r.unresolvedReferences.filter((x) => x.referenceName === 'leaf');
+    expect(refs.some((x) => x.referenceKind === 'instantiates')).toBe(true);
+    expect(refs.some((x) => x.referenceKind === 'calls')).toBe(true);
+    // Search ALL references here: `refs` only holds names equal to 'leaf', so
+    // looking for 'u_leaf' inside it could never fail.
+    expect(r.unresolvedReferences.some((x) => x.referenceName === 'u_leaf')).toBe(false);
+  });
+
+  it('captures calls made inside subroutine (function/task) bodies', () => {
+    const code = `
+module m;
+ function automatic int g(input int v);
+ return v;
+ endfunction
+ task automatic f(input int v);
+ int t;
+ t = g(v);
+ endtask
+endmodule
+`;
+    const r = extractFromSource('m.sv', code);
+    const calls = r.unresolvedReferences.filter(
+      (x) => x.referenceKind === 'calls' && x.referenceName === 'g'
+    );
+    expect(calls.length).toBeGreaterThan(0);
+  });
+
+  it('emits enum members for typedef enums', () => {
+    const code = `
+package p;
+ typedef enum logic [1:0] { A, B, C } e_t;
+endpackage
+`;
+    const r = extractFromSource('p.sv', code);
+    expect(r.nodes.some((n) => n.kind === 'enum' && n.name === 'e_t')).toBe(true);
+    const members = r.nodes.filter((n) => n.kind === 'enum_member').map((n) => n.name);
+    expect(members).toEqual(expect.arrayContaining(['A', 'B', 'C']));
+  });
+
+  it('maps package to a namespace node and package-import to an import node', () => {
+    const code = `
+package pkg;
+ localparam int W = 8;
+endpackage
+import pkg::*;
+module m(input logic clk);
+endmodule
+`;
+    const r = extractFromSource('m.sv', code);
+    expect(r.nodes.some((n) => n.kind === 'namespace' && n.name === 'pkg')).toBe(true);
+    expect(r.nodes.some((n) => n.kind === 'import' && n.name === 'pkg')).toBe(true);
+  });
+
+  it('maps a scalar typedef to a type_alias node (engine path, not the hook)', () => {
+    const code = `
+package p;
+ typedef logic [7:0] byte_t;
+endpackage
+`;
+    const r = extractFromSource('p.sv', code);
+    expect(r.nodes.some((n) => n.kind === 'type_alias' && n.name === 'byte_t')).toBe(true);
+  });
+
+  it('emits one import per package in a comma-separated import statement', () => {
+    const code = `
+package a; localparam int X = 1; endpackage
+package b; localparam int Y = 2; endpackage
+import a::*, b::Y;
+module m(input logic clk);
+endmodule
+`;
+    const r = extractFromSource('m.sv', code);
+    const imports = r.nodes.filter((n) => n.kind === 'import').map((n) => n.name);
+    expect(imports).toEqual(expect.arrayContaining(['a', 'b']));
+  });
+
+  it('parses real-world RTL without errors (non-ANSI ports, generate)', () => {
+    const code = `
+module leaf_na(o, i);
+ output o;
+ input i;
+ assign o = ~i;
+endmodule
+module gen_top #(parameter int N = 4) (input [3:0] x, output [3:0] y);
+ genvar gi;
+ generate
+ for (gi = 0; gi < N; gi++) begin : g_loop
+ leaf_na u (.o(y[gi]), .i(x[gi]));
+ end
+ endgenerate
+endmodule
+`;
+    const r = extractFromSource('gen.sv', code);
+    const modules = r.nodes.filter((n) => n.kind === 'module').map((n) => n.name);
+    expect(modules).toEqual(expect.arrayContaining(['leaf_na', 'gen_top']));
+    // instantiation inside a generate-for is still attributed to the enclosing module
+    expect(
+      r.unresolvedReferences.some(
+        (x) => x.referenceKind === 'instantiates' && x.referenceName === 'leaf_na'
+      )
+    ).toBe(true);
+  });
+
+  it('captures UVM class inheritance, the new constructor, and class-vs-module method scoping', () => {
+    const code = `
+class base_driver extends uvm_driver #(my_txn);
+ function new(string name);
+ super.new(name);
+ endfunction
+ virtual function void build_phase(uvm_phase phase);
+ configure();
+ endfunction
+ function void configure();
+ endfunction
+endclass
+module m;
+ function int helper(input int v);
+ return v;
+ endfunction
+endmodule
+`;
+    const r = extractFromSource('uvm.sv', code);
+
+    // The extends clause binds to the base class (the `#(my_txn)` params are ignored).
+    expect(
+      r.unresolvedReferences.some(
+        (x) => x.referenceKind === 'extends' && x.referenceName === 'uvm_driver'
+      )
+    ).toBe(true);
+
+    // `function new` is captured as a method named 'new'.
+    expect(r.nodes.some((n) => n.kind === 'method' && n.name === 'new')).toBe(true);
+
+    // Class subroutines read as methods ...
+    const methods = r.nodes.filter((n) => n.kind === 'method').map((n) => n.name);
+    expect(methods).toEqual(expect.arrayContaining(['new', 'build_phase', 'configure']));
+
+    // ... while a module-level subroutine stays a function (not mislabeled a method).
+    expect(r.nodes.some((n) => n.kind === 'function' && n.name === 'helper')).toBe(true);
+    expect(r.nodes.some((n) => n.kind === 'method' && n.name === 'helper')).toBe(false);
+  });
+
+  it('still resolves calls made inside a class method body (method dispatch keeps body walk)', () => {
+    const code = `
+class c;
+ virtual function void build_phase();
+ configure();
+ endfunction
+ function void configure();
+ endfunction
+endclass
+`;
+    const r = extractFromSource('c.sv', code);
+    expect(
+      r.unresolvedReferences.some(
+        (x) => x.referenceKind === 'calls' && x.referenceName === 'configure'
+      )
+    ).toBe(true);
+  });
+
+  it('binds a package-qualified extends to the base class, not the package scope', () => {
+    const code = `
+package uvm_pkg;
+ class uvm_driver #(type T = int);
+ endclass
+endpackage
+class d extends uvm_pkg::uvm_driver #(my_txn);
+endclass
+`;
+    const r = extractFromSource('d.sv', code);
+    const ext = r.unresolvedReferences.filter((x) => x.referenceKind === 'extends');
+    // the qualifier is preserved (so the resolver can disambiguate by package) and
+    // the package is NOT mistaken for the base class
+    expect(ext.some((x) => x.referenceName === 'uvm_pkg::uvm_driver')).toBe(true);
+    expect(ext.some((x) => x.referenceName === 'uvm_pkg')).toBe(false);
+  });
+
+  it('resolves a package-qualified extends to the matching package class under a name collision', () => {
+    // Two classes share the bare name 'Base'; only qualifiedName/filePath differ.
+    const mk = (id: string, qualifiedName: string, filePath: string): Node =>
+      ({
+        id, name: 'Base', kind: 'class', qualifiedName, filePath,
+        language: 'systemverilog', startLine: 1, endLine: 1, startColumn: 0, endColumn: 0,
+      } as Node);
+    const p1Base = mk('P1', 'p1::Base', 'pkg1/p1.sv');
+    const p2Base = mk('P2', 'p2::Base', 'pkg2/p2.sv');
+    const ctx = {
+      getNodesByName: (n: string) => (n === 'Base' ? [p1Base, p2Base] : []),
+      getNodesByQualifiedName: (q: string) =>
+        [p1Base, p2Base].filter((n) => n.qualifiedName === q),
+      getNodesByKind: () => [],
+      getNodesInFile: () => [],
+      getNodesByLowerName: () => [],
+      fileExists: () => false,
+      readFile: () => null,
+      getProjectRoot: () => '',
+      getAllFiles: () => [],
+      getImportMappings: () => [],
+    } as unknown as ResolutionContext;
+    const ref: UnresolvedRef = {
+      fromNodeId: 'D', referenceName: 'p2::Base', referenceKind: 'extends',
+      line: 6, column: 0, filePath: 'tb/d.sv', language: 'systemverilog',
+    };
+    // bare "Base" would tie-break on proximity and could pick p1; the scoped name resolves p2.
+    expect(matchReference(ref, ctx)?.targetNodeId).toBe('P2');
+  });
+
+  it('extracts out-of-class method/constructor definitions as methods, not loose functions', () => {
+    const code = `
+class d;
+ extern function void cfg();
+ extern function new(string name);
+endclass
+function void d::cfg();
+endfunction
+function d::new(string name);
+endfunction
+module m;
+ function int helper();
+ return 0;
+ endfunction
+endmodule
+`;
+    const r = extractFromSource('d.sv', code);
+    const methods = r.nodes.filter((n) => n.kind === 'method').map((n) => n.name);
+    // out-of-class `d::cfg` / `d::new` are methods of class d (receiver path)
+    expect(methods).toEqual(expect.arrayContaining(['cfg', 'new']));
+    // the module subroutine stays a plain function ...
+    expect(r.nodes.some((n) => n.kind === 'function' && n.name === 'helper')).toBe(true);
+    // ... and the out-of-class defs are NOT also emitted as loose functions
+    expect(r.nodes.some((n) => n.kind === 'function' && (n.name === 'cfg' || n.name === 'new'))).toBe(false);
+  });
+
+  it('resolves an extends reference to a class over a same-named function (resolver kind bias)', () => {
+    const mk = (id: string, kind: Node['kind'], filePath: string): Node =>
+      ({
+        id, name: 'base_c', kind, qualifiedName: 'base_c', filePath,
+        language: 'systemverilog', startLine: 1, endLine: 1, startColumn: 0, endColumn: 0,
+      } as Node);
+    const klass = mk('CLASS', 'class', 'pkg/base.sv');
+    const fn = mk('FUNC', 'function', 'rtl/util.sv');
+    // function listed FIRST: only the extends kind-bias (not iteration order) can make the class win.
+    const ctx = {
+      getNodesByName: (n: string) => (n === 'base_c' ? [fn, klass] : []),
+      getNodesByQualifiedName: () => [],
+      getNodesByKind: () => [],
+      getNodesInFile: () => [],
+      getNodesByLowerName: () => [],
+      fileExists: () => false,
+      readFile: () => null,
+      getProjectRoot: () => '',
+      getAllFiles: () => [],
+      getImportMappings: () => [],
+    } as unknown as ResolutionContext;
+    const ref: UnresolvedRef = {
+      fromNodeId: 'D', referenceName: 'base_c', referenceKind: 'extends',
+      line: 5, column: 0, filePath: 'tb/d.sv', language: 'systemverilog',
+    };
+    expect(matchReference(ref, ctx)?.targetNodeId).toBe('CLASS');
+  });
+});
+
+/**
+ * Inheritance-aware `this.`/`super.` call resolution, end-to-end.
+ *
+ * These index a real on-disk fixture and assert the resolved `calls` EDGES
+ * (not extraction-time refs). The load-bearing case is `super.m()` binding to
+ * the PARENT's `m` rather than the caller's own — the self-edge trap.
+ */
+describe('SystemVerilog this./super. call resolution', () => {
+  let tempDir: string;
+  let cg: CodeGraph | undefined;
+
+  // Tear down the graph if one was created; otherwise remove the temp dir directly.
+  afterEach(() => {
+    if (cg) {
+      cg.destroy();
+      cg = undefined;
+    } else if (tempDir && fs.existsSync(tempDir)) {
+      fs.rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+
+  // Index `code` as a single .sv file and run resolution end-to-end.
+  async function indexSv(code: string): Promise<CodeGraph> {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-sv-resolve-'));
+    fs.writeFileSync(path.join(tempDir, 'dut.sv'), code);
+    const graph = await CodeGraph.init(tempDir, { index: true });
+    graph.resolveReferences();
+    return graph;
+  }
+
+  // The method node for `className::methodName` (qualified-name match avoids
+  // colliding with a same-named method on another class).
+  // NOTE(review): `includes` is a substring match, so 'Derived' would also match
+  // a method on 'Derived2'. The fixtures below never declare the same method on
+  // both, so the first hit is unambiguous here.
+  function method(graph: CodeGraph, className: string, methodName: string): Node {
+    const hit = graph
+      .getNodesByKind('method')
+      .find((n) => n.name === methodName && n.qualifiedName.includes(className));
+    expect(hit, `method ${className}::${methodName}`).toBeDefined();
+    return hit!;
+  }
+
+  // Target ids of every outgoing `calls` edge from `from`.
+  function callTargets(graph: CodeGraph, from: Node): string[] {
+    return graph
+      .getOutgoingEdges(from.id)
+      .filter((e) => e.kind === 'calls')
+      .map((e) => e.target);
+  }
+
+  it('binds super.m() to the parent class method, not the caller’s own', async () => {
+    cg = await indexSv(`
+class Base;
+ virtual function void m();
+ endfunction
+endclass
+class Derived extends Base;
+ virtual function void m();
+ super.m();
+ this.helper();
+ endfunction
+ function void helper();
+ endfunction
+endclass
+`);
+    const derivedM = method(cg, 'Derived', 'm');
+    const baseM = method(cg, 'Base', 'm');
+    const derivedHelper = method(cg, 'Derived', 'helper');
+
+    const targets = callTargets(cg, derivedM);
+
+    // super.m() resolves to Base::m ...
+    expect(targets).toContain(baseM.id);
+    // ... and this.helper() to the enclosing class's helper ...
+    expect(targets).toContain(derivedHelper.id);
+    // ... but there is NO self-edge Derived::m -> Derived::m.
+    expect(targets).not.toContain(derivedM.id);
+  });
+
+  it('resolves super.m() across a multi-level chain when the parent lacks m', async () => {
+    cg = await indexSv(`
+class Base;
+ virtual function void m();
+ endfunction
+endclass
+class Derived extends Base;
+ virtual function void m();
+ endfunction
+endclass
+class Derived2 extends Derived;
+ virtual function void run();
+ super.m();
+ endfunction
+endclass
+`);
+    const run = method(cg, 'Derived2', 'run');
+    const derivedM = method(cg, 'Derived', 'm');
+    const baseM = method(cg, 'Base', 'm');
+
+    const targets = callTargets(cg, run);
+
+    // Derived2 has no m; super starts at Derived and finds Derived::m first —
+    // the nearest override wins, so it must NOT skip past to Base::m.
+    expect(targets).toContain(derivedM.id);
+    expect(targets).not.toContain(baseM.id);
+  });
+
+  it('resolves this.m() up the extends chain to an inherited method', async () => {
+    cg = await indexSv(`
+class Base;
+ virtual function void shared();
+ endfunction
+endclass
+class Derived extends Base;
+ virtual function void go();
+ this.shared();
+ endfunction
+endclass
+`);
+    const go = method(cg, 'Derived', 'go');
+    const baseShared = method(cg, 'Base', 'shared');
+
+    // Derived doesn't declare shared(); this. walks up to Base::shared.
+    expect(callTargets(cg, go)).toContain(baseShared.id);
+  });
+
+  // Count this./super. rows still sitting in unresolved_refs after resolution.
+  // Reads the on-disk db directly (the public API exposes resolved edges, not
+  // the residual ref table) and closes its own connection so teardown's
+  // destroy() doesn't trip a Windows file lock.
+  function lingeringHandleRefs(): number {
+    const db = DatabaseConnection.open(path.join(tempDir, '.codegraph', 'codegraph.db'));
+    try {
+      const rows = db.getDb().prepare(
+        `SELECT reference_name FROM unresolved_refs WHERE reference_kind = 'calls'`
+      ).all() as Array<{ reference_name: string }>;
+      return rows.filter((r) => /^(this|super)\.\w+$/.test(r.reference_name)).length;
+    } finally {
+      db.close();
+    }
+  }
+
+  it('drops an unresolvable super.x() (base not indexed): no edge, no lingering ref', async () => {
+    // `undefined_base` is referenced but never defined here, so super.run() has
+    // no target. The pass must emit no calls edge AND not leave the ref behind
+    // to be re-walked on every future sync.
+    cg = await indexSv(`
+class only_child extends undefined_base;
+ virtual function void run();
+ super.run();
+ endfunction
+endclass
+`);
+    const run = method(cg, 'only_child', 'run');
+    // No spurious calls edge from the unresolvable super.run().
+    expect(callTargets(cg, run)).toHaveLength(0);
+    // And the ref is gone — resolved-or-dropped, never perpetually pending.
+    expect(lingeringHandleRefs()).toBe(0);
+  });
+
+  it('terminates on a cyclic extends chain without spurious edges', async () => {
+    // A <-> B mutual inheritance is illegal SV, but a malformed index must not
+    // hang the resolver. The visited-guard breaks the cycle; m() exists nowhere
+    // in the (broken) chain, so super.m() binds to nothing.
+    cg = await indexSv(`
+class A extends B;
+ virtual function void go();
+ super.m();
+ endfunction
+endclass
+class B extends A;
+endclass
+`);
+    const go = method(cg, 'A', 'go');
+    expect(callTargets(cg, go)).toHaveLength(0);
+    expect(lingeringHandleRefs()).toBe(0);
+  });
+});
+
+/**
+ * Class-composition (`has-a`) edges, end-to-end. A class field of a user-class
+ * type yields a `references` edge class->field-type, giving the UVM testbench
+ * topology (test has-an env, env has-an agent, ...). Builtin-typed and
+ * self-typed fields produce no edge.
+ */
+describe('SystemVerilog class-composition (has-a) edges', () => {
+  let tempDir: string;
+  let cg: CodeGraph | undefined;
+
+  // Tear down the graph if one was created; otherwise remove the temp dir directly.
+  afterEach(() => {
+    if (cg) {
+      cg.destroy();
+      cg = undefined;
+    } else if (tempDir && fs.existsSync(tempDir)) {
+      fs.rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+
+  // Write `code` to tb.sv in a fresh temp dir, index it, and resolve references.
+  async function indexSv(code: string): Promise<CodeGraph> {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-sv-compose-'));
+    fs.writeFileSync(path.join(tempDir, 'tb.sv'), code);
+    const graph = await CodeGraph.init(tempDir, { index: true });
+    graph.resolveReferences();
+    return graph;
+  }
+
+  // The class node called `name`; fails the test if it is missing.
+  function classNode(graph: CodeGraph, name: string): Node {
+    const hit = graph.getNodesByKind('class').find((n) => n.name === name);
+    expect(hit, `class ${name}`).toBeDefined();
+    return hit!;
+  }
+
+  // Qualified-name strings of every `references`-edge target from a class.
+  function refTargets(graph: CodeGraph, from: Node): string[] {
+    return graph
+      .getOutgoingEdges(from.id)
+      .filter((e) => e.kind === 'references')
+      .map((e) => graph.getNode(e.target)?.qualifiedName ?? e.target);
+  }
+
+  // `references` rows still pending in unresolved_refs (used to prove a builtin
+  // field left nothing behind). Closes its own db connection for clean teardown.
+  function lingeringRefs(): number {
+    const db = DatabaseConnection.open(path.join(tempDir, '.codegraph', 'codegraph.db'));
+    try {
+      const row = db.getDb().prepare(
+        `SELECT COUNT(*) AS c FROM unresolved_refs WHERE reference_kind = 'references'`
+      ).get() as { c: number };
+      return row.c;
+    } finally {
+      db.close();
+    }
+  }
+
+  it('emits a class->field-type references edge (deduped across multiple fields)', async () => {
+    cg = await indexSv(`
+class Sub;
+endclass
+class Top;
+ Sub a;
+ Sub b;
+endclass
+`);
+    const top = classNode(cg, 'Top');
+    const sub = classNode(cg, 'Sub');
+    // Two fields of the same type must collapse to exactly one Top -> Sub edge;
+    // checking only for existence would let a duplicate edge slip through.
+    const toSub = cg
+      .getOutgoingEdges(top.id)
+      .filter((e) => e.kind === 'references' && e.target === sub.id);
+    expect(toSub).toHaveLength(1);
+  });
+
+  it('does not emit a self-reference for a self-typed field', async () => {
+    cg = await indexSv(`
+class Node1;
+ Node1 nxt;
+endclass
+`);
+    const node1 = classNode(cg, 'Node1');
+    // A class pointing at itself is noise in a has-a graph — must be dropped.
+    expect(cg.getOutgoingEdges(node1.id).some((e) => e.kind === 'references' && e.target === node1.id)).toBe(false);
+  });
+
+  it('ignores builtin-typed fields: no edge, no lingering reference', async () => {
+    cg = await indexSv(`
+class HasPrimitives;
+ int count;
+ string name;
+endclass
+`);
+    const c = classNode(cg, 'HasPrimitives');
+    expect(refTargets(cg, c)).toHaveLength(0);
+    // int/string never become a ref at all, so nothing lingers unresolved.
+    expect(lingeringRefs()).toBe(0);
+  });
+
+  it('binds a package-qualified field type to the scoped class', async () => {
+    cg = await indexSv(`
+package pkg;
+ class Base;
+ endclass
+endpackage
+class Holder;
+ pkg::Base h;
+endclass
+`);
+    const holder = classNode(cg, 'Holder');
+    // The qualifier is preserved so the resolver lands on pkg::Base specifically.
+    expect(refTargets(cg, holder)).toContain('pkg::Base');
+  });
+
+  it('emits an edge for fields behind property qualifiers (rand/local/protected/const)', async () => {
+    // UVM fields are overwhelmingly `rand <type>` / `local`/`protected <type>`.
+    // Most qualifiers parse as a sibling node before the data_declaration; a
+    // `const` member instead exposes data_type directly (no data_declaration
+    // wrapper). The type extractor must handle both. Distinct types per
+    // qualifier ensure a miss can't be masked by dedup against the plain field.
+    cg = await indexSv(`
+class qa; endclass
+class qb; endclass
+class qc; endclass
+class qd; endclass
+class qe; endclass
+class qf; endclass
+class holder;
+ qa plain_h;
+ rand qb rand_h;
+ local qc local_h;
+ protected qd prot_h;
+ rand local qe rl_h;
+ const qf const_h;
+endclass
+`);
+    const holder = classNode(cg, 'holder');
+    const targets = refTargets(cg, holder);
+    for (const ty of ['qa', 'qb', 'qc', 'qd', 'qe', 'qf']) {
+      expect(targets, `holder should reference ${ty}`).toContain(ty);
+    }
+  });
+});
+
+/**
+ * UVM factory-create composition + TLM-connect dataflow, end-to-end. A
+ * `h = T::type_id::create(...)` yields a class->T `references` edge and feeds a
+ * per-class handle->component map; an `a.b.connect(c.d)` resolves both dotted
+ * chains through that map and emits a component->component dataflow edge.
+ */
+describe('SystemVerilog factory-create + TLM-connect (dataflow)', () => {
+  let tempDir: string;
+  let cg: CodeGraph | undefined;
+
+  // Tear down the graph if one was created; otherwise remove the temp dir directly.
+  afterEach(() => {
+    if (cg) {
+      cg.destroy();
+      cg = undefined;
+    } else if (tempDir && fs.existsSync(tempDir)) {
+      fs.rmSync(tempDir, { recursive: true, force: true });
+    }
+  });
+
+  // Write `code` to tb.sv in a fresh temp dir, index it, and resolve references.
+  async function indexSv(code: string): Promise<CodeGraph> {
+    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-sv-tlm-'));
+    fs.writeFileSync(path.join(tempDir, 'tb.sv'), code);
+    const graph = await CodeGraph.init(tempDir, { index: true });
+    graph.resolveReferences();
+    return graph;
+  }
+
+  // The class node called `name`; fails the test if it is missing.
+  function classNode(graph: CodeGraph, name: string): Node {
+    const hit = graph.getNodesByKind('class').find((n) => n.name === name);
+    expect(hit, `class ${name}`).toBeDefined();
+    return hit!;
+  }
+
+  // Names of every `references`-edge target from `from` (raw id if the node is gone).
+  function refTargetNames(graph: CodeGraph, from: Node): string[] {
+    return graph
+      .getOutgoingEdges(from.id)
+      .filter((e) => e.kind === 'references')
+      .map((e) => graph.getNode(e.target)?.name ?? e.target);
+  }
+
+  // Any `__sv_*__` marker still pending after resolution (must be zero).
+  // Every `_` in the prefix is escaped: a bare `_` is a single-character
+  // wildcard in LIKE, so an unescaped `__sv` would match any two leading chars.
+  function lingeringMarkers(): number {
+    const db = DatabaseConnection.open(path.join(tempDir, '.codegraph', 'codegraph.db'));
+    try {
+      const row = db.getDb().prepare(
+        `SELECT COUNT(*) AS c FROM unresolved_refs WHERE reference_name LIKE '\\_\\_sv\\_%' ESCAPE '\\'`
+      ).get() as { c: number };
+      return row.c;
+    } finally {
+      db.close();
+    }
+  }
+
+  it('emits a class->type edge for a factory create', async () => {
+    cg = await indexSv(`
+class Sub; endclass
+class Top;
+ Sub h;
+ function void build_phase();
+ h = Sub::type_id::create("h", this);
+ endfunction
+endclass
+`);
+    const top = classNode(cg, 'Top');
+    expect(refTargetNames(cg, top)).toContain('Sub');
+    expect(lingeringMarkers()).toBe(0);
+  });
+
+  it('captures a factory override: a base-typed handle created as a derived type', async () => {
+    cg = await indexSv(`
+class base_drv; endclass
+class deriv_drv extends base_drv; endclass
+class agent_c;
+ base_drv drv;
+ function void build_phase();
+ drv = deriv_drv::type_id::create("drv", this);
+ endfunction
+endclass
+`);
+    const agent = classNode(cg, 'agent_c');
+    // The create binds the DERIVED type even though the handle is base-typed —
+    // the factory-override win that a field-only view would miss.
+    expect(refTargetNames(cg, agent)).toContain('deriv_drv');
+  });
+
+  it('resolves a TLM connect chain to a component->component dataflow edge', async () => {
+    cg = await indexSv(`
+class M;
+ int ap;
+endclass
+class B;
+ int export_h;
+endclass
+class A;
+ M m;
+ function void build_phase();
+ m = M::type_id::create("m", this);
+ endfunction
+endclass
+class env_c;
+ A a;
+ B b;
+ function void build_phase();
+ a = A::type_id::create("a", this);
+ b = B::type_id::create("b", this);
+ endfunction
+ function void connect_phase();
+ a.m.ap.connect(b.export_h);
+ endfunction
+endclass
+`);
+    const m = classNode(cg, 'M');
+    const b = classNode(cg, 'B');
+    // a -> A, A.m -> M (chain), `ap` is a port (not created) so the walk stops
+    // at M; the arg chain stops at B. Dataflow edge M -> B.
+    expect(cg.getOutgoingEdges(m.id).some((e) => e.kind === 'references' && e.target === b.id)).toBe(true);
+    // The port token must NOT produce an edge to a non-component.
+    expect(refTargetNames(cg, m)).not.toContain('A');
+    expect(lingeringMarkers()).toBe(0);
+  });
+
+  it('drops a connect whose handles do not resolve: no edge, no lingering marker', async () => {
+    cg = await indexSv(`
+class lonely;
+ function void connect_phase();
+ foo.bar.connect(baz.qux); // no creates anywhere -> chains resolve to nothing
+ endfunction
+endclass
+`);
+    const lonely = classNode(cg, 'lonely');
+    expect(refTargetNames(cg, lonely)).toHaveLength(0);
+    // The unresolvable connect marker must not linger to be re-walked each sync.
+    expect(lingeringMarkers()).toBe(0);
+  });
+
+  it('handles a `__`-bearing handle in a factory override (marker split is `|`, not `__`)', async () => {
+    // `my__h` is a legal SV name; a `__`-delimited marker would split the body
+    // mid-identifier and silently drop the override edge.
+    cg = await indexSv(`
+class base_t; endclass
+class deriv_t extends base_t; endclass
+class Holder;
+ base_t my__h;
+ function void build_phase();
+ my__h = deriv_t::type_id::create("h", this);
+ endfunction
+endclass
+`);
+    const holder = classNode(cg, 'Holder');
+    // The factory-override edge to the DERIVED type must survive.
+    expect(refTargetNames(cg, holder)).toContain('deriv_t');
+    expect(lingeringMarkers()).toBe(0);
+  });
+
+  it('resolves a TLM connect that hops through a `__`-bearing handle', async () => {
+    cg = await indexSv(`
+class M;
+ int ap;
+endclass
+class B;
+ int export_h;
+endclass
+class A;
+ M sub__mon;
+ function void build_phase();
+ sub__mon = M::type_id::create("sub__mon", this);
+ endfunction
+endclass
+class env_c;
+ A a__inst;
+ B b;
+ function void build_phase();
+ a__inst = A::type_id::create("a__inst", this);
+ b = B::type_id::create("b", this);
+ endfunction
+ function void connect_phase();
+ a__inst.sub__mon.ap.connect(b.export_h);
+ endfunction
+endclass
+`);
+    const m = classNode(cg, 'M');
+    const b = classNode(cg, 'B');
+    // Chain a__inst -> A, A.sub__mon -> M, port `ap` stops -> dataflow M -> B.
+    expect(cg.getOutgoingEdges(m.id).some((e) => e.kind === 'references' && e.target === b.id)).toBe(true);
+    expect(lingeringMarkers()).toBe(0);
+  });
+
+  it('does not duplicate the field edge: a declared+created type yields exactly one row', async () => {
+    cg = await indexSv(`
+class Comp; endclass
+class Owner;
+ Comp c;
+ function void build_phase();
+ c = Comp::type_id::create("c", this);
+ endfunction
+endclass
+`);
+    const owner = classNode(cg, 'Owner');
+    const comp = classNode(cg, 'Comp');
+    // Field decl + factory create of the SAME type → ONE references row, not two
+    // (the create edge is suppressed because the field edge already covers it).
+    const toComp = cg
+      .getOutgoingEdges(owner.id)
+      .filter((e) => e.kind === 'references' && e.target === comp.id);
+    expect(toComp).toHaveLength(1);
+  });
+});
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index 576845e20..7df61f4ab 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -38,6 +38,7 @@ const WASM_GRAMMAR_FILES: Record = {
   lua: 'tree-sitter-lua.wasm',
   luau: 'tree-sitter-luau.wasm',
   objc: 'tree-sitter-objc.wasm',
+  systemverilog: 'tree-sitter-systemverilog.wasm',
 };
 
 /**
@@ -101,6 +102,11 @@ export const EXTENSION_MAP: Record = {
   '.luau': 'luau',
   '.m': 'objc',
   '.mm': 'objc',
+  // SystemVerilog / Verilog — one grammar parses both dialects
+  '.sv': 'systemverilog',
+  '.svh': 'systemverilog',
+  '.v': 'systemverilog',
+  '.vh': 'systemverilog',
   // XML: file-level tracking; the MyBatis extractor matches ``
   // shape and emits SQL-statement nodes (other XML returns empty).
   '.xml': 'xml',
@@ -185,7 +191,7 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise> = {
   typescript: typescriptExtractor,
@@ -49,4 +50,5 @@ export const EXTRACTORS: Partial> = {
   lua: luaExtractor,
   luau: luauExtractor,
   objc: objcExtractor,
+  systemverilog: systemverilogExtractor,
 };
diff --git a/src/extraction/languages/systemverilog.ts b/src/extraction/languages/systemverilog.ts
new file mode 100644
index 000000000..ef93eee48
--- /dev/null
+++ b/src/extraction/languages/systemverilog.ts
@@ -0,0 +1,577 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getChildByField, getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor, ExtractorContext } from '../tree-sitter-types';
+import type { NodeKind } from '../../types';
+
+/**
+ * SystemVerilog / Verilog extractor (tree-sitter-systemverilog grammar).
+ * + * The design hierarchy is the payload here: a module's `callers` are the modules + * that instantiate it, its `callees`/contained children are its sub-instances and + * subroutines, and `impact` is the instantiation cone. SystemVerilog keeps + * declaration names nested (module name under a `*_ansi_header`, subroutine name + * under a `*_body_declaration`, a typedef under the `type_name` field), so a single + * resolver walks those shapes rather than relying on a flat name field. + */ + +/** Resolve a declaration's identifier across SV's varied nesting. */ +function svName(node: SyntaxNode, source: string): string | undefined { + // The constructor `function new(...)` parses as class_constructor_declaration + // with no name field — its identity is always 'new'. + if (node.type === 'class_constructor_declaration') return 'new'; + + // class_declaration / package_declaration expose `name` directly. + const direct = getChildByField(node, 'name'); + if (direct) return getNodeText(direct, source).trim(); + + // `typedef NAME;` carries the alias on the `type_name` field. + const typeName = getChildByField(node, 'type_name'); + if (typeName) return getNodeText(typeName, source).trim(); + + // module/interface/program keep the name one level down in the ANSI header; + // function/task keep it on the *_body_declaration. + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child) continue; + if (/_(ansi_|nonansi_)?header$/.test(child.type) || /_body_declaration$/.test(child.type)) { + const nm = getChildByField(child, 'name'); + if (nm) return getNodeText(nm, source).trim(); + } + } + + // Fallback: only an enum member names itself with a sole shallow identifier. + // For any other node whose name didn't resolve above, return undefined so the + // engine skips it — a skipped node beats one mislabeled with a port/param id. 
+ if (node.type === 'enum_name_declaration') { + for (let i = 0; i < node.namedChildCount; i++) { + const c = node.namedChild(i); + if (c && c.type === 'simple_identifier') return getNodeText(c, source).trim(); + } + } + return undefined; +} + +/** The instantiated module *type* (`adder` in `adder u_add (...)`), not the `u_add` label. */ +function instantiatedType(node: SyntaxNode, source: string): string | undefined { + // The grammar tags the type as the `instance_type` field; the instance label + // lives deeper under hierarchical_instance > name_of_instance. + const t = getChildByField(node, 'instance_type'); + if (t) return getNodeText(t, source).trim(); + const first = node.namedChild(0); + return first && first.type === 'simple_identifier' ? getNodeText(first, source).trim() : undefined; +} + +// Built-in data types that can surface as a bare identifier under data_type +// (most lower to dedicated grammar nodes like integer_atom_type, but a few +// user-style spellings slip through). A field of one of these is not a has-a +// relationship to a user class, so it never becomes a composition edge. +const SV_BUILTIN_TYPES = new Set([ + 'int', 'integer', 'bit', 'logic', 'reg', 'byte', 'shortint', 'longint', + 'time', 'real', 'shortreal', 'realtime', 'string', 'chandle', 'event', 'void', +]); + +/** + * The user-class type of a `class_property` field, or undefined for a builtin / + * untyped field. The type sits under one of two shapes: ordinary/qualified fields + * nest it as `data_declaration -> data_type_or_implicit -> data_type` (the + * data_declaration follows any leading rand/local/protected qualifier siblings, + * so it is not at a fixed index); a `const` member exposes `data_type` as a direct + * child of class_property. 
From data_type, the type identifier is read: + * - a `class_type` child joins its DIRECT simple_identifier children with `::` + * (so `pkg::base` keeps its scope, and a `#(param)` value assignment — which + * is a sibling, not a simple_identifier — is excluded: `port #(txn)` -> `port`). + * - otherwise a direct simple_identifier under data_type is the type name. + * - a builtin (`int` -> integer_atom_type, `string` -> empty) has no + * simple_identifier here, so this returns undefined and no edge is emitted. + */ +function fieldTypeName(classProperty: SyntaxNode, source: string): string | undefined { + // Two field shapes carry the type differently: + // - ordinary/qualified fields nest it under a `data_declaration` (which + // itself may sit behind rand/local/protected sibling *_qualifier nodes, + // so it isn't at a fixed index — `rand local foo_t f;` puts it at 2); + // - a `const` member skips the wrapper entirely and exposes `data_type` + // as a direct child of class_property. + // Find the data_type for either shape; qualifier nodes carry no type info. + let dataType: SyntaxNode | undefined; + for (let i = 0; i < classProperty.namedChildCount; i++) { + const c = classProperty.namedChild(i); + if (!c) continue; + if (c.type === 'data_type') { + dataType = c; // const-member form: class_property > data_type + break; + } + if (c.type === 'data_declaration') { + for (let j = 0; j < c.namedChildCount; j++) { + const d = c.namedChild(j); + if (!d) continue; + if (d.type === 'data_type_or_implicit') { + for (let k = 0; k < d.namedChildCount; k++) { + const e = d.namedChild(k); + if (e && e.type === 'data_type') { dataType = e; break; } + } + } else if (d.type === 'data_type') { + dataType = d; + } + if (dataType) break; + } + break; // only the first data_declaration child is examined + } + } + if (!dataType) return undefined; + + const first = dataType.namedChild(0); + if (!first) return undefined; // e.g.
`string` parses with no child here + + if (first.type === 'class_type') { + const ids: string[] = []; + for (let i = 0; i < first.namedChildCount; i++) { + const c = first.namedChild(i); + if (c && c.type === 'simple_identifier') ids.push(getNodeText(c, source).trim()); + } + return ids.length > 0 ? ids.join('::') : undefined; + } + if (first.type === 'simple_identifier') return getNodeText(first, source).trim(); + + // Builtin scalar/integer types (integer_atom_type, etc.) -> not a user type. (A builtin spelled as a bare simple_identifier was already returned above; the caller filters those.) + return undefined; +} + +/** The `name` field text of a method_call's method_call_body child, if any. */ +function methodCallName(methodCall: SyntaxNode, source: string): string | undefined { + for (let i = 0; i < methodCall.namedChildCount; i++) { + const c = methodCall.namedChild(i); + if (c && c.type === 'method_call_body') { + const nm = getChildByField(c, 'name'); + if (nm) return getNodeText(nm, source).trim(); + } + } + return undefined; +} + +/** The leading simple_identifier of the first hierarchical_identifier in a subtree that leads with one (pre-order depth-first; gives up after 64 visited nodes). */ +function firstHierId(node: SyntaxNode, source: string): string | undefined { + const stack: SyntaxNode[] = [node]; + let guard = 0; + while (stack.length && guard++ < 64) { + const cur = stack.pop()!; + if (cur.type === 'hierarchical_identifier') { + const id = cur.namedChild(0); + if (id && id.type === 'simple_identifier') return getNodeText(id, source).trim(); + } + for (let i = cur.namedChildCount - 1; i >= 0; i--) { + const c = cur.namedChild(i); + if (c) stack.push(c); + } + } + return undefined; +} + +/** + * For a `T::type_id::create(...)` call, the created Type `T`. The outer + * method_call is `create`; its receiver `primary` nests an inner method_call + * named `type_id` whose own `primary > hierarchical_identifier` leads with the + * base type id. Returns undefined unless a `type_id` link is actually present — + * so a plain `obj.create()` (no factory) is not mistaken for a UVM create.
+ */ +function createTypeFromChain(createCall: SyntaxNode, source: string): string | undefined { + // The receiver is the create call's primary child (not the method_call_body). + let primary: SyntaxNode | undefined; + for (let i = 0; i < createCall.namedChildCount; i++) { + const c = createCall.namedChild(i); + if (c && c.type === 'primary') { primary = c; break; } + } + if (!primary) return undefined; + + // Find the inner method_call (the `type_id` link) under that primary (depth-first, at most 64 nodes visited). + const stack: SyntaxNode[] = [primary]; + let guard = 0; + while (stack.length && guard++ < 64) { + const cur = stack.pop()!; + if (cur.type === 'method_call' && methodCallName(cur, source) === 'type_id') { + // Its own receiver primary leads with the base type identifier. + for (let i = 0; i < cur.namedChildCount; i++) { + const c = cur.namedChild(i); + if (c && c.type === 'primary') return firstHierId(c, source); + } + return undefined; + } + for (let i = cur.namedChildCount - 1; i >= 0; i--) { + const c = cur.namedChild(i); + if (c) stack.push(c); + } + } + return undefined; +} + +/** + * The assigned handle for a create call: walk up to the enclosing + * operator_assignment (under blocking_assignment) and read the leading + * identifier of its variable_lvalue. Empty when the create isn't assigned + * (e.g. a bare `create(...)` statement). At most 24 ancestors are inspected, and the walk stops at the first assignment ancestor. + */ +function assignedHandle(createCall: SyntaxNode, source: string): string { + for (let p = createCall.parent, depth = 0; p && depth < 24; p = p.parent, depth++) { + if (p.type === 'operator_assignment' || p.type === 'blocking_assignment') { + for (let i = 0; i < p.namedChildCount; i++) { + const c = p.namedChild(i); + if (c && c.type === 'variable_lvalue') { + const h = firstHierId(c, source); + if (h) return h; + } + } + return ''; // assignment found, but no lvalue identifier could be read + } + } + return ''; +} + +/** Dotted segments of a hierarchical_identifier (`a.b.c` -> ['a','b','c']).
*/ +function hierIdSegments(hierId: SyntaxNode, source: string): string[] { + const out: string[] = []; + for (let i = 0; i < hierId.namedChildCount; i++) { + const c = hierId.namedChild(i); + if (c && c.type === 'simple_identifier') out.push(getNodeText(c, source).trim()); + } + return out; +} + +/** + * For a `a.b.connect(c.d)` statement, the (fromChain, toChain) dotted handle + * paths — or undefined when the subroutine_call isn't a `.connect()`. The + * subroutine_call wraps a `tf_call` whose hierarchical_identifier ends in + * `connect`; the prefix is the from-chain and the first argument's + * hierarchical_identifier is the to-chain. + */ +function connectChains(subroutineCall: SyntaxNode, source: string): { from: string; to: string } | undefined { + let tfCall: SyntaxNode | undefined; + for (let i = 0; i < subroutineCall.namedChildCount; i++) { + const c = subroutineCall.namedChild(i); + if (c && c.type === 'tf_call') { tfCall = c; break; } + } + if (!tfCall) return undefined; + + let hierId: SyntaxNode | undefined; + let args: SyntaxNode | undefined; + for (let i = 0; i < tfCall.namedChildCount; i++) { + const c = tfCall.namedChild(i); + if (!c) continue; + if (c.type === 'hierarchical_identifier') hierId = c; + else if (c.type === 'list_of_arguments') args = c; + } + if (!hierId) return undefined; + + const segs = hierIdSegments(hierId, source); + if (segs.length < 2 || segs[segs.length - 1] !== 'connect') return undefined; // need prefix.connect + const fromChain = segs.slice(0, -1).join('.'); + + // The to-chain is the first hierarchical_identifier found (depth-first, at most 64 nodes) in the argument list.
+ if (!args) return undefined; + const stack: SyntaxNode[] = [args]; + let guard = 0; + let toChain: string | undefined; + while (stack.length && guard++ < 64) { + const cur = stack.pop()!; + if (cur.type === 'hierarchical_identifier') { + toChain = hierIdSegments(cur, source).join('.'); + break; + } + for (let i = cur.namedChildCount - 1; i >= 0; i--) { + const c = cur.namedChild(i); + if (c) stack.push(c); + } + } + if (!toChain) return undefined; + return { from: fromChain, to: toChain }; +} + +/** True when the subtree is an enum typedef, so it becomes an `enum` node not a type alias (breadth-first scan, capped at 256 visited nodes). */ +function isEnumTypedef(node: SyntaxNode): boolean { + const queue: SyntaxNode[] = [node]; + let guard = 0; + while (queue.length && guard++ < 256) { + const cur = queue.shift()!; + if (cur.type === 'enum_name_declaration' || cur.type === 'enum_base_type') return true; + for (let i = 0; i < cur.namedChildCount; i++) { + const c = cur.namedChild(i); + if (c) queue.push(c); + } + } + return false; +} + +// module/program are structural containers we model as 'module'; package -> 'namespace'. +const MODULE_NODES = new Set(['module_declaration', 'program_declaration']); + +export const systemverilogExtractor: LanguageExtractor = { + // Subroutines. function/task are dual-listed in methodTypes (gated by + // methodScopeKinds below) so they read as 'method' inside a class but stay + // 'function' inside a module. class_constructor_declaration ('function new') + // is method-only — it never appears outside a class. + functionTypes: ['function_declaration', 'task_declaration'], + methodTypes: ['function_declaration', 'task_declaration', 'class_constructor_declaration'], + // A subroutine is a method only inside a class scope, not a module. + methodScopeKinds: ['class'], + // UVM / OOP classes use the engine's class machinery directly.
+ classTypes: ['class_declaration'], + interfaceTypes: ['interface_declaration'], + structTypes: [], + enumTypes: [], + typeAliasTypes: ['type_declaration'], + importTypes: ['include_compiler_directive'], + callTypes: ['tf_call'], // function/task subroutine calls -> 'calls' edges + variableTypes: [], + + nameField: 'name', + bodyField: 'body', // SV has no 'body' field; extract* falls back to the node itself + paramsField: 'tf_port_list', + + resolveName: (node, source) => svName(node, source), + + // SV functions/tasks keep statements under a *_body_declaration child (no 'body' + // field); point the engine's body walk there or internal subroutine calls are lost. + resolveBody: (node) => { + if (node.type === 'function_declaration' || node.type === 'task_declaration') { + for (let i = 0; i < node.namedChildCount; i++) { + const c = node.namedChild(i); + if (c && /_body_declaration$/.test(c.type)) return c; + } + } + // The constructor has no *_body_declaration wrapper; walk the whole node so + // calls in its body (super.new, helper calls) are still captured. + if (node.type === 'class_constructor_declaration') return node; + return null; + }, + + // Out-of-class definitions (`function void D::foo();`, `task D::run();`, + // `function D::new();`) carry a `class_scope` naming the owning class — the + // engine then extracts them as methods of D (qualified name + contains edge), + // matching how Go/Rust receivers work. Inline definitions have no class_scope, + // so this returns undefined and they follow the normal scope-based path. + getReceiverType: (node, source) => { + // function/task keep the class_scope under their *_body_declaration; the + // constructor keeps it as a direct child.
+ let container = node; + if (node.type !== 'class_constructor_declaration') { + for (let i = 0; i < node.namedChildCount; i++) { + const c = node.namedChild(i); + if (c && /_body_declaration$/.test(c.type)) { + container = c; + break; + } + } + } + for (let i = 0; i < container.namedChildCount; i++) { + const c = container.namedChild(i); + if (c && c.type === 'class_scope') { + const classType = c.namedChild(0); // class_scope > class_type > simple_identifier(s) + if (!classType) return undefined; + const ids: SyntaxNode[] = []; + for (let j = 0; j < classType.namedChildCount; j++) { + const id = classType.namedChild(j); + if (id && id.type === 'simple_identifier') ids.push(id); + } + const last = ids[ids.length - 1]; // qualified pkg::D -> the class is the last id + if (last) return getNodeText(last, source).trim(); + } + } + return undefined; + }, + + extractImport: (node, source) => { + // Only `include "file" is handled here; package imports are emitted in the + // visitNode hook so a multi-package statement (`import a::*, b::x;`) yields + // one import per package rather than just the first. + const signature = getNodeText(node, source).trim(); + const m = signature.match(/["<]([^">]+)[">]/); // `include "defs.svh" + return m && m[1] ? { moduleName: m[1], signature } : null; + }, + + // `super.m()` / `this.m()` parse as `method_call > implicit_class_handle + + // method_call_body{name}` — distinct from a named-handle `obj.m()`, which is a + // `tf_call` already covered by callTypes. The engine can't resolve these from a + // flat name (the receiver is implicit), so we surface a `this.`/`super.` + // marker ref and let the SV inheritance-chain pass bind it. `super.` must reach + // the PARENT class's `m`, never the caller's own — hence the handle is preserved + // in the ref name rather than collapsed to the bare method here.
+ // + // This runs for every body node that isn't a tf_call, so it must be strict: + // only a method_call on an implicit_class_handle, a `type_id`-backed `create` + // method_call, or a `.connect()` subroutine_call qualifies; all else returns undefined. + extractBareCall: (node, source) => { + if (node.type === 'method_call') { + const handle = node.namedChild(0); + + // this./super. dispatch: implicit_class_handle receiver. + if (handle && handle.type === 'implicit_class_handle') { + // The handle text is `this`, `super`, or `this.super`; only `super`-bearing + // forms walk up the extends chain — the rest resolve from the enclosing class. + const receiver = getNodeText(handle, source).includes('super') ? 'super' : 'this'; + const methodName = methodCallName(node, source); + return methodName ? `${receiver}.${methodName}` : undefined; + } + + // UVM factory create: `h = T::type_id::create("h", this)`. The outer call + // is `create`; its receiver chain carries the `type_id` link and the base + // type. We can't resolve the LHS handle -> component class from the flat + // name here (the class graph isn't built yet), so emit a marker carrying + // the handle and the created Type; a post-pass binds it once the graph + // exists. Guard on a real type_id link so plain `obj.create()` is ignored. + if (methodCallName(node, source) === 'create') { + const type = createTypeFromChain(node, source); + if (type) { + const handleName = assignedHandle(node, source); // '' when not assigned + // Separate the handle and type with `|`, not `__`: a double underscore + // is a legal SV identifier substring (`cfg__db`), so splitting the + // body on `__` would mangle such names. A pipe can't occur in an + // identifier or a dotted chain, so it splits unambiguously. + return `__sv_create__${handleName}|${type}`; + } + } + return undefined; + } + + // TLM dataflow: `a.b.connect(c.d)`. The subroutine_call wraps a tf_call whose + // hierarchical_identifier ends in `connect`.
The handle chains can't be + // resolved to component classes until the create-map exists, so emit a + // marker with both dotted chains for the post-pass. (The inner tf_call still + // emits a harmless unresolved `connect` call — dropped during resolution.) + if (node.type === 'subroutine_call') { + const chains = connectChains(node, source); + // `|` separates the two dotted chains (see the create marker above for why + // `__` is unsafe); dots stay as the in-chain token separator. + if (chains) return `__sv_connect__${chains.from}|${chains.to}`; + return undefined; + } + + return undefined; + }, + + /** + * The engine's list-driven dispatch can only emit class/struct/enum/interface/trait, + * never 'module', and module instantiation isn't a call expression — so the structural + * containers and the design-hierarchy edges are created here. + */ + visitNode: (node: SyntaxNode, ctx: ExtractorContext): boolean => { + const t = node.type; + + // Enum typedefs: the engine's type-alias path can't emit enum members (it + // looks for enumTypes, which SV leaves empty), so build the enum + members here. + if (t === 'type_declaration' && isEnumTypedef(node)) { + const name = svName(node, ctx.source); + if (!name) return false; + const enumNode = ctx.createNode('enum', name, node); + if (!enumNode) return true; + ctx.pushScope(enumNode.id); + const q: SyntaxNode[] = [node]; + let guard = 0; + while (q.length && guard++ < 512) { + const cur = q.shift()!; + if (cur.type === 'enum_name_declaration') { + const mn = svName(cur, ctx.source); + if (mn) ctx.createNode('enum_member', mn, cur); + continue; + } + for (let i = 0; i < cur.namedChildCount; i++) { + const c = cur.namedChild(i); + if (c) q.push(c); + } + } + ctx.popScope(); + return true; + } + + // Package imports: one import node + `imports` ref per package_import_item, + // so `import a::*, b::x;` produces both. (The engine's single-ImportInfo path + // would only surface the first.)
+ if (t === 'package_import_declaration') { + const fromId = ctx.nodeStack[ctx.nodeStack.length - 1]; + const line = node.startPosition.row + 1; + const column = node.startPosition.column; + for (let i = 0; i < node.namedChildCount; i++) { + const item = node.namedChild(i); + if (!item || item.type !== 'package_import_item') continue; + let pkg: string | undefined; + for (let j = 0; j < item.namedChildCount; j++) { + const c = item.namedChild(j); + if (c && c.type === 'simple_identifier') { + pkg = getNodeText(c, ctx.source).trim(); // leading id is the package name + break; + } + } + if (!pkg) continue; + const imp = ctx.createNode('import', pkg, item); + if (imp && fromId) { + ctx.addUnresolvedReference({ fromNodeId: fromId, referenceName: pkg, referenceKind: 'imports', line, column }); + } + } + return true; + } + + if (MODULE_NODES.has(t) || t === 'package_declaration') { + const kind: NodeKind = t === 'package_declaration' ? 'namespace' : 'module'; + const name = svName(node, ctx.source); + if (!name) return false; // malformed -> let default dispatch try + const created = ctx.createNode(kind, name, node); + if (!created) return true; + ctx.pushScope(created.id); + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child) ctx.visitNode(child); + } + ctx.popScope(); + return true; // fully handled + } + + if (t === 'module_instantiation') { + const fromId = ctx.nodeStack[ctx.nodeStack.length - 1]; + const typeName = instantiatedType(node, ctx.source); // bind to the module type, not the u_xxx label + if (fromId && typeName) { + const line = node.startPosition.row + 1; + const column = node.startPosition.column; + // `instantiates` is the semantically-correct edge for the design hierarchy. + // We also emit `calls` so the call-graph commands (callers/callees) surface + // instantiation — "who instantiates this module" is the RTL analog of "who + // calls this function", and `impact` already walks both.
+ // Known limit: callers/callees traverse `calls` globally (every language), so + // we keep the `calls` edge rather than rewiring that path. If a project has + // BOTH a module and a subroutine of the same name, the `calls` edge can + // mis-bind to the subroutine; the `instantiates` edge prefers `module` via the + // name-matcher bias, but a same-file subroutine can still outscore it. Rare in RTL. + ctx.addUnresolvedReference({ fromNodeId: fromId, referenceName: typeName, referenceKind: 'instantiates', line, column }); + ctx.addUnresolvedReference({ fromNodeId: fromId, referenceName: typeName, referenceKind: 'calls', line, column }); + } + return false; // keep walking: port-connection expressions may contain calls + } + + // Class fields of a user-class type are the has-a topology of a UVM + // testbench (env has-an agent, agent has-a driver, ...). Emit a + // `references` edge class->field-type so callers/callees/impact surface it + // (those traversals already include `references`); the resolver binds the + // type name to the class via the normal name/qualified-name match. We point + // the edge at the class, not a field node — callees doesn't walk `contains`, + // so a class->field->type chain would never surface. + if (t === 'class_property') { + const fromId = ctx.nodeStack[ctx.nodeStack.length - 1]; + const typeName = fieldTypeName(node, ctx.source); + if (fromId && typeName && !SV_BUILTIN_TYPES.has(typeName)) { + // Walk up to the enclosing class_declaration: it both confirms a real + // class context (not just whatever sits on nodeStack) and gives the + // name needed to drop self-pointers (`uvm_component parent;` inside + // uvm_component) — a class referencing itself is noise in a has-a graph. NOTE(review): the check compares names as written, so a scoped `pkg::C` field type would presumably not equal a bare class name — confirm. + let enclosingClass: SyntaxNode | undefined; + for (let p = node.parent; p; p = p.parent) { + if (p.type === 'class_declaration') { enclosingClass = p; break; } + } + const enclosingClassName = enclosingClass ?
svName(enclosingClass, ctx.source) : undefined; + if (enclosingClass && typeName !== enclosingClassName) { + const line = node.startPosition.row + 1; + const column = node.startPosition.column; + ctx.addUnresolvedReference({ fromNodeId: fromId, referenceName: typeName, referenceKind: 'references', line, column }); + } + } + return false; // keep walking: a field initializer may contain calls + } + + return false; + }, +}; diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index 6c04fbaeb..aa9cc3d7f 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -145,6 +145,16 @@ export interface LanguageExtractor { extraClassNodeTypes?: string[]; /** Whether methods can be top-level without enclosing class (Go: true) */ methodsAreTopLevel?: boolean; + /** + * Restrict method-vs-function classification to specific parent scope kinds. + * A node listed in both functionTypes and methodTypes is extracted as a + * 'method' only when its immediate enclosing scope (the nearest extracted + * parent on the node stack) is one of these kinds; otherwise it stays a + * 'function'. SystemVerilog uses ['class'] so a subroutine is a method inside + * a class but a plain function inside a module. When unset, the broad + * class-like check (class/struct/interface/trait/enum/module) is used. + */ + methodScopeKinds?: NodeKind[]; /** NodeKind to use for interface-like declarations (Rust: 'trait').
Default: 'interface' */ interfaceKind?: NodeKind; diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index c6eb93ac9..8db3e63bc 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -288,7 +288,7 @@ export class TreeSitterExtractor { // Check for function declarations // For Python/Ruby, function_definition inside a class should be treated as method if (this.extractor.functionTypes.includes(nodeType)) { - if (this.isInsideClassLikeNode() && this.extractor.methodTypes.includes(nodeType)) { + if (this.isInsideMethodScope() && this.extractor.methodTypes.includes(nodeType)) { // Inside a class - treat as method this.extractMethod(node); skipChildren = true; // extractMethod visits children via visitFunctionBody @@ -595,6 +595,29 @@ export class TreeSitterExtractor { ); } + /** + * Whether the immediate enclosing scope is one of the given node kinds. + * Narrower than isInsideClassLikeNode — lets a language scope method + * classification to true OO containers only. Returns false when the node stack is empty or the parent id is not found in this.nodes. + */ + private isInsideScopeKinds(kinds: readonly NodeKind[]): boolean { + const parentId = this.nodeStack[this.nodeStack.length - 1]; + if (!parentId) return false; + const parentNode = this.nodes.find((n) => n.id === parentId); + return !!parentNode && kinds.includes(parentNode.kind); + } + + /** + * The scope test gating method-vs-function classification. Defaults to the + * broad class-like check; a language can narrow it via methodScopeKinds + * (SystemVerilog limits 'method' to `class` scope so module-level + * subroutines stay 'function'). + */ + private isInsideMethodScope(): boolean { + const kinds = this.extractor?.methodScopeKinds; + return kinds ? this.isInsideScopeKinds(kinds) : this.isInsideClassLikeNode(); + } + /** * Extract a function */ @@ -743,7 +766,7 @@ export class TreeSitterExtractor { // For most languages, only extract as method if inside a class-like node // Languages with methodsAreTopLevel (e.g.
Go) always treat them as methods // Languages with getReceiverType (e.g. Rust) extract as method when receiver is found - if (!this.isInsideClassLikeNode() && !this.extractor.methodsAreTopLevel && !receiverType) { + if (!this.isInsideMethodScope() && !this.extractor.methodsAreTopLevel && !receiverType) { // Skip method_definition nodes inside object literals (getters/setters/methods // in inline objects). These are ephemeral and create noise (e.g., Svelte context // objects: `ctx.set({ get view() { ... } })`). @@ -2262,6 +2285,32 @@ export class TreeSitterExtractor { } } + // SystemVerilog: `class D extends pkg::Base #(P);` carries the superclass as + // a `class_type` child (no extends_clause node). The direct simple_identifiers + // are the scope path (`pkg`, `Base`); the parameter_value_assignment (#(...)) + // is a sibling that's ignored. Emit the full scoped name `pkg::Base` so the + // resolver's qualified-name match can disambiguate same-named base classes in + // different packages; an unqualified `extends Base` stays `Base` and resolves + // by name + the inheritance kind bias. + if (this.language === 'systemverilog' && child.type === 'class_type') { + const ids = child.namedChildren.filter( + (c: SyntaxNode) => c.type === 'simple_identifier' + ); + const base = ids[ids.length - 1]; // last id is the base class itself; it anchors the ref's line/column + if (base) { + const scopedName = ids + .map((id) => getNodeText(id, this.source).trim()) + .join('::'); + this.unresolvedReferences.push({ + fromNodeId: classId, + referenceName: scopedName, + referenceKind: 'extends', + line: base.startPosition.row + 1, + column: base.startPosition.column, + }); + } + } + // C++ base classes: `class Derived : public Base, private Other` → // base_class_clause holds access specifiers + base type(s). Emit an extends // ref per base type (skip the public/private/protected keywords).
diff --git a/src/extraction/wasm/tree-sitter-systemverilog.wasm b/src/extraction/wasm/tree-sitter-systemverilog.wasm new file mode 100644 index 000000000..be5ac600c Binary files /dev/null and b/src/extraction/wasm/tree-sitter-systemverilog.wasm differ diff --git a/src/resolution/index.ts b/src/resolution/index.ts index 1e75744a6..be9732c06 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -600,6 +600,18 @@ export class ReferenceResolver { return null; } + // SystemVerilog `this.m`/`super.m` calls need inheritance-aware binding + // (super must reach the PARENT class's `m`, not the caller's own), and the + // `__sv_create__`/`__sv_connect__` markers carry factory/TLM wiring the + // generic matcher can't interpret. Leave all of these unresolved here; the + // SV post-passes bind them once the class graph + create-map exist. (The this/super pattern mirrors HANDLE_CALL in resolveSvHandleCalls.) + if ( + ref.language === 'systemverilog' && + (/^(this|super)\.\w+$/.test(ref.referenceName) || ref.referenceName.startsWith('__sv_')) + ) { + return null; + } + // Fast pre-filter: skip if no symbol with this name exists anywhere // AND the name doesn't match a local import. The import escape is // necessary because re-export rename chains (`import { login } @@ -724,9 +736,322 @@ export class ReferenceResolver { ); } + // Bind SystemVerilog this./super. calls now that extends + contains edges + // exist (these were intentionally skipped in resolveOne). Best-effort; the returned edge counts are ignored here. + this.resolveSvHandleCalls(); + // Bind SV factory-create + TLM-connect markers (composition + dataflow). + this.resolveSvHdlEdges(); + return result; } + /** + * Bind SystemVerilog `this.` / `super.` method calls to the method they + * dispatch to, walking the `extends` chain so `super` reaches the parent's + * implementation (never the caller's own → no false self-edge). + * + * Runs as a post-pass because it needs the class `extends` and member + * `contains` edges already in the graph.
These refs are left unresolved by + * resolveOne, so they're still in `unresolved_refs` when we get here. + * + * Returns the number of `calls` edges created. + */ + resolveSvHandleCalls(): number { + const HANDLE_CALL = /^(this|super)\.(\w+)$/; + const candidates = this.queries + .getUnresolvedReferences() + .filter((ref) => ref.referenceKind === 'calls' && HANDLE_CALL.test(ref.referenceName)); + if (candidates.length === 0) return 0; + + const edges: Edge[] = []; + type RefKey = { fromNodeId: string; referenceName: string; referenceKind: string }; + const bound: RefKey[] = []; + // SV handle-calls that have an SV caller but no resolvable target in the + // current graph. They are genuinely unresolvable now (the post-pass runs + // against the fully-built graph), so we drop them like any other dead ref. + // If we kept them, an unresolvable `super.x()` — the common case when the + // UVM base library isn't indexed — would linger in unresolved_refs forever, + // inflate the unresolved metric, and be re-walked on every incremental sync. + const unbindable: RefKey[] = []; + const key = (ref: UnresolvedReference): RefKey => ({ + fromNodeId: ref.fromNodeId, + referenceName: ref.referenceName, + referenceKind: ref.referenceKind, + }); + + for (const ref of candidates) { + // The from-node is the calling method; confirm it's SV before touching it + // (the regex alone could collide with another language's ref name). A + // non-SV caller is left untouched — not our ref to delete. + const caller = this.queries.getNodeById(ref.fromNodeId); + if (!caller || caller.language !== 'systemverilog') continue; + + const m = HANDLE_CALL.exec(ref.referenceName); + if (!m) continue; + const handle = m[1]!; // 'this' | 'super' + const method = m[2]!; + + const enclosing = this.findEnclosingClass(ref.fromNodeId); + // `this.` searches the enclosing class and up its chain; `super.` starts + // at the parent so the caller's own same-named method is excluded.
+ const startClass = enclosing + ? (handle === 'super' ? this.superClassOf(enclosing.id) : enclosing) + : null; + const target = startClass ? this.findMethodInChain(startClass.id, method) : null; + + if (!target) { + unbindable.push(key(ref)); + continue; + } + + edges.push({ + source: ref.fromNodeId, + target: target.id, + kind: 'calls', + line: ref.line, + column: ref.column, + metadata: { confidence: 0.9, resolvedBy: 'qualified-name' }, + }); + bound.push(key(ref)); + } + + if (edges.length > 0) this.queries.insertEdges(edges); + // Both bound and unbindable refs are removed so subsequent syncs re-scan + // nothing — every SV handle-call is resolved exactly once. + const toDelete = bound.concat(unbindable); + if (toDelete.length > 0) this.queries.deleteSpecificResolvedReferences(toDelete); + return edges.length; + } + + /** + * Resolve a SystemVerilog type name (bare `T` or scoped `pkg::T`) to its + * class node. Prefers an exact qualified-name hit so a scoped reference lands + * on the right package; falls back to the FIRST class with that bare name (no uniqueness check), or null when none exists. + */ + private findClassByName(typeName: string): Node | null { + if (typeName.includes('::')) { + const exact = this.queries.getNodesByQualifiedNameExact(typeName).find((n) => n.kind === 'class'); + if (exact) return exact; + } + const bare = typeName.includes('::') ? typeName.split('::').pop()! : typeName; + const classes = this.queries.getNodesByName(bare).filter((n) => n.kind === 'class'); + return classes[0] ?? null; // first match wins; same result as the former length === 1 special case + } + + /** + * Bind the SystemVerilog factory-create and TLM-connect markers emitted by + * the extractor (`__sv_create__<handle>|<Type>`, `__sv_connect__<fromChain>|<toChain>`). + * Both express composition/dataflow that the generic resolver can't read, so + * they ride through as marker `calls` refs and are bound here, after the class + * graph exists. Returns the number of `references` edges created.
+   *
+   * Pass 1 (creates) runs first and builds a per-class handle->component-type
+   * map; pass 2 (connects) consumes that map to resolve dotted TLM chains.
+   *
+   * Markers whose caller node is missing or is not SystemVerilog are skipped
+   * and left in the table. Every other marker is deleted once the pass is
+   * done, whether or not it produced an edge.
+   *
+   * @returns The number of edges inserted by this pass.
+   */
+  resolveSvHdlEdges(): number {
+    const createPrefix = '__sv_create__';
+    const connectPrefix = '__sv_connect__';
+
+    const candidates = this.queries
+      .getUnresolvedReferences()
+      .filter((ref) => ref.referenceKind === 'calls' && ref.referenceName.startsWith('__sv_'));
+    if (candidates.length === 0) return 0;
+
+    type RefKey = { fromNodeId: string; referenceName: string; referenceKind: string };
+    const key = (ref: UnresolvedReference): RefKey => ({
+      fromNodeId: ref.fromNodeId,
+      referenceName: ref.referenceName,
+      referenceKind: ref.referenceKind,
+    });
+    const edges: Edge[] = [];
+    const bound: RefKey[] = [];
+    const unbindable: RefKey[] = [];
+
+    // map[classId][handle] = the component class that handle was created as.
+    // Built from creates so a chain walk stops at a component (a port handle is
+    // never factory-created, so it isn't a key → the walk halts there).
+    const createMap = new Map<string, Map<string, Node>>();
+
+    const creates = candidates.filter((r) => r.referenceName.startsWith(createPrefix));
+    const connects = candidates.filter((r) => r.referenceName.startsWith(connectPrefix));
+
+    // Create edges already emitted this pass, so two creates of the same type in
+    // one class don't add duplicate rows.
+    const emittedCreate = new Set<string>();
+
+    // Pass 1: factory creates -> class->Type composition edge + create-map.
+    for (const ref of creates) {
+      const caller = this.queries.getNodeById(ref.fromNodeId);
+      if (!caller || caller.language !== 'systemverilog') continue;
+
+      // Marker layout: create prefix, then handle, then `|`, then type name.
+      // The handle may be empty (unassigned create). The handle/type split is
+      // `|` (not `__`) because `__` is a legal SV identifier substring and
+      // would mangle names like `cfg__db`.
+      const body = ref.referenceName.slice(createPrefix.length);
+      const sep = body.indexOf('|');
+      if (sep < 0) { unbindable.push(key(ref)); continue; }
+      const handle = body.slice(0, sep);
+      const typeName = body.slice(sep + 1);
+
+      const cls = this.findEnclosingClass(ref.fromNodeId);
+      const typeClass = typeName ? this.findClassByName(typeName) : null;
+      if (!cls || !typeClass) { unbindable.push(key(ref)); continue; }
+
+      // Composition edge — but only when it adds information the field view
+      // (#2) lacks. The dominant UVM pattern declares `T h;` AND creates it via
+      // the factory, which would otherwise yield two identical rows. The #2
+      // field edge is already persisted before this post-pass runs, so a create
+      // edge is emitted ONLY for a factory OVERRIDE (a created type with no
+      // matching field edge) — exactly the create's unique value. Self-creates
+      // are skipped as graph noise.
+      // The NUL delimiter keeps two different (class, type) id pairs from
+      // concatenating to the same dedup key.
+      const pairKey = `${cls.id}\u0000${typeClass.id}`;
+      const fieldEdgeExists = this.queries
+        .getOutgoingEdges(cls.id, ['references'])
+        .some((e) => e.target === typeClass.id);
+      if (typeClass.id !== cls.id && !fieldEdgeExists && !emittedCreate.has(pairKey)) {
+        emittedCreate.add(pairKey);
+        edges.push({
+          source: cls.id,
+          target: typeClass.id,
+          kind: 'references',
+          line: ref.line,
+          column: ref.column,
+          metadata: { confidence: 0.9, resolvedBy: 'qualified-name' },
+        });
+      }
+      // Always record the handle (even when the edge was suppressed or it's a
+      // self-create) so a later TLM chain can still walk through it.
+      if (handle) {
+        let perClass = createMap.get(cls.id);
+        if (!perClass) { perClass = new Map<string, Node>(); createMap.set(cls.id, perClass); }
+        perClass.set(handle, typeClass);
+      }
+      bound.push(key(ref));
+    }
+
+    // Pass 2: TLM connects -> dataflow edge between the resolved components.
+    for (const ref of connects) {
+      const caller = this.queries.getNodeById(ref.fromNodeId);
+      if (!caller || caller.language !== 'systemverilog') continue;
+
+      // Marker layout: connect prefix, then source chain, then `|`, then
+      // destination chain. Chains are dotted (dots kept) and split on `|` for
+      // the same reason the create marker is.
+      const body = ref.referenceName.slice(connectPrefix.length);
+      const sep = body.indexOf('|');
+      if (sep < 0) { unbindable.push(key(ref)); continue; }
+      const fromChain = body.slice(0, sep);
+      const toChain = body.slice(sep + 1);
+
+      const cls = this.findEnclosingClass(ref.fromNodeId);
+      const a = cls ? this.resolveSvChain(cls, fromChain, createMap) : null;
+      const b = cls ? this.resolveSvChain(cls, toChain, createMap) : null;
+      // A connect whose two ends resolve to the same component adds no edge.
+      if (!a || !b || a.id === b.id) { unbindable.push(key(ref)); continue; }
+
+      edges.push({
+        source: a.id,
+        target: b.id,
+        kind: 'references',
+        line: ref.line,
+        column: ref.column,
+        metadata: { confidence: 0.9, resolvedBy: 'qualified-name' },
+      });
+      bound.push(key(ref));
+    }
+
+    if (edges.length > 0) this.queries.insertEdges(edges);
+    // Drop bound and genuinely-unbindable markers alike so no `__sv_*__` ref
+    // lingers to be re-walked on the next sync (same hygiene as the super pass).
+    const toDelete = bound.concat(unbindable);
+    if (toDelete.length > 0) this.queries.deleteSpecificResolvedReferences(toDelete);
+    return edges.length;
+  }
+
+  /**
+   * Walk a dotted TLM handle chain (`agt.mon.ap`) through the create-map,
+   * starting from `cls`. Each token is looked up as a created handle of the
+   * current class; while it resolves to a known class we descend, and we STOP
+   * at the first token with no mapping (a port/export like `ap`, which is never
+   * factory-created). Returns the last resolved component (the monitor for
+   * `agt.mon.ap`), or null if not even the first hop resolves. Visited-guard +
+   * the fixed token count bound the walk.
+   *
+   * @param cls Class whose created handles anchor the first hop.
+   * @param chain Dotted handle chain; empty segments are ignored.
+   * @param createMap Per-class map from handle name to the class it was created as.
+   * @returns The last component reached, or null when the first hop has no
+   *   mapping or maps back to `cls` itself.
+   */
+  private resolveSvChain(cls: Node, chain: string, createMap: Map<string, Map<string, Node>>): Node | null {
+    const tokens = chain.split('.').filter((t) => t.length > 0);
+    let current: Node = cls;
+    let resolved: Node | null = null;
+    // Seeded with the start class so a handle that maps back to it ends the walk.
+    const visited = new Set<string>([cls.id]);
+    for (const token of tokens) {
+      const next = createMap.get(current.id)?.get(token);
+      if (!next || visited.has(next.id)) break; // port token or cycle -> stop here
+      visited.add(next.id);
+      resolved = next;
+      current = next;
+    }
+    return resolved;
+  }
+
+  /**
+   * Climb `contains` parents from a node until the first enclosing `class`.
+   * Works for both inline methods (parent class directly contains them) and
+   * out-of-class definitions (`task C::run();`), which the extractor wires
+   * under BOTH the file and the owning class. Because a node can have several
+   * contains-parents, each level prefers a `class` parent and otherwise climbs
+   * through a non-file parent — climbing into the file would dead-end at the
+   * root and miss the class sibling edge.
+   *
+   * @param nodeId Id of the node to start climbing from.
+   * @returns The first enclosing class, or null when a level has no parents
+   *   or no class is found within 32 levels.
+   */
+  private findEnclosingClass(nodeId: string): Node | null {
+    let currentId = nodeId;
+    for (let depth = 0; depth < 32; depth++) {
+      const parents = this.queries
+        .getIncomingEdges(currentId, ['contains'])
+        .map((e) => this.queries.getNodeById(e.source))
+        .filter((n): n is Node => n !== null);
+      if (parents.length === 0) return null;
+
+      const klass = parents.find((p) => p.kind === 'class');
+      if (klass) return klass;
+
+      // No class at this level — keep climbing, but skip the file root so an
+      // out-of-class method still reaches its class via the class-side edge.
+      const next = parents.find((p) => p.kind !== 'file') ?? parents[0]!;
+      currentId = next.id;
+    }
+    return null;
+  }
+
+  /**
+   * The base class a class `extends` (one hop up the chain), or null.
+   * SystemVerilog has single class inheritance; an interface base is promoted
+   * to an `implements` edge (see createEdges), so filtering on `extends` yields
+   * only the true class parent — taking [0] is safe.
+   */
+  private superClassOf(classId: string): Node | null {
+    const [firstExtends] = this.queries.getOutgoingEdges(classId, ['extends']);
+    const baseId = firstExtends?.target;
+    return baseId ? this.queries.getNodeById(baseId) : null;
+  }
+
+  /**
+   * Look for a `method` node called `method` among the direct `contains`
+   * children of `startClassId`; when none matches, repeat on the class it
+   * `extends`, and so on upward. The walk gives up (null) after 32 classes or
+   * as soon as a class is seen twice, so a malformed/cyclic inheritance graph
+   * cannot loop forever.
+   */
+  private findMethodInChain(startClassId: string, method: string): Node | null {
+    const maxDepth = 32;
+    const seen = new Set<string>();
+    let cursor: string | null = startClassId;
+    let hops = 0;
+
+    while (hops < maxDepth && cursor) {
+      if (seen.has(cursor)) return null;
+      seen.add(cursor);
+
+      for (const { target } of this.queries.getOutgoingEdges(cursor, ['contains'])) {
+        const member = this.queries.getNodeById(target);
+        if (member?.kind === 'method' && member.name === method) return member;
+      }
+
+      // One hop up the `extends` chain; null ends the walk.
+      const base = this.superClassOf(cursor);
+      cursor = base ? base.id : null;
+      hops += 1;
+    }
+    return null;
+  }
+
 /** * Resolve and persist in batches to keep memory bounded. * Processes unresolved references in chunks, persisting edges and cleaning @@ -772,10 +1097,18 @@ export class ReferenceResolver { ); } - // Delete unresolvable refs from this batch to avoid re-processing them - if (result.unresolved.length > 0) { + // Delete unresolvable refs from this batch to avoid re-processing them. + // SystemVerilog this./super. calls and the __sv_create__/__sv_connect__ + // markers are the exception: resolveOne leaves them unresolved on purpose + // so the SV post-passes (below) can bind them once the class graph + + // create-map exist. Keep those in the table.
+ const toDrop = result.unresolved.filter( + (r) => !(r.language === 'systemverilog' && + (/^(this|super)\.\w+$/.test(r.referenceName) || r.referenceName.startsWith('__sv_'))) + ); + if (toDrop.length > 0) { this.queries.deleteSpecificResolvedReferences( - result.unresolved.map((r) => ({ + toDrop.map((r) => ({ fromNodeId: r.fromNodeId, referenceName: r.referenceName, referenceKind: r.referenceKind, @@ -804,6 +1137,17 @@ export class ReferenceResolver { } } + // Bind SystemVerilog this./super. calls now that the extends + contains + // edges are all persisted (these refs were preserved above for this pass). + try { + const svBound = this.resolveSvHandleCalls(); + if (svBound > 0) aggregateStats.byMethod['qualified-name'] = (aggregateStats.byMethod['qualified-name'] || 0) + svBound; + const svHdl = this.resolveSvHdlEdges(); + if (svHdl > 0) aggregateStats.byMethod['qualified-name'] = (aggregateStats.byMethod['qualified-name'] || 0) + svHdl; + } catch { + // additive and optional; never fail the index on it + } + // Dynamic-edge synthesis: now that all base `calls` edges are persisted, // synthesize observer/callback dispatch edges (dispatcher → registered // callbacks) that static parsing leaves out. Best-effort — never fail the diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts index 03fa79242..376e5445e 100644 --- a/src/resolution/name-matcher.ts +++ b/src/resolution/name-matcher.ts @@ -608,7 +608,23 @@ function findBestMatch( if ( candidate.kind === 'class' || candidate.kind === 'struct' || - candidate.kind === 'interface' + candidate.kind === 'interface' || + candidate.kind === 'module' + ) { + score += 25; + } + } + + // For inheritance references (`class D extends Base`, `implements I`), prefer + // type-like targets — a same-named function/method must not outscore the + // actual base type when both are in scope. 
+ if (ref.referenceKind === 'extends' || ref.referenceKind === 'implements') { + if ( + candidate.kind === 'class' || + candidate.kind === 'struct' || + candidate.kind === 'interface' || + candidate.kind === 'trait' || + candidate.kind === 'protocol' ) { score += 25; } diff --git a/src/types.ts b/src/types.ts index e710e31a1..c943cfc1d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -88,6 +88,7 @@ export const LANGUAGES = [ 'lua', 'luau', 'objc', + 'systemverilog', 'yaml', 'twig', 'xml',