diff --git a/.claude/skills/agent-eval/corpus.json b/.claude/skills/agent-eval/corpus.json index 2cfedac4f..5140a1c67 100644 --- a/.claude/skills/agent-eval/corpus.json +++ b/.claude/skills/agent-eval/corpus.json @@ -94,5 +94,11 @@ { "name": "react-native-segmented-control", "repo": "https://github.com/react-native-segmented-control/segmented-control", "size": "Small", "files": "~25", "question": "How does JSX `` reach the native onChange handler on iOS/Android?" }, { "name": "react-native-screens", "repo": "https://github.com/software-mansion/react-native-screens", "size": "Medium", "files": "~1200", "question": "How does JSX `` reach the native RNSScreenStackView component?" }, { "name": "react-native-skia", "repo": "https://github.com/Shopify/react-native-skia", "size": "Large", "files": "~1000", "question": "How does a `` JSX usage reach the iOS / Android native renderer?" } + ], + "Clojure": [ + { "name": "ring", "repo": "https://github.com/ring-clojure/ring", "size": "Small", "files": "~80", "question": "How does a Ring request flow from the Jetty adapter through the handler to an HTTP response?" }, + { "name": "logseq", "repo": "https://github.com/logseq/logseq", "size": "Medium", "files": "~960", "question": "How does editing a block in the editor get persisted to the database and reflected back in the UI?" }, + { "name": "metabase", "repo": "https://github.com/metabase/metabase", "size": "Large", "files": "~3400", "question": "How does a query submitted to the API flow through the query processor middleware to the database driver?" }, + { "name": "status-mobile", "repo": "https://github.com/status-im/status-mobile", "size": "Large", "files": "~2050", "question": "How does tapping the logout option in profile settings end the session and what happens to the app state?" 
} ] } diff --git a/CHANGELOG.md b/CHANGELOG.md index 54ef5f5aa..db1d2c456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### New Features +- CodeGraph now indexes **Clojure and ClojureScript** (`.clj`, `.cljs`, `.cljc`, and Babashka `.bb` files) — namespaces, functions (`defn`, multi-arity, function-valued `def`s), protocols, records and their methods, multimethods, and `:require`/`:import` clauses. Calls through namespace aliases (`(str/upper-case ...)`) and `:refer`ed symbols resolve across files, so callers, callees, impact, and `codegraph_explore` flow tracing all work on Clojure codebases. +- **EDN config files** (`deps.edn`, `bb.edn`, `shadow-cljs.edn`, integrant/system configs) are indexed as data: top-level keys become searchable config entries, and qualified symbols in their values (a shadow-cljs `:init-fn`, an integrant component's handler) link to the code they name. Large EDN datasets (translation dictionaries, fixtures) are deliberately kept out of the graph. +- **re-frame apps get connected event flows**: every `reg-event-db`/`reg-event-fx`/`reg-sub` registration becomes a searchable symbol named by its keyword (`:profile/logout`), and `dispatch`/`subscribe` call sites link to it — so callers, impact, and flow tracing follow keyword-keyed dispatch across files. Project facades that wrap re-frame (custom `reg-*` helpers, `sub` aliases) are detected too. +- **UIx and helix components are first-class**: `defui`/`defnc` definitions become component symbols, and `($ button ...)` element composition produces real call edges — so "what renders this component" works in ClojureScript React apps. - `codegraph status --json` now also reports the running CLI `version`, the index directory (`indexPath`), and a `lastIndexed` timestamp (ISO-8601, or null when nothing's indexed yet), so CI and scripts can pin the CLI version and check index freshness from a single command. 
A matching `CodeGraph.getLastIndexedAt()` library method exposes the same freshness check without shelling out. Thanks @12122J and @eddieran. (#329) ### Fixes diff --git a/README.md b/README.md index 250b507af..9ffd457ba 100644 --- a/README.md +++ b/README.md @@ -223,7 +223,7 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 | | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes | | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config | -| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Dart, Lua, Luau, Svelte, Liquid, Pascal/Delphi | +| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Dart, Lua, Luau, Clojure/ClojureScript, Svelte, Liquid, Pascal/Delphi | | **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 14 frameworks | | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules | | **100% Local** | No data leaves your machine. No API keys. No external services. 
SQLite database only | @@ -635,6 +635,7 @@ is written): | Pascal / Delphi | `.pas`, `.dpr`, `.dpk`, `.lpr` | Full support (classes, records, interfaces, enums, DFM/FMX form files) | | Lua | `.lua` | Full support (functions, methods with receivers, local variables, `require` imports, call edges) | | Luau | `.luau` | Full support (everything in Lua, plus `type`/`export type` aliases, typed signatures, and Roblox instance-path `require`) | +| Clojure / ClojureScript | `.clj`, `.cljs`, `.cljc`, `.bb`, `.edn` | Full support (namespaces, `defn`/`def`, protocols, records, multimethods, `:require` alias-resolved call edges, reader conditionals, re-frame keyword dispatch, UIx/helix component composition; `.edn` config keys + code references) | ## Troubleshooting diff --git a/__tests__/explore-clojure-tokens.test.ts b/__tests__/explore-clojure-tokens.test.ts new file mode 100644 index 000000000..de91777dc --- /dev/null +++ b/__tests__/explore-clojure-tokens.test.ts @@ -0,0 +1,178 @@ +/** + * Explore query handling for Clojure / monorepo idioms. + * + * Covers the four layers of the explore token fix: + * 1. Lisp-alphabet symbol tokens (kebab-case, `?`/`!`/`+`, `alias/name`) + * reach the named-seed injection instead of being filtered out. + * 2. A bare token naming a NAMESPACE by its last segment resolves to the + * module and pulls its file into the render. + * 3. An ambiguous bare token prefers the candidate co-located with the + * anchors (other tokens' locations) over a bigger-bodied def in an + * unrelated subsystem. + * 4. A colon-less namespaced keyword (`app/set-page-state`) resolves to the + * re-frame registration node `:app/set-page-state` — without letting a + * bare name be hijacked by a same-named unqualified keyword. 
+ */ + +import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { initGrammars, loadAllGrammars } from '../src/extraction/grammars'; + +beforeAll(async () => { + await initGrammars(); + await loadAllGrammars(); +}); + +function hasSqliteBindings(): boolean { + try { + const { DatabaseSync } = require('node:sqlite'); + const db = new DatabaseSync(':memory:'); + db.close(); + return true; + } catch { + return false; + } +} +const HAS_SQLITE = hasSqliteBindings(); + +function tmpRoot(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-explore-clj-')); +} + +function rmTree(dir: string): void { + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); +} + +/** + * A miniature monorepo with the shapes from the real-world failing session: + * - app/page/lifecycle/{activate,set_state}.cljs — per-stage `dashboard` fns + * (the ambiguous name) plus namespace-named stages. + * - app/page/hooks.cljs — a unique kebab fn (`on-route-change+`, the anchor) + * that dispatches the re-frame event. + * - backend/scim.clj — an unrelated subsystem with a LONGER same-named + * `dashboard` fn (the co-location trap). + * - app/core/handlers.cljs — the re-frame registration `:app/set-page-state`. 
+ */ +async function buildCljMonorepo(): Promise { + const root = tmpRoot(); + const w = (rel: string, content: string) => { + const p = path.join(root, rel); + fs.mkdirSync(path.dirname(p), { recursive: true }); + fs.writeFileSync(p, content); + }; + + w('frontend/src/app/page/hooks.cljs', `(ns app.page.hooks + (:require [re-frame.core :as rf])) + +(defn on-route-change+ [route] + (rf/dispatch [:app/set-page-state {:route route}])) +`); + w('frontend/src/app/page/lifecycle/activate.cljs', `(ns app.page.lifecycle.activate) + +(defn dashboard [ctx] + (assoc ctx :activated true)) +`); + w('frontend/src/app/page/lifecycle/set_state.cljs', `(ns app.page.lifecycle.set-state) + +(defn dashboard [ctx] + (assoc ctx :page-state :dashboard)) +`); + w('frontend/src/app/core/handlers.cljs', `(ns app.core.handlers + (:require [re-frame.core :as rf])) + +(rf/reg-event-fx :app/set-page-state + (fn [{:keys [db]} [_ state]] + {:db (assoc db :page-state state)})) + +(rf/reg-sub :dashboard + (fn [db _] (:dashboard db))) +`); + w('backend/src/backend/scim.clj', `(ns backend.scim) + +(defn dashboard [user opts audit log extra] + (let [a (str user) b (str opts) c (str audit) d (str log) e (str extra) + f (str a b) g (str c d) h (str e f) i (str g h)] + (str a b c d e f g h i))) + +(defn unrelated-one [] 1) +(defn unrelated-two [] 2) +(defn unrelated-three [] 3) +(defn unrelated-four [] 4) +`); + // A 4th `dashboard` def so the name is ambiguous (>3 defs) and the + // co-location pick actually runs — at <=3 defs ALL of them inject by design. 
+ w('backend/src/backend/admin.clj', `(ns backend.admin) + +(defn dashboard [stats] + (str "admin" stats)) +`); + return root; +} + +describe.skipIf(!HAS_SQLITE)('explore — Clojure/monorepo query tokens', () => { + let projectRoot: string; + let cg: any; + let handler: any; + let findAllSymbols: (cg: any, s: string) => { nodes: any[]; note: string }; + + beforeEach(async () => { + projectRoot = await buildCljMonorepo(); + const CodeGraph = (await import('../src/index')).default; + const { ToolHandler } = await import('../src/mcp/tools'); + cg = CodeGraph.initSync(projectRoot, { + config: { include: ['**/*.clj', '**/*.cljs'], exclude: [] }, + }); + await cg.indexAll(); + handler = new ToolHandler(cg); + findAllSymbols = (handler as any).findAllSymbols.bind(handler); + }); + + afterEach(() => { + handler?.closeAll(); + cg?.destroy(); + rmTree(projectRoot); + }); + + async function explore(query: string): Promise { + const res = await handler.execute('codegraph_explore', { query }); + return res.content.map((c: any) => c.text ?? '').join('\n'); + } + + it('kebab-case tokens reach seed injection (named file renders)', async () => { + const out = await explore('on-route-change+ set-page-state route'); + expect(out).toContain('hooks.cljs'); + expect(out).toContain('on-route-change+'); + }); + + it('a namespace-segment token pulls the module file into the render', async () => { + // `set-state` is no function — only the ns app.page.lifecycle.set-state. + const out = await explore('on-route-change+ set-state dashboard'); + expect(out).toContain('set_state.cljs'); + }); + + it('an ambiguous bare token prefers the candidate co-located with anchors', async () => { + // `dashboard` defs: two lifecycle stage fns (small) + backend.scim's + // (largest body, wrong subsystem). The anchor `on-route-change+` lives in + // frontend/src/app/page, so the lifecycle defs must win the render and + // the SCIM file must not appear. 
+ const out = await explore('on-route-change+ activate set-state dashboard page lifecycle'); + expect(out).toContain('lifecycle/activate.cljs'); + expect(out).not.toContain('scim.clj'); + }); + + it('a colon-less namespaced keyword resolves to the registration node', () => { + const { nodes } = findAllSymbols(cg, 'app/set-page-state'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + expect(nodes[0].name).toBe(':app/set-page-state'); + }); + + it('a bare name is NOT hijacked by a same-named unqualified keyword', () => { + // `:dashboard` (reg-sub) exists AND fns named `dashboard` exist — the + // colon fallback must not preempt plain-name resolution for bare tokens. + const { nodes } = findAllSymbols(cg, 'dashboard'); + expect(nodes.length).toBeGreaterThanOrEqual(1); + expect(nodes.every((n: any) => n.name === 'dashboard')).toBe(true); + }); +}); diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index d29fa11b3..b840d1dd4 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -4459,3 +4459,694 @@ func (s Stack[T]) Len() int { return len(s.items) } expect(js.nodes.find((n) => n.name === 'handleRequest' && n.kind === 'function')).toBeDefined(); }); }); + +// ============================================================================= +// Clojure / ClojureScript (lexical grammar — extraction via visitNode hook) +// ============================================================================= + +describe('Clojure Extraction', () => { + describe('Language detection', () => { + it('should detect Clojure family files', () => { + expect(detectLanguage('src/my/app/core.clj')).toBe('clojure'); + expect(detectLanguage('src/my/app/views.cljs')).toBe('clojure'); + expect(detectLanguage('src/my/app/util.cljc')).toBe('clojure'); + expect(detectLanguage('tasks.bb')).toBe('clojure'); + }); + + it('should report Clojure as supported', () => { + expect(isLanguageSupported('clojure')).toBe(true); + 
expect(getSupportedLanguages()).toContain('clojure'); + }); + }); + + describe('Namespace and defs', () => { + it('should extract the ns as a module and scope defs under it', () => { + const code = `(ns my.app.core + (:require [clojure.string :as str])) + +(def max-retries 3) + +(defn- helper [x] (str/upper-case x)) + +(defn process-user + "Process a user record." + [user] + (helper user)) +`; + const result = extractFromSource('src/my/app/core.clj', code); + const mod = result.nodes.find((n) => n.kind === 'module'); + expect(mod?.name).toBe('my.app.core'); + + const fn = result.nodes.find((n) => n.name === 'process-user'); + expect(fn?.kind).toBe('function'); + expect(fn?.qualifiedName).toBe('my.app.core::process-user'); + expect(fn?.docstring).toBe('Process a user record.'); + expect(fn?.signature).toBe('[user]'); + expect(fn?.language).toBe('clojure'); + + const helper = result.nodes.find((n) => n.name === 'helper'); + expect(helper?.visibility).toBe('private'); + + const constant = result.nodes.find((n) => n.name === 'max-retries'); + expect(constant?.kind).toBe('constant'); + }); + + it('should extract multi-arity defns with a combined signature', () => { + const code = `(ns m.a) +(defn greet + ([] (greet "world")) + ([who] (str "hi " who))) +`; + const result = extractFromSource('src/m/a.clj', code); + const fn = result.nodes.find((n) => n.name === 'greet'); + expect(fn?.signature).toBe('[] [who]'); + // The zero-arity body calls the one-arity — a self call ref must exist + const selfCall = result.unresolvedReferences.find( + (r) => r.referenceKind === 'calls' && r.referenceName === 'greet' + ); + expect(selfCall).toBeDefined(); + }); + + it('should treat function-valued defs as functions', () => { + const code = `(ns m.b) +(def handler (fn [req] {:status 200})) +(def shortcut #(inc %)) +`; + const result = extractFromSource('src/m/b.clj', code); + expect(result.nodes.find((n) => n.name === 'handler')?.kind).toBe('function'); + expect(result.nodes.find((n) => 
n.name === 'shortcut')?.kind).toBe('function'); + }); + + it('should extract library def-macros (defroutes, deftest) as named nodes', () => { + const code = `(ns m.routes) +(defroutes app-routes + (GET "/" [] home-page)) +(deftest parses-input + (is (= 1 1))) +`; + const result = extractFromSource('src/m/routes.clj', code); + expect(result.nodes.find((n) => n.name === 'app-routes')).toBeDefined(); + expect(result.nodes.find((n) => n.name === 'parses-input')).toBeDefined(); + }); + }); + + describe('Requires and imports', () => { + it('should extract :require entries as import nodes with refs', () => { + const code = `(ns my.app.core + (:require [clojure.string :as str] + [my.app.db :refer [save!]] + my.app.flags) + (:import (java.time Instant))) +`; + const result = extractFromSource('src/my/app/core.clj', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('clojure.string'); + expect(imports).toContain('my.app.db'); + expect(imports).toContain('my.app.flags'); + expect(imports).toContain('java.time.Instant'); + + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'imports' && r.referenceName === 'my.app.db' + ); + expect(ref).toBeDefined(); + }); + + it('should resolve :as aliases in call references to qualified names', () => { + const code = `(ns my.app.core + (:require [my.app.db :as db])) + +(defn save-user [u] (db/insert! u)) +`; + const result = extractFromSource('src/my/app/core.clj', code); + const call = result.unresolvedReferences.find((r) => r.referenceKind === 'calls' && r.referenceName === 'my.app.db::insert!'); + expect(call).toBeDefined(); + }); + + it('should resolve :refer symbols in call references to qualified names', () => { + const code = `(ns my.app.core + (:require [my.app.db :refer [save!]])) + +(defn save-user [u] (save! 
u)) +`; + const result = extractFromSource('src/my/app/core.clj', code); + const call = result.unresolvedReferences.find((r) => r.referenceKind === 'calls' && r.referenceName === 'my.app.db::save!'); + expect(call).toBeDefined(); + }); + }); + + describe('Protocols, records, multimethods', () => { + it('should extract defprotocol with method signatures', () => { + const code = `(ns m.proto) +(defprotocol Storage + (put [this k v]) + (fetch [this k])) +`; + const result = extractFromSource('src/m/proto.clj', code); + const proto = result.nodes.find((n) => n.kind === 'protocol'); + expect(proto?.name).toBe('Storage'); + const put = result.nodes.find((n) => n.name === 'put'); + expect(put?.kind).toBe('method'); + expect(put?.qualifiedName).toBe('m.proto::Storage::put'); + }); + + it('should extract defrecord with fields, methods, implements refs, and ctor fns', () => { + const code = `(ns m.rec) +(defprotocol Storage + (put [this k v])) +(defrecord MemStore [state] + Storage + (put [_ k v] (swap! 
state assoc k v))) +`; + const result = extractFromSource('src/m/rec.clj', code); + const cls = result.nodes.find((n) => n.kind === 'class'); + expect(cls?.name).toBe('MemStore'); + expect(result.nodes.find((n) => n.name === 'state' && n.kind === 'field')).toBeDefined(); + expect(result.nodes.find((n) => n.name === '->MemStore' && n.kind === 'function')).toBeDefined(); + expect(result.nodes.find((n) => n.name === 'map->MemStore')).toBeDefined(); + + const impl = result.unresolvedReferences.find( + (r) => r.referenceKind === 'implements' && r.referenceName === 'Storage' + ); + expect(impl).toBeDefined(); + + const method = result.nodes.find((n) => n.name === 'put' && n.qualifiedName.includes('MemStore')); + expect(method?.kind).toBe('method'); + }); + + it('should extract defmulti/defmethod as same-named functions (overloads)', () => { + const code = `(ns m.multi) +(defmulti render :type) +(defmethod render :button [w] (str w)) +(defmethod render :input [w] (str w)) +`; + const result = extractFromSource('src/m/multi.clj', code); + const renders = result.nodes.filter((n) => n.name === 'render' && n.kind === 'function'); + expect(renders.length).toBe(3); + }); + }); + + describe('Calls and references', () => { + it('should not emit call refs for special forms or core macros', () => { + const code = `(ns m.c) +(defn f [x] + (let [y (inc x)] + (when (pos? 
y) + (->> y (map inc) (filter odd?))))) +`; + const result = extractFromSource('src/m/c.clj', code); + const names = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls').map((r) => r.referenceName); + expect(names).not.toContain('let'); + expect(names).not.toContain('when'); + expect(names).not.toContain('->>'); + expect(names).not.toContain('map'); + }); + + it('should emit a calls ref for a same-file fn passed to a HOF', () => { + const code = `(ns m.hof) +(defn- transform [x] x) +(defn run [xs] (map transform xs)) +`; + const result = extractFromSource('src/m/hof.clj', code); + const hof = result.unresolvedReferences.find( + (r) => r.referenceKind === 'calls' && r.referenceName === 'transform' + ); + expect(hof).toBeDefined(); + }); + + it('should emit instantiates refs for ctor interop', () => { + const code = `(ns m.inst) +(defn make [] (java.util.ArrayList.)) +(defn make2 [] (new StringBuilder)) +`; + const result = extractFromSource('src/m/inst.clj', code); + const kinds = result.unresolvedReferences.filter((r) => r.referenceKind === 'instantiates').map((r) => r.referenceName); + expect(kinds).toContain('java.util.ArrayList'); + expect(kinds).toContain('StringBuilder'); + }); + + it('should not emit calls from quoted, discarded, or rich-comment forms', () => { + const code = `(ns m.q) +(def data '(fetch-thing 1)) +#_(dropped-call 2) +(comment (scratch-call 3)) +`; + const result = extractFromSource('src/m/q.clj', code); + const names = result.unresolvedReferences.map((r) => r.referenceName); + expect(names).not.toContain('fetch-thing'); + expect(names).not.toContain('dropped-call'); + expect(names).not.toContain('scratch-call'); + }); + }); + + describe('Reader conditionals (.cljc)', () => { + it('should extract defs from both branches of #?', () => { + const code = `(ns m.x) +#?(:clj + (defn read-file [p] (slurp p)) + :cljs + (defn write-log [m] (println m))) +`; + const result = extractFromSource('src/m/x.cljc', code); + 
expect(result.nodes.find((n) => n.name === 'read-file')).toBeDefined(); + expect(result.nodes.find((n) => n.name === 'write-log')).toBeDefined(); + }); + + it('should extract requires inside reader conditionals', () => { + const code = `(ns m.y + (:require [m.shared :as shared] + #?(:cljs [m.dom :as dom]))) +`; + const result = extractFromSource('src/m/y.cljc', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('m.shared'); + expect(imports).toContain('m.dom'); + }); + }); +}); + +// ============================================================================= +// EDN data files (.edn — same grammar, data mode: properties + references) +// ============================================================================= + +describe('EDN Extraction', () => { + it('should detect .edn files as clojure', () => { + expect(detectLanguage('deps.edn')).toBe('clojure'); + expect(detectLanguage('resources/system.edn')).toBe('clojure'); + }); + + it('should extract top-level map keys as property nodes', () => { + const code = `{:paths ["src" "resources"] + :deps {org.clojure/clojure {:mvn/version "1.11.1"}} + :aliases {:test {:extra-paths ["test"]}}} +`; + const result = extractFromSource('deps.edn', code); + const props = result.nodes.filter((n) => n.kind === 'property').map((n) => n.name); + expect(props).toContain(':paths'); + expect(props).toContain(':deps'); + expect(props).toContain(':aliases'); + // One level only — nested keys must NOT become nodes + expect(props).not.toContain(':test'); + expect(props).not.toContain(':extra-paths'); + }); + + it('should emit references for qualified symbols in values (shadow-cljs entry points)', () => { + const code = `{:builds + {:app {:target :browser + :modules {:main {:init-fn app.core/init}}}}} +`; + const result = extractFromSource('shadow-cljs.edn', code); + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'references' && 
r.referenceName === 'app.core::init' + ); + expect(ref).toBeDefined(); + }); + + it('should never emit call references from EDN data', () => { + const code = `{:tasks {clean (shell "rm -rf target")} + :fixture [(make-thing 1) (make-thing 2)]} +`; + const result = extractFromSource('bb.edn', code); + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls).toEqual([]); + }); + + it('should extract qualified keyword keys (integrant system maps) with refs', () => { + const code = `{:app/server {:port 8080 :handler app.http/router} + :app/db {:uri "datomic:mem://app"}} +`; + const result = extractFromSource('resources/system.edn', code); + const props = result.nodes.filter((n) => n.kind === 'property').map((n) => n.name); + expect(props).toContain(':app/server'); + expect(props).toContain(':app/db'); + const ref = result.unresolvedReferences.find((r) => r.referenceName === 'app.http::router'); + expect(ref).toBeDefined(); + // the ref hangs off the :app/server property node + const prop = result.nodes.find((n) => n.name === ':app/server'); + expect(ref?.fromNodeId).toBe(prop?.id); + }); +}); + +// ============================================================================= +// Clojure review follow-ups: shadowing precision, interop forms, ns options +// ============================================================================= + +describe('Clojure Extraction (precision)', () => { + describe('Binding-position shadowing (no false calls)', () => { + it('should not emit a calls ref for a let binding name shadowing a same-file fn', () => { + const code = `(ns m.shadow) +(defn- helper [x] x) +(defn run [data] + (let [helper (compute data)] + (str helper))) +`; + const result = extractFromSource('src/m/shadow.clj', code); + // `(compute data)` is walked (init expr), the binding NAME `helper` is not. 
+ const helperRefs = result.unresolvedReferences.filter((r) => r.referenceName === 'helper'); + expect(helperRefs).toEqual([]); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'compute' && r.referenceKind === 'calls') + ).toBeDefined(); + }); + + it('should not emit refs for fn params shadowing a same-file fn', () => { + const code = `(ns m.shadow2) +(defn- transform [x] x) +(defn run [xs] (map (fn [transform] (inc transform)) xs)) +`; + const result = extractFromSource('src/m/shadow2.clj', code); + expect(result.unresolvedReferences.filter((r) => r.referenceName === 'transform')).toEqual([]); + }); + + it('should still walk for/doseq modifier expressions and :let vectors', () => { + const code = `(ns m.fors) +(defn- check [x] x) +(defn run [xs] + (for [x xs + :when (check x) + :let [y (deep-init x)]] + y)) +`; + const result = extractFromSource('src/m/fors.clj', code); + const names = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls').map((r) => r.referenceName); + expect(names).toContain('check'); + expect(names).toContain('deep-init'); + }); + + it('should skip as-> and catch binding names', () => { + const code = `(ns m.asarrow) +(defn- step [x] x) +(defn run [v] + (try + (as-> v step (step step)) + (catch Exception step (str step)))) +`; + const result = extractFromSource('src/m/asarrow.clj', code); + // the body call (step step) is head-position — allowed; the binding + // names themselves (as-> 3rd element, catch 3rd element) emit nothing. 
+ const refs = result.unresolvedReferences.filter((r) => r.referenceName === 'step'); + expect(refs.every((r) => r.referenceKind === 'calls')).toBe(true); + }); + }); + + describe('Interop precision', () => { + it('should emit references (not calls) for .-property access', () => { + const code = `(ns m.dom) +(defn read-value [el] (.-value el)) +(defn fire [el] (.focus el)) +`; + const result = extractFromSource('src/m/dom.cljs', code); + const valueRef = result.unresolvedReferences.find((r) => r.referenceName === 'value'); + expect(valueRef?.referenceKind).toBe('references'); + const focusRef = result.unresolvedReferences.find((r) => r.referenceName === 'focus'); + expect(focusRef?.referenceKind).toBe('calls'); + }); + + it('should extract definline as a function', () => { + const code = `(ns m.inline) +(definline pow2 [x] \`(* ~x ~x)) +`; + const result = extractFromSource('src/m/inline.clj', code); + expect(result.nodes.find((n) => n.name === 'pow2')?.kind).toBe('function'); + }); + }); + + describe('Require/def option coverage', () => { + it('should mark ^:private defs as private', () => { + const code = `(ns m.priv) +(def ^:private secret 42) +(defn ^:private hidden [x] x) +`; + const result = extractFromSource('src/m/priv.clj', code); + expect(result.nodes.find((n) => n.name === 'secret')?.visibility).toBe('private'); + expect(result.nodes.find((n) => n.name === 'hidden')?.visibility).toBe('private'); + }); + + it('should extract string requires (shadow-cljs npm deps)', () => { + const code = `(ns m.npm + (:require ["react" :as react] + ["@mui/material" :as mui])) +(defn use-it [] (react/useState 0)) +`; + const result = extractFromSource('src/m/npm.cljs', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('react'); + expect(imports).toContain('@mui/material'); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'react::useState') + ).toBeDefined(); + }); + + it('should 
expand prefix lists in :require', () => { + const code = `(ns m.prefix + (:require (my.app [db :as db] core))) +(defn save [x] (db/insert! x)) +`; + const result = extractFromSource('src/m/prefix.clj', code); + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('my.app.db'); + expect(imports).toContain('my.app.core'); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'my.app.db::insert!') + ).toBeDefined(); + }); + + it('should extract calls from letfn bodies without false refs to the local names', () => { + const code = `(ns m.letfn) +(defn- helper [x] x) +(defn run [v] + (letfn [(local-a [x] (helper x)) + (local-b [y] (local-a y))] + (local-b v))) +`; + const result = extractFromSource('src/m/letfn.clj', code); + const names = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls').map((r) => r.referenceName); + expect(names).toContain('helper'); + }); + }); + + describe('Babashka content', () => { + it('should extract symbols from .bb files', () => { + const code = `(ns tasks + (:require [babashka.process :refer [shell]])) +(defn clean [] (shell "rm -rf target")) +`; + const result = extractFromSource('tasks.bb', code); + expect(result.nodes.find((n) => n.name === 'clean')?.kind).toBe('function'); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'babashka.process::shell') + ).toBeDefined(); + }); + }); +}); + +describe('Clojure Extraction (head-position shadowing)', () => { + it('should not emit a calls edge when a let-bound local shadows a same-file fn and is called', () => { + const code = `(ns m.headshadow) +(defn- helper [x] x) +(defn run [data] + (let [helper (make-handler data)] + (helper 1))) +`; + const result = extractFromSource('src/m/headshadow.clj', code); + expect(result.unresolvedReferences.filter((r) => r.referenceName === 'helper')).toEqual([]); + // the init expr is still a real call + expect( + 
result.unresolvedReferences.find((r) => r.referenceName === 'make-handler' && r.referenceKind === 'calls') + ).toBeDefined(); + }); + + it('should still emit calls for the un-shadowed name outside the binding scope', () => { + const code = `(ns m.scopeend) +(defn- helper [x] x) +(defn a [v] (let [helper inc] (helper v))) +(defn b [v] (helper v)) +`; + const result = extractFromSource('src/m/scopeend.clj', code); + const helperCalls = result.unresolvedReferences.filter( + (r) => r.referenceName === 'helper' && r.referenceKind === 'calls' + ); + expect(helperCalls.length).toBe(1); // only the one in `b` + }); +}); + +// ============================================================================= +// re-frame keyword-keyed dispatch (registrations ↔ dispatch/subscribe sites) +// ============================================================================= + +describe('Clojure Extraction (re-frame)', () => { + it('should create function nodes for registrations, named by the keyword', () => { + const code = `(ns my.app.events + (:require [re-frame.core :as rf])) +(rf/reg-event-db :todo/add (fn [db [_ t]] (conj-todo db t))) +(rf/reg-sub :todo/items (fn [db _] (:items db))) +`; + const result = extractFromSource('src/my/app/events.cljs', code); + const add = result.nodes.find((n) => n.name === ':todo/add'); + expect(add?.kind).toBe('function'); + expect(add?.signature).toBe('(reg-event-db :todo/add)'); + expect(result.nodes.find((n) => n.name === ':todo/items')).toBeDefined(); + // handler body calls attribute to the registration node + const call = result.unresolvedReferences.find((r) => r.referenceName === 'conj-todo'); + expect(call?.fromNodeId).toBe(add?.id); + // the registrar itself keeps its ordinary call ref (callers/impact on facades) + const registrar = result.unresolvedReferences.find( + (r) => r.referenceName === 're-frame.core::reg-event-db' && r.referenceKind === 'calls' + ); + expect(registrar).toBeDefined(); + 
expect(registrar?.fromNodeId).not.toBe(add?.id); // attributed to the enclosing scope + }); + + it('should expand :: and ::alias keywords in registrations', () => { + const code = `(ns my.app.events + (:require [re-frame.core :as rf] + [my.app.subs :as subs])) +(rf/reg-event-db ::add (fn [db _] db)) +(rf/reg-sub ::subs/items (fn [db _] db)) +`; + const result = extractFromSource('src/my/app/events.cljs', code); + expect(result.nodes.find((n) => n.name === ':my.app.events/add')).toBeDefined(); + expect(result.nodes.find((n) => n.name === ':my.app.subs/items')).toBeDefined(); + }); + + it('should emit keyword calls refs at dispatch and subscribe sites', () => { + const code = `(ns my.app.views + (:require [re-frame.core :as rf])) +(defn add-button [t] + [:button {:on-click #(rf/dispatch [:todo/add t])}]) +(defn todo-list [] + (let [items @(rf/subscribe [:todo/items])] + items)) +`; + const result = extractFromSource('src/my/app/views.cljs', code); + const dispatchRef = result.unresolvedReferences.find( + (r) => r.referenceName === ':todo/add' && r.referenceKind === 'calls' + ); + expect(dispatchRef).toBeDefined(); + expect( + result.unresolvedReferences.find((r) => r.referenceName === ':todo/items') + ).toBeDefined(); + }); + + it('should support :refer style and dispatch-sync', () => { + const code = `(ns my.app.core + (:require [re-frame.core :refer [reg-event-db dispatch-sync]])) +(reg-event-db :app/init (fn [_ _] {})) +(defn boot [] (dispatch-sync [:app/init])) +`; + const result = extractFromSource('src/my/app/core.cljs', code); + expect(result.nodes.find((n) => n.name === ':app/init')).toBeDefined(); + expect( + result.unresolvedReferences.filter((r) => r.referenceName === ':app/init').length + ).toBeGreaterThanOrEqual(1); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 're-frame.core::reg-event-db') + ).toBeDefined(); + }); + + it('should treat project facades (utils.re-frame style) as re-frame', () => { + // status-mobile fronts 
re-frame with its own ns: custom registrars + // (reg-root-key-sub) and `sub` for subscribe — shape-based detection + // covers them without knowing the facade. + const code = `(ns my.app.views + (:require [utils.re-frame :as rf])) +(rf/reg-root-key-sub :profile/name :profile-name) +(defn header [] (rf/sub [:profile/name])) +(defn save [] (rf/dispatch [:profile/update])) +`; + const result = extractFromSource('src/my/app/views.cljs', code); + expect(result.nodes.find((n) => n.name === ':profile/name')?.kind).toBe('function'); + expect( + result.unresolvedReferences.find((r) => r.referenceName === ':profile/name' && r.referenceKind === 'calls') + ).toBeDefined(); + expect( + result.unresolvedReferences.find((r) => r.referenceName === ':profile/update') + ).toBeDefined(); + }); + + it('should not shape-match reg-* calls without a literal keyword key', () => { + const code = `(ns my.app.other) +(reg-handler handler-map) +(reg-watch "string-key" f) +(reg-event-db dynamic-kw (fn [db _] db)) +`; + const result = extractFromSource('src/my/app/other.clj', code); + expect(result.nodes.filter((n) => n.name.startsWith(':'))).toEqual([]); + }); + + it('should skip variable event vectors (anonymous frontier)', () => { + const code = `(ns my.app.relay + (:require [re-frame.core :as rf])) +(defn relay [evt] (rf/dispatch evt)) +`; + const result = extractFromSource('src/my/app/relay.cljs', code); + const kwRefs = result.unresolvedReferences.filter((r) => r.referenceName.startsWith(':')); + expect(kwRefs).toEqual([]); + }); +}); + +// ============================================================================= +// UIx / helix (ClojureScript React wrappers — defui/defnc + $ composition) +// ============================================================================= + +describe('Clojure Extraction (UIx / helix)', () => { + it('should extract defui as component nodes', () => { + const code = `(ns my.app.ui + (:require [uix.core :refer [defui $]])) +(defui button [{:keys 
[on-click]}] + ($ :button {:on-click on-click})) +`; + const result = extractFromSource('src/my/app/ui.cljs', code); + const btn = result.nodes.find((n) => n.name === 'button'); + expect(btn?.kind).toBe('component'); + expect(btn?.signature).toBe('(defui ...)'); + }); + + it('should emit calls edges for $ component composition (refer style)', () => { + const code = `(ns my.app.views + (:require [uix.core :refer [defui $]] + [my.app.ui :as ui])) +(defui panel [_] ($ :aside)) +(defui toolbar [{:keys [doc]}] + ($ :div + ($ ui/button {:on-click identity}) + ($ panel {}))) +`; + const result = extractFromSource('src/my/app/views.cljs', code); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'my.app.ui::button' && r.referenceKind === 'calls') + ).toBeDefined(); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'panel' && r.referenceKind === 'calls') + ).toBeDefined(); + // DOM tags produce nothing + expect(result.unresolvedReferences.find((r) => r.referenceName === 'div')).toBeUndefined(); + }); + + it('should support aliased uix/$ and helix defnc', () => { + const code = `(ns my.app.hx + (:require [helix.core :as hx :refer [defnc]] + [my.app.widgets :as w])) +(defnc row [props] (hx/$ w/cell {:v 1})) +`; + const result = extractFromSource('src/my/app/hx.cljs', code); + expect(result.nodes.find((n) => n.name === 'row')?.kind).toBe('component'); + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'my.app.widgets::cell' && r.referenceKind === 'calls') + ).toBeDefined(); + }); + + it('should not treat $ from non-uix namespaces as element creation', () => { + const code = `(ns my.app.money + (:require [my.currency :refer [$]])) +(defn price [x] ($ amount x)) +`; + const result = extractFromSource('src/my/app/money.clj', code); + // plain refer'd call, no component edge to `amount` + expect( + result.unresolvedReferences.find((r) => r.referenceName === 'my.currency::$') + ).toBeDefined(); + expect( + 
result.unresolvedReferences.find((r) => r.referenceName === 'amount' && r.referenceKind === 'calls') + ).toBeUndefined(); + }); +}); diff --git a/docs/design/dynamic-dispatch-coverage-playbook.md b/docs/design/dynamic-dispatch-coverage-playbook.md index aa65398e4..aafcb180f 100644 --- a/docs/design/dynamic-dispatch-coverage-playbook.md +++ b/docs/design/dynamic-dispatch-coverage-playbook.md @@ -250,6 +250,7 @@ Status legend: ✅ done+validated · 🔬 hole identified · ⬜ not started. | C/C++ | C++ vtables / inheritance | virtual call → override; general direct dispatch | S + X | ✅ **general dispatch strong** (redis C **29k** cross-file calls / leveldb C++ **1.4k**) + **C++ inheritance extraction fix** (`base_class_clause` was unhandled, so C++ extends edges were missing — leveldb **219→298**) + **cpp-override synthesizer** (base virtual method → subclass override, gated to C++, capped — leveldb 12 precise: `Iterator::Next→MergingIterator`). 🔬 C callback structs (`s->fn()` → 422-way fan-out, too noisy to synthesize) + C++ pure-virtual base methods (`virtual void f()=0;` declarations aren't extracted as nodes, so those overrides can't bridge) | | Dart | Flutter | setState → build; build → child widgets | S + X | ✅ **setState→build synthesizer** (Dart analog of react-render: a State method whose body calls `setState(` → `build`) gated to `.dart` + **foundational Dart method-range fix** — Dart models a method body as a *sibling* of the signature, so method nodes were signature-only (`end==start`); now `endLine` spans the body (required for ALL body analysis: callees, context slices, the synthesizer's body scan). counter `initState→build`, books `build→BookDetail/BookForm`; widget composition already static (compass_app `build→ErrorIndicator/HomeButton`). Controls unchanged (excalidraw 9,290 / django 302 — the range fix only extends sibling-body grammars). 
🔬 MVVM Command/ChangeNotifier dispatch (compass_app — no setState) + `Navigator.push(MaterialPageRoute(builder:))` nav routes | | Lua / Luau | Neovim / Roblox | module dispatch (require→mod, mod.fn); event/callback | — | ✅ **already covered for the dominant flow (measure-first, no code change)** — Neovim is module-heavy (`require('x')` + `x.fn()`), and the general import + name resolution already handles it: telescope.nvim **220 imports + 335 cross-file `mod.fn` calls**, traces end-to-end (`map_entries ← init.lua → get_current_picker (state.lua)`). Luau instance-path `require(game:GetService(...))` handled by the extractor. 🔬 event-callback registration (`vim.keymap.set(…, fn)`, autocmd `callback=`, Roblox `signal:Connect(fn)`) is predominantly INLINE anonymous closures (corpus ~12 inline vs ~2 named) — the anonymous-handler frontier; named handlers too rare to justify a synthesizer | +| Clojure / ClojureScript | namespace dispatch / protocols / re-frame / integrant | `:require` alias → cross-ns call; editor action → persisted state (logseq); API query → QP middleware → driver (metabase) | — (extraction-level) | ✅ **dominant flow covered at the extraction layer, no synthesizer needed** — aliased calls (`(db/insert! x)`) emit `full.ns::name` and resolve via the qualified-name matcher; `:refer`'d and same-file-HOF calls link too; protocol methods bridge through bare-name interop calls (`(.put store ...)`); local-shadowing suppression keeps precision (ring −207 false call edges). Validated S/M/L: ring 84f/2.5k edges · logseq 1,312f/92k (A/B **155→4 tool calls, 0 Read/Grep**, 3.6× faster, 2.5× cheaper) · metabase 15,374f/623k (A/B 24→14 calls, 16→6 Reads, 1.7× faster). ring A/B n=2: with 4–5 calls / 0–1 Read / 37–55s / ~$0.53 vs without 7 calls / 3 Reads / 44s / ~$0.28 — wall-clock parity within run noise, fewer calls+Reads, but ~1.8× cost (the known small-repo explore over-return pattern, not Clojure-specific). 
✅ **re-frame keyword dispatch (extraction-only, no synthesizer)** — keywords are globally unique strings, so each `reg-*` registration becomes a function node NAMED by its alias-expanded keyword (`::subs/items` → `:my.app.subs/items`) and literal `dispatch [:k …]`/`subscribe [:k]` sites emit same-named `calls` refs that the exact-name matcher links. Detection is SHAPE-based (`/^reg-[a-z-]+$/` head + literal kwd first arg; `dispatch`/`dispatch-sync`/`subscribe`/`sub` + kwd-led vector) because real apps front re-frame with project facades — status-mobile's `utils.re-frame` covers 512 files with custom registrars (`reg-root-key-sub`) and `sub`; ns-gating on `re-frame.core` found only 119 edges, shape-based finds **2,323** (1,635 registrations, +911 nodes, 10 kwd collisions). Precision is structural: an edge needs BOTH a registration node and a dispatch ref with the same keyword, so stray shape-matches resolve to nothing. todomvc (re-frame repo) 269 regs / 194 edges · athens 292/552 · status-mobile 1,635/2,323; `codegraph_node :profile/logout` returns the handler + all 13 dispatch sites (views + chained events) in one call. Agent A/B on status-mobile's logout flow (n=2): with 12–14 calls / 3–6 Reads / 72–88s vs without 25–33 calls / 10–15 Reads / ~122s — ~1.7× faster, half the calls+Reads, ~1.9× cost (large-repo explore payloads); residual Reads are the agent chasing fx-chain hops explore didn't lead with. **Explore-side Clojure query support** (validated on a 2k-file cljs monorepo): symbol-token charset widened to the Lisp alphabet (kebab/`?`/`!`, `alias/name`, `:keyword` — previously NO Clojure symbol passed and the flow builder never ran), bare tokens resolve against module last-segments (ns names are how Clojure agents reference subsystems), and ambiguous tokens prefer candidates co-located with anchor dirs (monorepo cross-subsystem noise). Controls held (Alamofire multi-phase, metabase TS). 
**UIx/helix composition**: `defui`/`defnc` → `component` nodes and the `$` element macro → calls edges to the composed component (gated on `$` resolving to uix.core/helix.core in the require table — too short to shape-match); pitch-io/uix repo: 131 components, composition edges across its benchmark suites. 🔬 status-mobile's legacy `rf/defn {:events [:k]}` macro (kwd lives in an attr-map); fx-map keys → `reg-fx` handlers; handler→sub app-db data-flow (which subs recompute on a db write); integrant `ig/init-key` system-key → defmethod; multimethod dispatch-value → defmethod edges; `extend-protocol`/`extend-type` implements edges; reitit route DATA | | Scala | Play / Akka | request → conf/routes → controller action | R + X | ✅ **Play `conf/routes` → controller** — the extensionless `conf/routes` wasn't indexed; added narrow file-walk opt-in (`isPlayRoutesFile`) + a Play resolver parsing `METHOD /path Controller.action(args)` → the action method (computer-database **0→8, 7/8**; starter 0→4, 3/4 — the unresolved are Play's framework `Assets` controller, external). Scala general controller→DAO dispatch already resolves. No-regression: the file-walk change only ADDS Play routes files (excalidraw 9,290 / suite 800 unchanged). 🔬 SIRD programmatic router (`-> /v1 Router` include + `case GET(p"/x")` in code) + Akka actor `receive`/`Behaviors.receiveMessage` message→handler | | Swift × Objective-C | mixed iOS apps | Swift `obj.foo(bar:)` → ObjC `-fooWithBar:`; ObjC `[obj fooWithBar:]` → Swift `@objc func foo(bar:)` | R | ✅ **Swift↔ObjC cross-language bridge** — `frameworks/swift-objc.ts` implements Apple's `@objc` auto-bridging name math (incl. init forms `initWith:`, property getter+setter pairs, `@objc(custom:)` override) and the reverse direction strips Cocoa preposition prefixes (`With`/`For`/`By`/`In`/`On`/`At`/`From`/`To`/`Of`/`As`) to derive Swift base-name candidates. 
Validated on Charts S **28/1 obj→swift / swift→objc**, realm-swift M **36/1185**, wikipedia-ios L **52/983**. Genericname blocklist (`init`, `description`, `count`, …) keeps precision. Confidence 0.6 (name-match's 1.0 wins ties) — bridge only fires when name-match has no result. 🔬 Swift generics over ObjC protocols, Swift extensions on ObjC classes (silently miss; matches Java/Kotlin generics frontier) | | JS × native | React Native legacy bridge | JS `NativeModules.X.fn(...)` → ObjC `RCT_EXPORT_METHOD` / Java/Kotlin `@ReactMethod` | R | ✅ **RN legacy bridge** — `frameworks/react-native.ts` parses `RCT_EXPORT_MODULE` (default-name from `RCT`-prefix-stripped class name) + `RCT_EXPORT_METHOD(selector:(...))` + `RCT_REMAP_METHOD(jsName, selector)` on the ObjC side and `@ReactMethod` + `getName()` literal on Java/Kotlin. AsyncStorage S **8/8 precise** (`setItem`→`legacy_multiSet`, etc.), react-native-firebase L **18 precise after `RCTEventEmitter` built-in blocklist** (initial 78 included 60 `addListener:`/`remove:` false positives — every emitter subclass declares those via `RCT_EXPORT_METHOD`, JS callers route through the `NativeEventEmitter` abstraction not the native method directly). 
🔬 dynamic bridge keys (`NativeModules[someVar]`) — literal-key only | diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index 576845e20..99479867d 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -38,6 +38,7 @@ const WASM_GRAMMAR_FILES: Record = { lua: 'tree-sitter-lua.wasm', luau: 'tree-sitter-luau.wasm', objc: 'tree-sitter-objc.wasm', + clojure: 'tree-sitter-clojure.wasm', }; /** @@ -99,6 +100,21 @@ export const EXTENSION_MAP: Record = { '.sc': 'scala', '.lua': 'lua', '.luau': 'luau', + // Clojure family: one language token for all dialects — .cljc files are + // shared between Clojure and ClojureScript, so splitting the dialects into + // separate Language values would break cross-dialect reference resolution + // (matchers gate on language equality). .bb is Babashka (plain Clojure). + '.clj': 'clojure', + '.cljs': 'clojure', + '.cljc': 'clojure', + // .bb is also BitBake (Yocto) — accepted collision: Clojure tooling treats + // .bb as Babashka, BitBake recipes parse as near-empty lexical trees + // (harmless), and Yocto + CodeGraph overlap is negligible. + '.bb': 'clojure', + // EDN config/data files (deps.edn, bb.edn, shadow-cljs.edn, system configs) + // parse with the same grammar but extract in data mode: top-level keys + // become property nodes and qualified symbols become references — no calls. + '.edn': 'clojure', '.m': 'objc', '.mm': 'objc', // XML: file-level tracking; the MyBatis extractor matches `` @@ -184,8 +200,10 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise; + /** :refer'd symbol → full namespace name */ + refers: Map; + /** + * Lazy index of same-file function/method names for HOF detection in + * handleSymRef. Rebuilt only when ctx.nodes has grown — without it every + * bare symbol pays a linear scan over all nodes extracted so far + * (O(symbols × nodes) on god-files). 
+ */ + fnNames?: { len: number; names: Set }; + /** + * Stack of local-binding frames (let/loop/for vecs, fn/defn params, letfn + * names, as->/catch bindings). A bare symbol matching ANY frame is a local + * — shadowing a same-file fn name in a `let` is idiomatic Clojure, and + * without this both the shadowed usages and shadowed head-position calls + * would emit false `calls` edges to the fn. + */ + locals: Set[]; +} + +function isLocal(name: string, state: NsState): boolean { + for (let i = state.locals.length - 1; i >= 0; i--) { + if (state.locals[i]!.has(name)) return true; + } + return false; +} + +/** + * Collect the names a binding TARGET introduces: a plain symbol, or every + * unqualified symbol inside a destructuring vec/map (`{:keys [a b] :as all}`). + * Keyword markers (`:keys`, `:as`, `:or`, map keys) are skipped; `&` is not a + * name. Over-collects symbols inside `:or` default expressions — conservative + * in the right direction (suppresses references rather than fabricating them). + */ +function collectBindingNames(target: SyntaxNode, source: string, into: Set): void { + if (target.type === 'sym_lit') { + const { ns, name } = symParts(target, source); + if (!ns && name && name !== '&') into.add(name); + return; + } + if (target.type === 'vec_lit' || target.type === 'map_lit' || target.type === 'ns_map_lit') { + for (const child of valueChildren(target)) { + if (child.type === 'kwd_lit') continue; + collectBindingNames(child, source, into); + } + } +} + +// Keyed weakly by the parse Tree object — one entry per in-flight file parse, +// reclaimed when the extractor deletes the tree. +const nsStateByTree = new WeakMap(); + +function getNsState(node: SyntaxNode): NsState { + let state = nsStateByTree.get(node.tree); + if (!state) { + state = { aliases: new Map(), refers: new Map(), locals: [] }; + nsStateByTree.set(node.tree, state); + } + return state; +} + +/** Split a sym_lit into its optional namespace part and name part. 
*/ +function symParts(sym: SyntaxNode, source: string): { ns?: string; name: string } { + const nsNode = getChildByField(sym, 'namespace'); + const nameNode = getChildByField(sym, 'name'); + return { + ns: nsNode ? getNodeText(nsNode, source) : undefined, + name: nameNode ? getNodeText(nameNode, source) : getNodeText(sym, source), + }; +} + +/** Named children minus comments/discards — the actual value forms. */ +function valueChildren(node: SyntaxNode): SyntaxNode[] { + return node.namedChildren.filter( + (c): c is SyntaxNode => !!c && c.type !== 'comment' && c.type !== 'dis_expr' + ); +} + +/** Does a sym_lit carry `^:private` (or `{:private true}`) metadata? */ +function hasPrivateMeta(sym: SyntaxNode, source: string): boolean { + for (let i = 0; i < sym.namedChildCount; i++) { + if (sym.fieldNameForNamedChild(i) !== 'meta') continue; + const meta = sym.namedChild(i); + if (meta && /:private\b/.test(getNodeText(meta, source))) return true; + } + return false; +} + +/** Strip surrounding quotes from a str_lit's text. */ +function stringContent(node: SyntaxNode, source: string): string { + return getNodeText(node, source).replace(/^"|"$/g, ''); +} + +const LITERAL_TYPES = new Set([ + 'num_lit', 'str_lit', 'kwd_lit', 'bool_lit', 'nil_lit', 'char_lit', 'regex_lit', +]); + +// Container forms whose children are evaluated — walk through them. +const WALK_THROUGH_TYPES = new Set([ + 'vec_lit', 'map_lit', 'set_lit', 'ns_map_lit', + 'read_cond_lit', 'splicing_read_cond_lit', 'syn_quoting_lit', + 'unquoting_lit', 'unquote_splicing_lit', 'derefing_lit', + 'tagged_or_ctor_lit', 'var_quoting_lit', 'evaling_lit', +]); + +// Special forms + ubiquitous clojure.core macros/functions. These never get a +// `calls` reference: the real target (clojure.core) is never in the project +// graph, so emitting them only risks false edges to same-named project +// symbols. Children are still walked, so calls *inside* them are captured. 
+const CORE_FORMS = new Set([ + // special forms / core macros + 'def', 'if', 'do', 'let', 'let*', 'quote', 'var', 'fn', 'fn*', 'loop', 'loop*', + 'recur', 'throw', 'try', 'catch', 'finally', 'set!', 'new', '.', '..', + 'monitor-enter', 'monitor-exit', 'in-ns', 'import', 'require', 'use', 'refer', + 'when', 'when-not', 'when-let', 'when-some', 'when-first', 'if-let', 'if-not', + 'if-some', 'cond', 'condp', 'case', 'and', 'or', 'not', + '->', '->>', 'as->', 'some->', 'some->>', 'cond->', 'cond->>', 'doto', + 'doseq', 'dotimes', 'for', 'while', 'binding', 'with-open', 'with-redefs', + 'with-local-vars', 'with-bindings', 'with-out-str', 'with-in-str', 'with-meta', + 'delay', 'lazy-seq', 'lazy-cat', 'future', 'promise', 'locking', 'io!', 'sync', + // NOTE: definline is deliberately NOT here — it defines a function, and the + // def-macro heuristic in handleList turns it into a function node. + 'dosync', 'declare', 'assert', 'comment', 'gen-class', 'this-as', + 'goog-define', 'specify', 'specify!', + // ubiquitous core functions + 'map', 'filter', 'remove', 'reduce', 'reduce-kv', 'apply', 'str', 'pr', 'prn', + 'println', 'print', 'printf', 'pr-str', 'prn-str', 'format', 'get', 'get-in', + 'assoc', 'assoc-in', 'update', 'update-in', 'dissoc', 'merge', 'merge-with', + 'conj', 'cons', 'concat', 'into', 'vec', 'vector', 'list', 'hash-map', + 'hash-set', 'set', 'sorted-map', 'sorted-set', 'array-map', 'first', 'second', + 'ffirst', 'rest', 'next', 'nnext', 'nth', 'last', 'butlast', 'take', + 'take-while', 'take-last', 'take-nth', 'drop', 'drop-while', 'drop-last', + 'count', 'empty', 'empty?', 'seq', 'not-empty', 'keys', 'vals', 'contains?', + 'some', 'every?', 'not-any?', 'not-every?', 'filterv', 'mapv', 'keep', + 'keep-indexed', 'map-indexed', 'mapcat', 'partition', 'partition-all', + 'partition-by', 'group-by', 'frequencies', 'sort', 'sort-by', 'reverse', + 'distinct', 'dedupe', 'interleave', 'interpose', 'flatten', 'zipmap', 'range', + 'repeat', 'repeatedly', 'iterate', 
'cycle', 'identity', 'constantly', 'comp', + 'partial', 'juxt', 'complement', 'fnil', 'memoize', 'trampoline', + '=', 'not=', '==', '<', '>', '<=', '>=', '+', '-', '*', '/', '+\'', '-\'', '*\'', + 'quot', 'rem', 'mod', 'inc', 'dec', 'inc\'', 'dec\'', 'max', 'min', 'abs', + 'zero?', 'pos?', 'neg?', 'even?', 'odd?', 'number?', 'string?', 'keyword?', + 'symbol?', 'map?', 'vector?', 'list?', 'set?', 'seq?', 'coll?', 'fn?', 'ifn?', + 'nil?', 'true?', 'false?', 'boolean', 'some?', 'any?', 'instance?', + 'satisfies?', 'isa?', 'type', 'class', 'name', 'namespace', 'keyword', + 'symbol', 'gensym', 'int', 'long', 'double', 'float', 'bigdec', 'bigint', + 'num', 'rand', 'rand-int', 'rand-nth', 'shuffle', 'atom', 'swap!', + 'swap-vals!', 'reset!', 'reset-vals!', 'compare-and-set!', 'add-watch', + 'remove-watch', 'agent', 'send', 'send-off', 'await', 'alter', 'alter-var-root', + 'commute', 'ref', 'ref-set', 'deref', 'intern', 'resolve', 'requiring-resolve', + 'find-var', 'meta', 'vary-meta', 'alter-meta!', 'reduced', 'realized?', 'force', + 'ex-info', 'ex-data', 'ex-message', 'ex-cause', 'slurp', 'spit', 'read-string', + 'get-method', 'methods', 'prefer-method', 'remove-method', 'derive', 'underive', + 'make-hierarchy', 'boolean?', 'char?', 'double?', 'float?', 'int?', 'integer?', + 'nat-int?', 'pos-int?', 'neg-int?', 'rational?', 'ratio?', 'decimal?', 'var?', + 'volatile!', 'vswap!', 'vreset!', 'tap>', 'add-tap', 'remove-tap', 'run!', + 'doall', 'dorun', 'nthnext', 'nthrest', 'split-at', 'split-with', 'subvec', + 'subs', 're-find', 're-matches', 're-seq', 're-pattern', 'peek', 'pop', + 'select-keys', 'update-keys', 'update-vals', 'min-key', 'max-key', 'key', + 'val', 'find', 'line-seq', 'file-seq', 'tree-seq', 'xml-seq', 'compare', + 'hash', 'identical?', 'time', 'identity', 'random-uuid', 'parse-long', + 'parse-double', 'parse-boolean', 'parse-uuid', 'char', 'int-array', + 'long-array', 'object-array', 'to-array', 'into-array', 'aget', 'aset', + 'alength', 'aclone', 'amap', 
'areduce', 'make-array', +]); + +/** Emit one unresolved reference from the current scope. */ +function emitRef( + ctx: ExtractorContext, + node: SyntaxNode, + referenceName: string, + referenceKind: 'calls' | 'references' | 'instantiates' | 'implements' | 'imports' +): void { + const fromNodeId = ctx.nodeStack[ctx.nodeStack.length - 1]; + if (!fromNodeId || !referenceName) return; + ctx.addUnresolvedReference({ + fromNodeId, + referenceName, + referenceKind, + line: node.startPosition.row + 1, + column: node.startPosition.column, + }); +} + +/** + * Reference name for a namespaced symbol: resolve the alias through the + * file's `:require` table to `full.ns::name` (exact qualifiedName match), or + * fall back to interop/foreign-namespace forms. + */ +function qualifiedRefName(ns: string, name: string, state: NsState): string { + const full = state.aliases.get(ns); + if (full) return `${full}::${name}`; + if (ns.includes('.')) return `${ns}::${name}`; // direct fully-qualified usage + if (/^[A-Z]/.test(ns)) return `${ns}.${name}`; // Class/staticMethod interop + return name; // unknown lowercase alias — fall back to bare name matching +} + +/** Walk any evaluated (non-list) form for references; lists go to handleList. */ +function walkForm(node: SyntaxNode, ctx: ExtractorContext): void { + const t = node.type; + if (t === 'list_lit') { + handleList(node, ctx); + } else if (t === 'anon_fn_lit') { + // `#(f x)` IS the call form — the grammar puts head + args directly under + // anon_fn_lit, no inner list_lit. Route through handleList so the call is + // extracted (`%`/`%1` arg symbols are 1-char and skipped by handleSymRef). + handleList(node, ctx); + } else if (t === 'sym_lit') { + handleSymRef(node, ctx); + } else if (WALK_THROUGH_TYPES.has(t)) { + for (const child of valueChildren(node)) walkForm(child, ctx); + } + // quoting_lit / dis_expr / literals: not evaluated as code — skip +} + +/** + * A symbol in non-head (argument) position. 
Emit a reference only in the + * high-precision cases: a namespaced symbol (clearly a var usage), a + * `:refer`'d symbol, or a symbol naming a function already defined in this + * file (the common private-helper-passed-to-HOF case — Clojure is + * define-before-use, so the node already exists). Bare locals never match. + */ +function handleSymRef(sym: SyntaxNode, ctx: ExtractorContext): void { + const state = getNsState(sym); + const { ns, name } = symParts(sym, ctx.source); + if (!name || name.length <= 1) return; + + if (ns) { + emitRef(ctx, sym, qualifiedRefName(ns, name, state), 'references'); + return; + } + if (CORE_FORMS.has(name)) return; + if (isLocal(name, state)) return; // bound by an enclosing let/fn/loop — not a var usage + const referNs = state.refers.get(name); + if (referNs) { + emitRef(ctx, sym, `${referNs}::${name}`, 'references'); + return; + } + // Same-file function passed as a value — a higher-order call. + let cache = state.fnNames; + if (!cache || cache.len !== ctx.nodes.length) { + const names = new Set(); + for (const n of ctx.nodes) { + if (n.kind === 'function' || n.kind === 'method') names.add(n.name); + } + cache = { len: ctx.nodes.length, names }; + state.fnNames = cache; + } + if (cache.names.has(name)) emitRef(ctx, sym, name, 'calls'); +} + +/** + * Core forms whose second element is a binding VECTOR: `[name expr name + * expr ...]`. Binding names introduce locals — they are never usages, and + * shadowing a same-file fn name in a `let` is idiomatic Clojure, so walking + * them through handleSymRef would emit false `calls` edges. 
+ */ +const BINDING_FORMS = new Set([ + 'let', 'let*', 'loop', 'loop*', 'binding', 'for', 'doseq', 'dotimes', + 'when-let', 'if-let', 'when-some', 'if-some', 'when-first', + 'with-open', 'with-redefs', 'with-local-vars', +]); + +/** + * Process the pairs of a binding vector into `frame`: even positions are + * binding targets (their names join the frame AFTER their init is walked — + * `let` is sequential), odd positions are init expressions. `for`/`doseq` + * modifiers keep the pairing: `:when expr` / `:while expr` walk the expr, + * `:let [..]` recurses into the nested binding vector. + */ +function processBindingPairs( + vec: SyntaxNode, + ctx: ExtractorContext, + frame: Set +): void { + const kids = valueChildren(vec); + for (let i = 0; i + 1 < kids.length; i += 2) { + const target = kids[i]!; + const init = kids[i + 1]!; + if (target.type === 'kwd_lit') { + if (getNodeText(target, ctx.source) === ':let' && init.type === 'vec_lit') { + processBindingPairs(init, ctx, frame); + } else { + walkForm(init, ctx); // :when / :while expressions + } + continue; + } + walkForm(init, ctx); + collectBindingNames(target, ctx.source, frame); + } +} + +/** + * `(let [name expr ...] body)` and friends — bind the vector's names for the + * duration of the body so shadowed usages don't emit references. + */ +function handleBindingForm(kids: SyntaxNode[], ctx: ExtractorContext, state: NsState): void { + const frame = new Set(); + state.locals.push(frame); + processBindingPairs(kids[1]!, ctx, frame); + for (const kid of kids.slice(2)) walkForm(kid, ctx); + state.locals.pop(); +} + +/** + * `(fn name? [params] body)` / `(fn name? ([a] ...) ([a b] ...))` — the + * optional self-name and the param vectors are bindings: they join a locals + * frame around the bodies instead of being walked as usages. 
+ */ +function walkFnForm(kids: SyntaxNode[], ctx: ExtractorContext, state: NsState): void { + const frame = new Set(); + state.locals.push(frame); + let i = 1; + if (kids[i]?.type === 'sym_lit') { + collectBindingNames(kids[i]!, ctx.source, frame); + i++; + } + if (kids[i]?.type === 'vec_lit') { + collectBindingNames(kids[i]!, ctx.source, frame); + for (const form of kids.slice(i + 1)) walkForm(form, ctx); + } else { + for (const arity of kids.slice(i)) { + if (arity.type !== 'list_lit') { + walkForm(arity, ctx); + continue; + } + const aKids = valueChildren(arity); + if (aKids[0]?.type === 'vec_lit') collectBindingNames(aKids[0]!, ctx.source, frame); + for (const form of aKids.slice(aKids[0]?.type === 'vec_lit' ? 1 : 0)) { + walkForm(form, ctx); + } + } + } + state.locals.pop(); +} + +/** Push a frame of param names around a body walk (defn arities, method impls). */ +function walkBodyWithParams( + params: SyntaxNode | undefined, + body: SyntaxNode[], + ctx: ExtractorContext, + state: NsState +): void { + const frame = new Set(); + if (params) collectBindingNames(params, ctx.source, frame); + state.locals.push(frame); + for (const form of body) walkForm(form, ctx); + state.locals.pop(); +} + +// --------------------------------------------------------------------------- +// re-frame keyword-keyed dispatch +// --------------------------------------------------------------------------- +// +// re-frame routes everything through keyword-keyed registries at runtime — +// `(reg-event-db :todo/add handler)` … `(dispatch [:todo/add x])` — so the +// flow has zero static edges. Keywords are globally unique strings, so the +// bridge is extraction-only: each registration becomes a function node NAMED +// by its (alias-expanded) keyword, each literal dispatch/subscribe site emits +// a `calls` reference with the same name, and the existing exact-name matcher +// links them. 
+// +// Detection is SHAPE-based, not require-gated: real apps wrap re-frame in a +// project facade (status-mobile's `utils.re-frame` fronts 512 files, with +// custom registrars like `reg-root-key-sub` and `sub` for subscribe), so +// gating on `re-frame.core` in the require table misses most call sites. +// The shape is distinctive — a `reg-*` head with a literal keyword first arg +// is re-frame-family vocabulary — and precision is enforced structurally: an +// edge only materializes when a registration node AND a dispatch ref carry +// the exact same keyword, so a stray shape-match in a non-re-frame app +// resolves to nothing and is dropped. (The node side of a stray match — a +// spurious function node named `:kwd` — is accepted: it is inert without a +// same-keyword dispatch site, and the registrar call itself still gets its +// ordinary call ref, so nothing is lost either way.) + +const RE_FRAME_REG_SHAPE = /^reg-[a-z-]+$/; +const RE_FRAME_DISPATCH_FORMS = new Set(['dispatch', 'dispatch-sync', 'subscribe', 'sub']); + +// --------------------------------------------------------------------------- +// UIx / helix (ClojureScript React wrappers) +// --------------------------------------------------------------------------- +// +// Components are defined with a def-macro (`defui` in UIx, `defnc` in helix) +// and composed with the `$` element macro: `($ ui/button {:on-click f} ...)`. +// `$` is the entire composition mechanism, so the component argument gets a +// real `calls` edge (a render IS a call — same reasoning as the React JSX +// child edges), not just an argument-position reference. Gated on `$` +// actually resolving to uix.core / helix.core in the file's require table — +// `$` is too short a name to shape-match. 
/** Namespaces whose `$` is the element-composition macro. */
const UIX_CORE_NAMESPACES = new Set(['uix.core', 'helix.core']);
/** Def-macros whose definitions are modeled as `component` nodes. */
const UIX_COMPONENT_MACROS = new Set(['defui', 'defnc']);

/**
 * `($ ui/button {...} child)` — record a `calls` ref from the element form to
 * the component it renders. Only symbol arguments count: keyword arguments
 * (`:div`, `:<>`) are DOM tags, and a locally-bound symbol has no statically
 * known target, so both are skipped.
 */
function emitUixElementRef(kids: SyntaxNode[], ctx: ExtractorContext, state: NsState): void {
  const component = kids[1];
  if (!component || component.type !== 'sym_lit') return;

  const parts = symParts(component, ctx.source);
  if (!parts.name || isLocal(parts.name, state)) return;

  let target: string;
  if (parts.ns) {
    // Alias-qualified / fully-qualified component symbol.
    target = qualifiedRefName(parts.ns, parts.name, state);
  } else {
    // Bare symbol: prefer the namespace it was `:refer`ed from, when known.
    const owner = state.refers.get(parts.name);
    target = owner ? `${owner}::${parts.name}` : parts.name;
  }
  emitRef(ctx, component, target, 'calls');
}

/**
 * Turn a keyword literal into its canonical `:full.ns/name` string.
 *
 * - `:todo/add` and `:add` are returned as written.
 * - `::add` becomes `:current.ns/add`, using the file's own namespace
 *   (`state.nsName`); with no namespace recorded it degrades to `:add`.
 * - `::subs/items` becomes `:aliased.ns/items`, where `subs` is looked up in
 *   the alias table (falling back to `subs` itself when it is not an alias).
 *
 * The `::` auto-resolve marker only exists in the raw text — the grammar's
 * ns/name fields don't carry it — so it is detected on the raw node text.
 */
function expandKeyword(kwd: SyntaxNode, ctx: ExtractorContext, state: NsState): string {
  const raw = getNodeText(kwd, ctx.source);
  const nsField = getChildByField(kwd, 'namespace');
  const nameField = getChildByField(kwd, 'name');
  const ns = nsField ? getNodeText(nsField, ctx.source) : undefined;
  const name = nameField ? getNodeText(nameField, ctx.source) : raw.replace(/^:+/, '');

  // Plain keyword: nothing to resolve.
  if (!raw.startsWith('::')) return ns ? `:${ns}/${name}` : `:${name}`;

  // Auto-resolved keyword: alias-qualified first, then current namespace.
  if (ns) return `:${state.aliases.get(ns) ?? ns}/${name}`;
  if (state.nsName) return `:${state.nsName}/${name}`;
  return `:${name}`;
}

/**
 * `(reg-event-db :todo/add (fn [db v] ...))` — the registration becomes a
 * function node named by the keyword, and the handler body walks under it so
 * its calls attribute to the event, not the file.
+ */ +function handleReframeRegistration( + list: SyntaxNode, + kids: SyntaxNode[], + ctx: ExtractorContext, + state: NsState, + regName: string +): void { + const kwd = kids[1]; + if (!kwd || kwd.type !== 'kwd_lit') { + // Dynamic registration key — nothing to name; walk for calls only. + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + const keyword = expandKeyword(kwd, ctx, state); + const regNode = ctx.createNode('function', keyword, list, { + signature: `(${regName} ${getNodeText(kwd, ctx.source)})`, + isExported: true, + }); + if (regNode) { + ctx.pushScope(regNode.id); + for (const kid of kids.slice(2)) walkForm(kid, ctx); + ctx.popScope(); + } else { + for (const kid of kids.slice(2)) walkForm(kid, ctx); + } +} + +/** + * `(dispatch [:todo/add x])` / `(subscribe [:todo/items])` — emit a `calls` + * reference named by the literal event keyword so it links to the + * registration node. Variable event vectors (`(dispatch evt)`) stay + * unlinked — the anonymous frontier. + */ +function emitReframeDispatchRef(kids: SyntaxNode[], ctx: ExtractorContext, state: NsState): void { + const vec = kids[1]; + if (!vec || vec.type !== 'vec_lit') return; + const kwd = valueChildren(vec)[0]; + if (!kwd || kwd.type !== 'kwd_lit') return; + emitRef(ctx, vec, expandKeyword(kwd, ctx, state), 'calls'); +} + +/** Dispatch a list form by its head symbol. */ +function handleList(list: SyntaxNode, ctx: ExtractorContext): void { + const kids = valueChildren(list); + const head = kids[0]; + if (!head) return; + + // `((make-handler) req)` or `(:kwd m)` — no callable name; walk everything. 
+ if (head.type !== 'sym_lit') { + for (const kid of kids) walkForm(kid, ctx); + return; + } + + const state = getNsState(list); + const { ns, name } = symParts(head, ctx.source); + + if (!ns) { + switch (name) { + case 'ns': + handleNs(list, kids, ctx, state); + return; + case 'comment': // rich-comment block — never code that runs + case 'quote': + case 'declare': + return; + case 'defn': + case 'defn-': + case 'defmacro': + handleDefn(list, kids, ctx, name === 'defn-'); + return; + case 'def': + case 'defonce': + handleDef(list, kids, ctx); + return; + case 'defprotocol': + handleProtocol(list, kids, ctx, 'protocol'); + return; + case 'definterface': + handleProtocol(list, kids, ctx, 'interface'); + return; + case 'defrecord': + case 'deftype': + handleRecord(list, kids, ctx, name === 'defrecord'); + return; + case 'defmulti': + handleDefmulti(list, kids, ctx); + return; + case 'defmethod': + handleDefmethod(list, kids, ctx); + return; + case 'reify': + case 'proxy': + case 'extend-protocol': + case 'extend-type': + case 'extend': + case 'specify': + case 'specify!': + handleInlineImpl(kids, ctx); + return; + case 'letfn': + handleLetfn(kids, ctx, state); + return; + case 'new': { + // (new Foo args) + const cls = kids[1]; + if (cls?.type === 'sym_lit') { + emitRef(ctx, list, symParts(cls, ctx.source).name, 'instantiates'); + } + for (const kid of kids.slice(2)) walkForm(kid, ctx); + return; + } + } + + // (.method obj args) — interop / protocol method call by bare name. + // (.-property obj) is a ClojureScript property READ, not a call. + if (name.startsWith('.') && name.length > 1 && name !== '..') { + const isPropertyAccess = name.startsWith('.-'); + emitRef(ctx, list, name.replace(/^\.-?/, ''), isPropertyAccess ? 'references' : 'calls'); + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + // (Foo. args) — constructor call. 
+ if (name.endsWith('.') && name.length > 1) { + emitRef(ctx, list, name.slice(0, -1), 'instantiates'); + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + if (CORE_FORMS.has(name)) { + // Binding forms: names join a locals frame, init exprs + body walked. + if (BINDING_FORMS.has(name) && kids[1]?.type === 'vec_lit') { + handleBindingForm(kids, ctx, state); + return; + } + // fn literals: self-name + param vectors are bindings, not usages. + if (name === 'fn' || name === 'fn*') { + walkFnForm(kids, ctx, state); + return; + } + // (as-> expr name forms...) / (catch ExClass e body) — kids[2] is a + // binding name scoped over the remaining forms. + if (name === 'as->' || name === 'catch') { + if (kids[1]) walkForm(kids[1]!, ctx); + const frame = new Set(); + if (kids[2]) collectBindingNames(kids[2]!, ctx.source, frame); + state.locals.push(frame); + for (const kid of kids.slice(3)) walkForm(kid, ctx); + state.locals.pop(); + return; + } + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + + // Library def-macros: `(defroutes app-routes ...)`, `(deftest x ...)`, + // `(defstate db ...)` — anything def-shaped whose first arg is a symbol + // defines that symbol. Without this, the var never becomes a node and + // every call inside the body attributes to the file instead. + // UIx `defui` / helix `defnc` define React components — kind 'component' + // (same modeling as Svelte/Vue components). + if (/^def[a-z-]*$/.test(name) && name !== 'default' && name !== 'defer') { + const defSym = kids[1]; + if (defSym?.type === 'sym_lit') { + const kind = UIX_COMPONENT_MACROS.has(name) ? 
'component' : 'function'; + const defNode = ctx.createNode(kind, symParts(defSym, ctx.source).name, list, { + signature: `(${name} ...)`, + isExported: !hasPrivateMeta(defSym, ctx.source), + }); + if (defNode) { + ctx.pushScope(defNode.id); + for (const kid of kids.slice(2)) walkForm(kid, ctx); + ctx.popScope(); + return; + } + } + } + + // A locally-bound head — `(let [helper (mk)] (helper 1))` — calls the + // LOCAL, not the same-named var; the target is unknowable statically. + if (isLocal(name, state)) { + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + + // re-frame shapes (see the block comment above RE_FRAME_REG_SHAPE). + if (RE_FRAME_REG_SHAPE.test(name) && kids[1]?.type === 'kwd_lit' && kids.length >= 3) { + // The registrar itself is still called — keep its ordinary call ref so + // "who calls reg-sub" / impact on a project facade sees every + // registration site. + const regReferNs = state.refers.get(name); + emitRef(ctx, list, regReferNs ? `${regReferNs}::${name}` : name, 'calls'); + handleReframeRegistration(list, kids, ctx, state, name); + return; + } + if (RE_FRAME_DISPATCH_FORMS.has(name)) { + emitReframeDispatchRef(kids, ctx, state); + // fall through — the dispatch call itself is still a call + } + + // UIx/helix element macro: `($ button {...})` with `$` :refer'd. + if (name === '$' && UIX_CORE_NAMESPACES.has(state.refers.get('$') ?? '')) { + emitUixElementRef(kids, ctx, state); + for (const kid of kids.slice(kids[1]?.type === 'sym_lit' ? 2 : 1)) walkForm(kid, ctx); + return; + } + + // Plain call. Prefer the :refer'd qualified form when known. + const referNs = state.refers.get(name); + emitRef(ctx, list, referNs ? `${referNs}::${name}` : name, 'calls'); + for (const kid of kids.slice(1)) walkForm(kid, ctx); + return; + } + + // re-frame via an alias — `(rf/reg-event-db :k ...)`, `(rf/dispatch [:k x])` + // — including project facades (`utils.re-frame`); see RE_FRAME_REG_SHAPE. 
+ if (RE_FRAME_REG_SHAPE.test(name) && kids[1]?.type === 'kwd_lit' && kids.length >= 3) { + // Keep the ordinary registrar call ref (callers/impact on the facade). + emitRef(ctx, list, qualifiedRefName(ns, name, state), 'calls'); + handleReframeRegistration(list, kids, ctx, state, name); + return; + } + if (RE_FRAME_DISPATCH_FORMS.has(name)) { + emitReframeDispatchRef(kids, ctx, state); + // fall through — the dispatch call itself is still a call + } + + // UIx/helix element macro via alias: `(uix/$ button {...})`. + if (name === '$' && UIX_CORE_NAMESPACES.has(state.aliases.get(ns) ?? ns)) { + emitUixElementRef(kids, ctx, state); + for (const kid of kids.slice(kids[1]?.type === 'sym_lit' ? 2 : 1)) walkForm(kid, ctx); + return; + } + + // Qualified def-macros: `(rum/defc page [args] ...)`, `(m/defstate db ...)` + // — same def-shape heuristic as the unqualified branch. Without this, every + // rum/uix/fulcro component in a ClojureScript app is invisible. + if (/^def[a-z-]*$/.test(name) && kids[1]?.type === 'sym_lit') { + const kind = UIX_COMPONENT_MACROS.has(name) ? 'component' : 'function'; + const defNode = ctx.createNode(kind, symParts(kids[1]!, ctx.source).name, list, { + signature: `(${ns}/${name} ...)`, + isExported: !hasPrivateMeta(kids[1]!, ctx.source), + }); + if (defNode) { + ctx.pushScope(defNode.id); + for (const kid of kids.slice(2)) walkForm(kid, ctx); + ctx.popScope(); + return; + } + } + + // Namespaced head: aliased / fully-qualified / interop call. + emitRef(ctx, list, qualifiedRefName(ns, name, state), 'calls'); + for (const kid of kids.slice(1)) walkForm(kid, ctx); +} + +/** + * `(ns my.app.core (:require ...) (:import ...))` — create the module node, + * scope the rest of the file under it, record alias/refer tables, and create + * import nodes + `imports` refs for required namespaces. 
+ */ +function handleNs( + list: SyntaxNode, + kids: SyntaxNode[], + ctx: ExtractorContext, + state: NsState +): void { + const nameSym = kids[1]; + if (!nameSym || nameSym.type !== 'sym_lit') return; + const nsName = symParts(nameSym, ctx.source).name; + state.nsName = nsName; + + const docNode = kids[2]?.type === 'str_lit' ? kids[2] : undefined; + const moduleNode = ctx.createNode('module', nsName, list, { + signature: `(ns ${nsName})`, + docstring: docNode ? stringContent(docNode, ctx.source) : undefined, + endLine: list.endPosition.row + 1, + }); + if (!moduleNode) return; + // Deliberately never popped — the whole file's top-level defs live in this + // namespace, giving them qualifiedName `my.app.core::sym` (same pattern as + // the JVM package_header namespace wrapper). A (rare) second `ns` form in + // the same file nests its module inside the first, so later defs carry both + // namespaces in their qualifiedName — accepted: still searchable, and + // multi-ns files are vanishingly rare outside generated code. + ctx.pushScope(moduleNode.id); + + for (const clause of kids.slice(2)) { + if (clause.type !== 'list_lit') continue; + const clauseKids = valueChildren(clause); + const kwd = clauseKids[0]; + if (!kwd || kwd.type !== 'kwd_lit') continue; + const kwdName = getNodeText(kwd, ctx.source).replace(/^:+/, ''); + + if (kwdName === 'require' || kwdName === 'use' || kwdName === 'require-macros') { + for (const entry of clauseKids.slice(1)) parseRequireEntry(entry, '', ctx, state); + } else if (kwdName === 'import') { + for (const entry of clauseKids.slice(1)) parseImportEntry(entry, ctx); + } + } +} + +/** One `:require` entry: `[my.app.db :as db :refer [save!]]`, a bare sym, a prefix list, or a reader conditional. 
*/
function parseRequireEntry(
  entry: SyntaxNode,
  prefix: string,
  ctx: ExtractorContext,
  state: NsState
): void {
  // Reader conditional: every non-keyword child is itself a require entry
  // (the keywords are the platform tags).
  if (entry.type === 'read_cond_lit' || entry.type === 'splicing_read_cond_lit') {
    for (const child of valueChildren(entry)) {
      if (child.type !== 'kwd_lit') parseRequireEntry(child, prefix, ctx, state);
    }
    return;
  }
  // Bare symbol: `my.app.db` (or a leaf of a prefix list).
  if (entry.type === 'sym_lit') {
    const base = symParts(entry, ctx.source).name;
    createRequire(prefix ? `${prefix}.${base}` : base, entry, ctx);
    return;
  }
  if (entry.type !== 'vec_lit' && entry.type !== 'list_lit') return;

  const kids = valueChildren(entry);
  const first = kids[0];
  if (!first) return;
  let base: string | null = null;
  if (first.type === 'sym_lit') base = symParts(first, ctx.source).name;
  else if (first.type === 'str_lit') base = stringContent(first, ctx.source); // shadow-cljs npm require
  if (!base) return;
  const full = prefix ? `${prefix}.${base}` : base;

  // Prefix form: `(my.app [db :as db] core)` — sub-entries are vecs/syms/lists.
  // Any keyword option means this is a libspec, not a prefix list.
  const subEntries = kids
    .slice(1)
    .filter((k) => k.type === 'vec_lit' || k.type === 'list_lit' || k.type === 'sym_lit');
  const hasOptions = kids.some((k) => k.type === 'kwd_lit');
  if (subEntries.length > 0 && !hasOptions) {
    for (const sub of subEntries) parseRequireEntry(sub, full, ctx, state);
    return;
  }

  createRequire(full, entry, ctx);

  // Libspec options: `:as` / `:as-alias` feed the alias table; `:refer`,
  // ClojureScript's `:refer-macros`, and `:use`'s `:only` all bring bare
  // symbols into scope, so each listed symbol maps to this namespace.
  for (let i = 1; i < kids.length - 1; i++) {
    const k = kids[i]!;
    if (k.type !== 'kwd_lit') continue;
    const opt = getNodeText(k, ctx.source).replace(/^:+/, '');
    const value = kids[i + 1];
    if (!value) continue;
    if ((opt === 'as' || opt === 'as-alias') && value.type === 'sym_lit') {
      state.aliases.set(symParts(value, ctx.source).name, full);
    } else if (
      (opt === 'refer' || opt === 'refer-macros' || opt === 'only') &&
      value.type === 'vec_lit'
    ) {
      for (const refSym of valueChildren(value)) {
        if (refSym.type === 'sym_lit') {
          state.refers.set(symParts(refSym, ctx.source).name, full);
        }
      }
    }
  }
}

/**
 * Create the `import` node for a required namespace and emit the matching
 * `imports` reference.
 *
 * @param nsName Fully-expanded namespace name (prefix already applied).
 * @param node   The require entry the node/ref is anchored to; its trimmed
 *               source text becomes the signature.
 * @param ctx    Extraction context.
 */
function createRequire(nsName: string, node: SyntaxNode, ctx: ExtractorContext): void {
  ctx.createNode('import', nsName, node, {
    signature: getNodeText(node, ctx.source).trim(),
  });
  emitRef(ctx, node, nsName, 'imports');
}

/** One `:import` entry: `(java.time Instant Duration)` or `java.util.Date`. External — import nodes only, no refs.
*/
function parseImportEntry(entry: SyntaxNode, ctx: ExtractorContext): void {
  // Fully-qualified class symbol: `java.util.Date`.
  if (entry.type === 'sym_lit') {
    ctx.createNode('import', symParts(entry, ctx.source).name, entry, {
      signature: getNodeText(entry, ctx.source).trim(),
    });
    return;
  }
  if (entry.type !== 'list_lit' && entry.type !== 'vec_lit') return;

  // Package list: `(java.time Instant Duration)` — one import per class,
  // all sharing the whole entry's text as signature.
  const [pkg, ...classes] = valueChildren(entry);
  if (!pkg || pkg.type !== 'sym_lit') return;
  const pkgName = symParts(pkg, ctx.source).name;
  const signature = getNodeText(entry, ctx.source).trim();
  for (const cls of classes) {
    if (cls.type !== 'sym_lit') continue;
    ctx.createNode('import', `${pkgName}.${symParts(cls, ctx.source).name}`, cls, { signature });
  }
}

/**
 * `(defn name docstring? attr-map? [params] body)` or the multi-arity form
 * `(defn name ([a] ...) ([a b] ...))`. Creates one function node whose
 * signature is the space-joined param vectors, then walks each arity body
 * under its own params frame.
 */
function handleDefn(
  list: SyntaxNode,
  kids: SyntaxNode[],
  ctx: ExtractorContext,
  privateForm: boolean
): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') {
    // No name to define — keep whatever calls the form contains.
    for (const kid of kids.slice(1)) walkForm(kid, ctx);
    return;
  }
  const name = symParts(nameSym, ctx.source).name;
  const isPrivate = privateForm || hasPrivateMeta(nameSym, ctx.source);

  // Skip over the optional docstring and attr-map to reach the arities.
  let cursor = 2;
  const docNode = kids[cursor]?.type === 'str_lit' ? kids[cursor] : undefined;
  if (docNode) cursor += 1;
  const docstring = docNode ? stringContent(docNode, ctx.source) : undefined;
  if (kids[cursor]?.type === 'map_lit') cursor += 1; // attr-map

  const arities: { params: SyntaxNode; body: SyntaxNode[] }[] = [];
  const singleParams = kids[cursor];
  if (singleParams?.type === 'vec_lit') {
    arities.push({ params: singleParams, body: kids.slice(cursor + 1) });
  } else {
    // Multi-arity: each remaining list is ([params] body...)
    for (const clause of kids.slice(cursor)) {
      if (clause.type !== 'list_lit') continue;
      const [params, ...body] = valueChildren(clause);
      if (params?.type === 'vec_lit') arities.push({ params, body });
    }
  }

  const paramTexts = arities.map((arity) => getNodeText(arity.params, ctx.source));
  const fnNode = ctx.createNode('function', name, list, {
    signature: paramTexts.join(' ') || undefined,
    docstring,
    visibility: isPrivate ? 'private' : 'public',
    isExported: !isPrivate,
  });
  if (!fnNode) return;

  const state = getNsState(list);
  ctx.pushScope(fnNode.id);
  for (const { params, body } of arities) walkBodyWithParams(params, body, ctx, state);
  ctx.popScope();
}

/**
 * `(def name value)` / `(defonce name value)`. A value that is a `fn`/`fn*`
 * list or a `#(...)` literal makes the def a function node; otherwise it is a
 * `constant` (literal value) or a `variable`. The value is walked under the
 * new node so its calls attribute to the def.
 */
function handleDef(list: SyntaxNode, kids: SyntaxNode[], ctx: ExtractorContext): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') return;
  const name = symParts(nameSym, ctx.source).name;
  const isPrivate = hasPrivateMeta(nameSym, ctx.source);

  // A string in slot 2 is a docstring only when a value follows it.
  const hasDoc = kids.length > 3 && kids[2]?.type === 'str_lit';
  const docstring = hasDoc ? stringContent(kids[2]!, ctx.source) : undefined;
  const value = kids[hasDoc ? 3 : 2];

  // (def handler (fn [req] ...)) / (def handler #(...)) — a function in disguise.
  const isFnValue = (node: SyntaxNode): boolean => {
    if (node.type === 'anon_fn_lit') return true;
    if (node.type !== 'list_lit') return false;
    const head = valueChildren(node)[0];
    if (!head || head.type !== 'sym_lit') return false;
    const headName = symParts(head, ctx.source).name;
    return headName === 'fn' || headName === 'fn*';
  };

  if (value && isFnValue(value)) {
    const fnNode = ctx.createNode('function', name, list, {
      docstring,
      visibility: isPrivate ? 'private' : 'public',
      isExported: !isPrivate,
    });
    if (fnNode) {
      ctx.pushScope(fnNode.id);
      walkForm(value, ctx);
      ctx.popScope();
    }
    return;
  }

  const kind = value && LITERAL_TYPES.has(value.type) ? 'constant' : 'variable';
  const defNode = ctx.createNode(kind, name, list, {
    docstring,
    visibility: isPrivate ? 'private' : 'public',
    isExported: !isPrivate,
  });
  if (!defNode || !value) return;
  ctx.pushScope(defNode.id);
  walkForm(value, ctx);
  ctx.popScope();
}

/**
 * `(defprotocol Storage (put [this k v]) (fetch [this k]))` / `definterface`.
 * Creates the protocol/interface node and, scoped under it, one `method`
 * node per signature list (signature = its param vectors, docstring = its
 * first string literal).
 */
function handleProtocol(
  list: SyntaxNode,
  kids: SyntaxNode[],
  ctx: ExtractorContext,
  kind: 'protocol' | 'interface'
): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') return;
  const protoDoc = kids[2]?.type === 'str_lit' ? kids[2] : undefined;

  const protoNode = ctx.createNode(kind, symParts(nameSym, ctx.source).name, list, {
    docstring: protoDoc ? stringContent(protoDoc, ctx.source) : undefined,
    isExported: true,
  });
  if (!protoNode) return;

  ctx.pushScope(protoNode.id);
  for (const sig of kids.slice(2)) {
    if (sig.type !== 'list_lit') continue;
    const sigKids = valueChildren(sig);
    const methodSym = sigKids[0];
    if (!methodSym || methodSym.type !== 'sym_lit') continue;

    // One pass: gather every arity vector and the first docstring.
    const arityTexts: string[] = [];
    let methodDoc: SyntaxNode | undefined;
    for (const part of sigKids) {
      if (part.type === 'vec_lit') arityTexts.push(getNodeText(part, ctx.source));
      else if (part.type === 'str_lit' && !methodDoc) methodDoc = part;
    }

    ctx.createNode('method', symParts(methodSym, ctx.source).name, sig, {
      signature: arityTexts.join(' ') || undefined,
      docstring: methodDoc ? stringContent(methodDoc, ctx.source) : undefined,
    });
  }
  ctx.popScope();
}

/**
 * `(defrecord MemStore [state] Storage (put [_ k v] ...))` / `deftype`.
 * Creates a class node with field nodes, `implements` refs for the named
 * protocols/interfaces, and a method node per inline implementation.
 */
function handleRecord(
  list: SyntaxNode,
  kids: SyntaxNode[],
  ctx: ExtractorContext,
  isRecord: boolean
): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') return;
  const name = symParts(nameSym, ctx.source).name;
  const fieldsVec = kids[2]?.type === 'vec_lit' ? kids[2] : undefined;
  const fieldsText = fieldsVec ? getNodeText(fieldsVec, ctx.source) : undefined;

  const classNode = ctx.createNode('class', name, list, {
    signature: fieldsText,
    isExported: true,
  });
  if (!classNode) return;

  // defrecord implicitly defines positional + map constructors; creating them
  // as function nodes lets `(->MemStore ...)` call sites resolve by name.
  if (isRecord) {
    ctx.createNode('function', `->${name}`, list, { signature: fieldsText, isExported: true });
    ctx.createNode('function', `map->${name}`, list, { isExported: true });
  }

  ctx.pushScope(classNode.id);
  for (const field of fieldsVec ? valueChildren(fieldsVec) : []) {
    if (field.type === 'sym_lit') ctx.createNode('field', symParts(field, ctx.source).name, field);
  }
  for (const member of kids.slice(fieldsVec ? 3 : 2)) {
    switch (member.type) {
      case 'sym_lit':
        // Protocol / interface being implemented
        emitRef(ctx, member, symParts(member, ctx.source).name, 'implements');
        break;
      case 'list_lit':
        handleMethodImpl(member, ctx);
        break;
    }
  }
  ctx.popScope();
}

/**
 * `(put [_ k v] (swap! state assoc k v))` inside defrecord/deftype — creates
 * a method node and walks the body under it with the params as locals.
 */
function handleMethodImpl(impl: SyntaxNode, ctx: ExtractorContext): void {
  const implKids = valueChildren(impl);
  const methodSym = implKids[0];
  if (!methodSym || methodSym.type !== 'sym_lit') return;

  const params = implKids[1]?.type === 'vec_lit' ? implKids[1] : undefined;
  const methodNode = ctx.createNode('method', symParts(methodSym, ctx.source).name, impl, {
    signature: params ? getNodeText(params, ctx.source) : undefined,
  });
  if (!methodNode) return;

  const body = params ? implKids.slice(2) : implKids.slice(1);
  ctx.pushScope(methodNode.id);
  walkBodyWithParams(params, body, ctx, getNsState(impl));
  ctx.popScope();
}

/** `(defmulti render :type)` — the dispatch entry point.
*/
function handleDefmulti(list: SyntaxNode, kids: SyntaxNode[], ctx: ExtractorContext): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') return;

  // Optional docstring sits between the name and the dispatch fn.
  const docNode = kids[2]?.type === 'str_lit' ? kids[2] : undefined;
  const dispatch = kids[docNode ? 3 : 2];

  const fnNode = ctx.createNode('function', symParts(nameSym, ctx.source).name, list, {
    signature: dispatch ? getNodeText(dispatch, ctx.source) : undefined,
    docstring: docNode ? stringContent(docNode, ctx.source) : undefined,
    isExported: true,
  });
  if (!fnNode || !dispatch) return;

  // Walk the dispatch expression so its calls attribute to the multimethod.
  ctx.pushScope(fnNode.id);
  walkForm(dispatch, ctx);
  ctx.popScope();
}

/**
 * `(defmethod render :button [w] ...)` — one implementation of a multimethod,
 * emitted as a function node with the multimethod's name (same name →
 * overload) and `<dispatch value> <params>` as signature. For a foreign
 * multimethod — `(defmethod ig/init-key ::server ...)` — the node is still
 * named by the bare method name (`init-key`): slightly mislabeled, since the
 * local file doesn't own `init-key`, but deliberately kept because it is
 * exactly what a search for the integrant/multimethod key needs to find.
 */
function handleDefmethod(list: SyntaxNode, kids: SyntaxNode[], ctx: ExtractorContext): void {
  const nameSym = kids[1];
  if (!nameSym || nameSym.type !== 'sym_lit') return;

  const dispatchVal = kids[2];
  // First vector at or after slot 3 is the param vector.
  const paramsIdx = kids.findIndex((kid, idx) => idx >= 3 && kid.type === 'vec_lit');
  const paramsNode = paramsIdx >= 0 ? kids[paramsIdx] : undefined;

  const dispatchText = dispatchVal ? getNodeText(dispatchVal, ctx.source) : '';
  const paramsText = paramsNode ? getNodeText(paramsNode, ctx.source) : '';
  const fnNode = ctx.createNode('function', symParts(nameSym, ctx.source).name, list, {
    signature: [dispatchText, paramsText].join(' ').trim() || undefined,
  });
  if (!fnNode) return;

  const body = paramsIdx >= 0 ? kids.slice(paramsIdx + 1) : kids.slice(3);
  ctx.pushScope(fnNode.id);
  walkBodyWithParams(paramsNode, body, ctx, getNsState(list));
  ctx.popScope();
}

/**
 * `reify` / `proxy` / `extend-protocol` / `extend-type` bodies. An inline
 * method impl `(method [args] body)` is anonymous: no node is created, but
 * its body is walked (params as locals) so calls attribute to the enclosing
 * function. Every other child is walked normally.
 */
function handleInlineImpl(kids: SyntaxNode[], ctx: ExtractorContext): void {
  for (const kid of kids.slice(1)) {
    if (kid.type === 'list_lit') {
      const [methodSym, methodParams, ...methodBody] = valueChildren(kid);
      if (methodSym?.type === 'sym_lit' && methodParams?.type === 'vec_lit') {
        walkBodyWithParams(methodParams, methodBody, ctx, getNsState(kid));
        continue;
      }
    }
    walkForm(kid, ctx);
  }
}

// ---------------------------------------------------------------------------
// EDN data mode (.edn files: deps.edn, bb.edn, shadow-cljs.edn, system configs)
// ---------------------------------------------------------------------------

/**
 * Collect qualified-symbol references (`app.core/init` in a shadow-cljs
 * `:main`, integrant component maps, …) from an EDN value and everything
 * nested inside it, in document order. Bare symbols are data, not code —
 * only namespaced symbols are precise enough to reference.
 */
function scanEdnValueForRefs(node: SyntaxNode, ctx: ExtractorContext): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they pop (and emit) left-to-right.
  const pending: SyntaxNode[] = [node];
  while (pending.length > 0) {
    const current = pending.pop()!;
    if (current.type !== 'sym_lit') {
      const children = valueChildren(current);
      for (let idx = children.length - 1; idx >= 0; idx--) pending.push(children[idx]!);
      continue;
    }
    const { ns, name } = symParts(current, ctx.source);
    // `app.core/init` → app.core::init; single-segment ns (rare in EDN) is
    // still emitted — there is no alias table in a data file to consult.
    if (ns) emitRef(ctx, current, `${ns}::${name}`, 'references');
  }
}

/**
 * EDN is data: never emit `calls`, never interpret list heads. Top-level map
 * keys become `property` nodes (one level only, so multi-megabyte fixture
 * files can't explode the graph), and every qualified symbol in their values
 * becomes a `references` edge to the code it names.
 */
function handleEdnTopLevel(node: SyntaxNode, ctx: ExtractorContext): void {
  if (node.type === 'map_lit' || node.type === 'ns_map_lit') {
    // Children are consumed below as alternating key, value, key, value, …
    const kids = valueChildren(node);
    // A config map (deps.edn, shadow-cljs.edn, system.edn) has dozens of keys
    // at most. Thousands of keys means a dataset (translation dicts, icon
    // tables) — extracting those as property nodes explodes the graph with
    // pure data (measured: logseq's locale dicts alone added 40k nodes).
    // Skip the nodes entirely but still scan for code references.
    // The cutoff actually applied is "more than 64 key/value pairs".
    if (kids.length / 2 > 64) {
      scanEdnValueForRefs(node, ctx);
      return;
    }
    // `i + 1 < kids.length` ignores a trailing key that has no value.
    for (let i = 0; i + 1 < kids.length; i += 2) {
      const key = kids[i]!;
      const value = kids[i + 1]!;
      // Non-keyword keys get no property node; both sides are scanned for refs.
      if (key.type !== 'kwd_lit') {
        scanEdnValueForRefs(key, ctx);
        scanEdnValueForRefs(value, ctx);
        continue;
      }
      const keyText = getNodeText(key, ctx.source);
      // Signature = the value's text with whitespace runs collapsed, capped
      // at 80 characters (77 + "...").
      const valuePreview = getNodeText(value, ctx.source).replace(/\s+/g, ' ');
      const prop = ctx.createNode('property', keyText, key, {
        signature: valuePreview.length > 80 ? `${valuePreview.slice(0, 77)}...` : valuePreview,
      });
      if (prop) {
        // Refs found in the value are emitted while the property is in scope.
        ctx.pushScope(prop.id);
        scanEdnValueForRefs(value, ctx);
        ctx.popScope();
      } else {
        // No node was created — still scan the value so its refs are kept.
        scanEdnValueForRefs(value, ctx);
      }
    }
    return;
  }
  // Top-level vector/list/etc. (fixture data) — refs only, no nodes.
  scanEdnValueForRefs(node, ctx);
}

/**
 * `(letfn [(f [x] ...) (g [y] ...)] body)` — local fn bindings, then body.
 * One frame holding the local fn names stays on `state.locals` for the whole
 * form; each binding body additionally gets its own params frame.
 */
function handleLetfn(kids: SyntaxNode[], ctx: ExtractorContext, state: NsState): void {
  // NOTE(review): other generic arguments in this copy of the file were lost
  // in extraction, so this was presumably `new Set<string>()` — confirm.
  const frame = new Set();
  state.locals.push(frame);
  const bindings = kids[1];
  if (bindings?.type === 'vec_lit') {
    // The local fn NAMES are in scope in every binding body and the letfn
    // body (mutual recursion), so collect them all before walking anything.
    for (const binding of valueChildren(bindings)) {
      if (binding.type !== 'list_lit') continue;
      const bSym = valueChildren(binding)[0];
      if (bSym?.type === 'sym_lit') collectBindingNames(bSym, ctx.source, frame);
    }
    // Second pass: walk each binding's body with its own params as locals.
    for (const binding of valueChildren(bindings)) {
      if (binding.type !== 'list_lit') continue;
      const bKids = valueChildren(binding);
      const paramsVec = bKids[1]?.type === 'vec_lit' ? bKids[1] : undefined;
      walkBodyWithParams(paramsVec, bKids.slice(paramsVec ? 2 : 1), ctx, state);
    }
  }
  // The letfn body, with the local fn names still in scope.
  for (const form of kids.slice(2)) walkForm(form, ctx);
  state.locals.pop();
}

/**
 * Language extractor for Clojure-family sources. All extraction happens in
 * the `visitNode` hook; the declarative node-type lists are left empty.
 */
export const clojureExtractor: LanguageExtractor = {
  // The grammar has no semantic node types — everything routes through the
  // visitNode hook below; the core's declarative dispatch never fires.
  functionTypes: [],
  classTypes: [],
  methodTypes: [],
  interfaceTypes: [],
  structTypes: [],
  enumTypes: [],
  typeAliasTypes: [],
  importTypes: [],
  callTypes: [],
  variableTypes: [],
  nameField: 'name',
  bodyField: 'body',
  paramsField: 'params',

  // Per the inline notes below: returning false lets the core walk the node's
  // children; the true-returning branches are the ones fully handled here
  // (or deliberately skipped).
  visitNode: (node, ctx) => {
    const t = node.type;

    // .edn files are pure data — property nodes + references, never calls.
    if (ctx.filePath.endsWith('.edn')) {
      if (t === 'source') return false; // walk top-level forms
      if (t === 'comment' || t === 'dis_expr') return true;
      handleEdnTopLevel(node, ctx);
      return true;
    }

    if (t === 'list_lit') {
      handleList(node, ctx);
      return true;
    }
    // Discarded (`#_form`) and quoted data are not code.
    if (t === 'dis_expr' || t === 'quoting_lit') return true;
    // Everything else (source root, top-level vecs/maps, reader conditionals)
    // returns false so the core walks children and this hook sees the lists.
+ return false; + }, +}; diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts index 543598b8e..85d11d65f 100644 --- a/src/extraction/languages/index.ts +++ b/src/extraction/languages/index.ts @@ -26,6 +26,7 @@ import { scalaExtractor } from './scala'; import { luaExtractor } from './lua'; import { luauExtractor } from './luau'; import { objcExtractor } from './objc'; +import { clojureExtractor } from './clojure'; export const EXTRACTORS: Partial> = { typescript: typescriptExtractor, @@ -49,4 +50,5 @@ export const EXTRACTORS: Partial> = { lua: luaExtractor, luau: luauExtractor, objc: objcExtractor, + clojure: clojureExtractor, }; diff --git a/src/extraction/wasm/README.md b/src/extraction/wasm/README.md new file mode 100644 index 000000000..16b0831bc --- /dev/null +++ b/src/extraction/wasm/README.md @@ -0,0 +1,38 @@ +# Vendored tree-sitter grammar wasm builds + +Grammars in this directory are vendored because `tree-sitter-wasms` either +doesn't ship them or ships a build with an ABI too old for our `web-tree-sitter` +(old-ABI wasms corrupt the shared WASM heap — see the Lua note in +`../grammars.ts`). Every vendored grammar must be listed in the vendored-path +branch of `loadGrammarsForLanguages` in `../grammars.ts`, and `copy-assets` +(run by `npm run build`) ships `*.wasm` from here into `dist/`. + +**Reproducibility:** each entry below records the exact source commit, +toolchain, and command used to produce the binary. When bumping a grammar, +verify it with `node scripts/add-lang/check-grammar.mjs ` +(ABI print + repeated-parse heap-corruption check) and update its entry. 
+ +## tree-sitter-clojure.wasm + +- **Source:** https://github.com/sogaiu/tree-sitter-clojure + commit `e43eff80d17cf34852dcd92ca5e6986d23a7040f` (master, 2025-08-26) +- **ABI:** 14 +- **Toolchain:** tree-sitter CLI `0.26.9` (`npx --yes tree-sitter-cli`), which + downloads its own wasi-sdk; no Docker/emscripten required +- **Command:** + ```bash + git clone https://github.com/sogaiu/tree-sitter-clojure + cd tree-sitter-clojure && git checkout e43eff80d17cf34852dcd92ca5e6986d23a7040f + npx --yes tree-sitter-cli build --wasm # → tree-sitter-clojure.wasm + ``` +- **Why vendored:** no Clojure grammar in `tree-sitter-wasms`, and upstream + publishes no prebuilt wasm (the npm `tree-sitter-clojure` package is the + unmaintained oakmac grammar at ABI 9, which doesn't load in modern + web-tree-sitter). + +## tree-sitter-pascal.wasm · tree-sitter-scala.wasm · tree-sitter-lua.wasm · tree-sitter-luau.wasm + +Vendored before this README existed; provenance not recorded at the time. +Lua is the upstream ABI-15 build (the `tree-sitter-wasms` Lua is ABI 13 and +fails the heap-corruption check — see `../grammars.ts`). When any of these is +next bumped, record its full recipe here in the format above. diff --git a/src/extraction/wasm/tree-sitter-clojure.wasm b/src/extraction/wasm/tree-sitter-clojure.wasm new file mode 100755 index 000000000..de50480c4 Binary files /dev/null and b/src/extraction/wasm/tree-sitter-clojure.wasm differ diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index fc184132e..8bece9bd6 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -1287,18 +1287,25 @@ export class ToolHandler { // names (Class.method / Class::method) — the agent's most precise input, // resolved exactly by findAllSymbols. (The old strip mangled Class.method // into Class, throwing the method away.) 
- const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i; + const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte|clj|cljs|cljc|bb|edn)$/i; const tokens = [...new Set( query.split(/[\s,()[\]]+/) .map((t) => t.replace(FILE_EXT, '').trim()) - .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)) + // Symbol charset covers Lisp-family names too: kebab-case + // (`on-route-change+`), predicates (`valid?`), and Clojure + // alias-qualified `set-state/dashboard` / keyword `:profile/logout` + // forms. Without these, NO Clojure symbol passes the filter and the + // flow builder silently never runs on Clojure repos. Tokens that + // don't resolve to nodes are dropped downstream, so the wider + // charset admits no noise by itself. + .filter((t) => t.length >= 3 && /^:?[A-Za-z_$][\w$+!?*<>='-]*(?:(?:::|[./]):?[\w$+!?*<>='-]+)*$/.test(t)) )].slice(0, 16); if (tokens.length < 2) return EMPTY; // Pool of name SEGMENTS (Class + method from every token) used to // disambiguate an ambiguous SIMPLE name: keep a candidate only if its // CONTAINER class is itself named in the query. const segPool = new Set(); - for (const t of tokens) for (const s of t.toLowerCase().split(/::|\./)) if (s) segPool.add(s); + for (const t of tokens) for (const s of t.toLowerCase().split(/::|[./]/)) if (s) segPool.add(s); const named = new Map(); // Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in // the whole graph). These are safe to SPARE a file on: the agent named THIS @@ -1635,14 +1642,21 @@ export class ToolHandler { // agent explicitly named is in the subgraph and its file is scored. 
const namedSeedIds = new Set(); { - const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i; + const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte|clj|cljs|cljc|bb|edn)$/i; const CALLABLE = new Set(['method', 'function', 'component', 'constructor']); const isTestPath = (p: string) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p); const bodyLines = (n: Node) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine); const tokens = [...new Set( query.split(/[\s,()[\]]+/) .map((t) => t.replace(FILE_EXT, '').trim()) - .filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)) + // Symbol charset covers Lisp-family names too: kebab-case + // (`on-route-change+`), predicates (`valid?`), and Clojure + // alias-qualified `set-state/dashboard` / keyword `:profile/logout` + // forms. Without these, NO Clojure symbol passes the filter and the + // flow builder silently never runs on Clojure repos. Tokens that + // don't resolve to nodes are dropped downstream, so the wider + // charset admits no noise by itself. + .filter((t) => t.length >= 3 && /^:?[A-Za-z_$][\w$+!?*<>='-]*(?:(?:::|[./]):?[\w$+!?*<>='-]+)*$/.test(t)) )].slice(0, 16); // PascalCase tokens in the query are type/file disambiguators — when the // agent writes "DataRequest task validate", the `task`/`validate` it wants @@ -1655,6 +1669,8 @@ export class ToolHandler { const lc = ct.toLowerCase(); return n.filePath.toLowerCase().includes(lc) || n.qualifiedName.toLowerCase().includes(lc); }); + // PASS 1 — resolve every token's candidate defs (no picking yet). 
+ const perToken: { cands: Node[]; mods: Node[] }[] = []; for (const t of tokens) { // Enumerate ALL defs of a bare token via the direct index, not FTS — a // 50+-overload name (tokio `poll`) ranks the wanted def (`Harness::poll`) @@ -1666,19 +1682,78 @@ export class ToolHandler { const cands = raw .filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath)) .sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a)); - // A specific name (<=3 defs) injects all its defs. An overloaded name - // (`validate` = 10, `request` = 44) would flood the subgraph, so inject - // only: the overloads whose file/class the query ALSO names (the agent - // told us which one it wants — DataRequest's, not Validation.swift's), - // capped; else fall back to the single most-substantive def. This is the - // explore-side mirror of codegraph_node's overload disambiguation. + // A token can also name a MODULE by its last segment — the Clojure norm + // ("the deactivate stage" = ns `app.page.lifecycle.deactivate`, whose + // fns are named per page type). Callable-only resolution makes those + // tokens contribute nothing, or worse, latch onto an unrelated same-name + // fn in another subsystem. A module match is a strong file pointer. + const last = t.toLowerCase(); + const mods = cg + .searchNodes(t, { limit: 20, kinds: ['module', 'namespace'] }) + .map((r) => r.node) + .filter( + (n) => + !isTestPath(n.filePath) && + lastQualifierPart(n.name).toLowerCase() === last + ) + .slice(0, 3); + perToken.push({ cands, mods }); + } + // Anchor directories: where the SPECIFIC tokens' defs live. The agent's + // bag of names describes ONE flow, so its tokens are spatially coherent — + // when `on-route-change+` (1 def) lives in app/page/, the `deactivate` + // the agent means is app/page/lifecycle's, not the SCIM backend's, even + // though the latter has the longer body. 
Without this, each ambiguous + // bare token resolved independently to its most-substantive def anywhere + // in the monorepo, dragging wrong-subsystem files into the render budget. + const anchorDirs: string[][] = []; + for (const { cands, mods } of perToken) { + if (cands.length >= 1 && cands.length <= 3) { + for (const n of cands) anchorDirs.push(n.filePath.toLowerCase().split('/').slice(0, -1)); + } + for (const n of mods) anchorDirs.push(n.filePath.toLowerCase().split('/').slice(0, -1)); + } + const sharedSegs = (a: string[], b: string[]) => { + let i = 0; + while (i < a.length && i < b.length && a[i] === b[i]) i++; + return i; + }; + const anchorProximity = (n: Node) => { + const dir = n.filePath.toLowerCase().split('/').slice(0, -1); + let best = 0; + for (const a of anchorDirs) best = Math.max(best, sharedSegs(dir, a)); + return best; + }; + + // PASS 2 — pick per token. A specific name (<=3 defs) injects all its + // defs. An overloaded name (`validate` = 10, `request` = 44) would flood + // the subgraph, so inject only: the overloads whose file/class the query + // ALSO names (the agent told us which one it wants — DataRequest's, not + // Validation.swift's); else the candidate co-located with the anchors + // (>=2 shared path segments so a bare repo-root match doesn't count); + // else the single most-substantive def. This is the explore-side mirror + // of codegraph_node's overload disambiguation. + for (const { cands, mods } of perToken) { let picks: Node[]; if (cands.length <= 3) { picks = cands; } else { const ctx = cands.filter(inNamedContext); - picks = ctx.length > 0 ? 
ctx.slice(0, 4) : cands.slice(0, 1); + if (ctx.length > 0) { + picks = ctx.slice(0, 4); + } else if (anchorDirs.length > 0) { + // All max-proximity co-located candidates (≥2 shared segments so a + // bare repo-root match doesn't count), capped — when the per-stage + // overloads of one name all live beside the anchors, they are ALL + // the answer (Clojure lifecycle stages, C++ per-backend overrides). + const ranked = [...cands].sort((a, b) => anchorProximity(b) - anchorProximity(a)); + const top = anchorProximity(ranked[0]!); + picks = top >= 2 ? ranked.filter((n) => anchorProximity(n) === top).slice(0, 3) : cands.slice(0, 1); + } else { + picks = cands.slice(0, 1); + } } + picks = picks.concat(mods); for (const n of picks) { if (!subgraph.nodes.has(n.id)) subgraph.nodes.set(n.id, n); // Mark as a named seed EVEN IF the FTS gather already had it — being @@ -2995,6 +3070,12 @@ export class ToolHandler { private matchesSymbol(node: Node, symbol: string): boolean { // Simple name match if (node.name === symbol) return true; + // Clojure keyword nodes (re-frame registrations) are named WITH the + // leading colon (`:app/set-page-state`), but agents habitually write the + // keyword without it. Match the colon-prefixed form too — gated on the + // `/` (namespaced-keyword shape) so a bare name like `dashboard` is never + // hijacked by a same-named unqualified keyword (`:dashboard`). + if (symbol.includes('/') && node.name === ':' + symbol) return true; // File basename match (e.g., "product-card" matches "product-card.liquid") if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol) return true; @@ -3091,6 +3172,15 @@ export class ToolHandler { private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } { let results = cg.searchNodes(symbol, { limit: 50 }); + // A colon-less namespaced keyword (`app/set-page-state` for the re-frame + // event `:app/set-page-state`) resolves to the registration node + // directly. 
Gated on the `/` so a bare name like `dashboard` can never be + // hijacked by a same-named unqualified keyword. + if (!symbol.startsWith(':') && symbol.includes('/')) { + const kw = cg.getNodesByName(':' + symbol); + if (kw.length > 0) return { nodes: kw, note: '' }; + } + // Mirror the fallback in `findSymbol` for qualified queries — FTS // strips colons, so a module-qualified lookup needs a second pass // by the bare last part. diff --git a/src/types.ts b/src/types.ts index e710e31a1..abd37421f 100644 --- a/src/types.ts +++ b/src/types.ts @@ -88,6 +88,7 @@ export const LANGUAGES = [ 'lua', 'luau', 'objc', + 'clojure', 'yaml', 'twig', 'xml', diff --git a/vitest.config.ts b/vitest.config.ts index 4a5ad904b..28011bce1 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -19,6 +19,22 @@ export default defineConfig({ * there, so the variable is a no-op. */ env: { CODEGRAPH_ALLOW_UNSAFE_NODE: '1' }, + /** + * Keep tree-sitter grammar compilation off V8's turboshaft optimizing + * tier inside test workers, exactly as every production launch path does + * (see src/extraction/wasm-runtime-flags.ts, issues #293/#298). Without + * it, suites that load many grammars (extraction.test.ts loads ALL of + * them in beforeAll) can abort the worker with the turboshaft Zone OOM — + * observed reliably on an arm64 Mac with Node 24: the worker dies mid- + * file and the remaining tests silently never run ("Worker exited + * unexpectedly", ~90 tests vanish from the count). The flag must be on + * the node command line, so it has to go through execArgv — NODE_OPTIONS + * disallows it and runtime v8.setFlagsFromString is too late. + */ + poolOptions: { + forks: { execArgv: ['--liftoff-only'] }, + threads: { execArgv: ['--liftoff-only'] }, + }, coverage: { provider: 'v8', reporter: ['text', 'json', 'html'],