diff --git a/CHANGELOG.md b/CHANGELOG.md index 54ef5f5aa..400180402 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fixes +- Ruby call edges — callers and callees — are now correctly resolved for two common patterns: calling a method on a local variable assigned from `.new` (`obj = MyClass.new; obj.method`) and chaining a method directly on a constructor call (`MyClass.new(...).method`). Both patterns previously produced unresolvable references, so `codegraph_callers` returned no results for those methods and `codegraph_callees` omitted them from what a function calls. **This requires rebuilding the index**. - The background file watcher no longer exhausts your machine's file-descriptor budget. On macOS it previously kept **one open file handle per watched file**, so on a large project the running MCP server could pile up tens of thousands of handles and blow past the system-wide limit — at which point *unrelated* apps (your shell, editor, Docker, browser) started failing with "too many open files" until the codegraph process was killed. The watcher now uses a single recursive watch on macOS and Windows, and bounded per-directory watches on Linux, so its cost stays flat no matter how large the project is. (#644, #496, #555, #628, #579) - Indexing a project with very symbol-dense files (tens of thousands of functions or methods in a single file) no longer runs out of memory. The step that links dynamic call relationships used to load every function and method into memory at once, which could exhaust the heap and abort indexing with "JavaScript heap out of memory" on large or generated codebases; it now streams them, so memory stays flat no matter how many symbols the project has. (#610) - Indexing a very large repository no longer aborts during its first sync with a "too many SQL variables" error. 
(#540) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index d29fa11b3..cecc0afe7 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -2188,6 +2188,87 @@ end }); }); + describe('Ruby calls', () => { + it('should resolve a method call on a local var assigned from .new', () => { + const code = ` +module CachedCounting + def perform_increment!(key, count) + writer = CacheWriter.new + writer.write_cache!(key, count) + end +end +`; + const result = extractFromSource('concerns/cached_counting.rb', code); + + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'CacheWriter::write_cache!')).toBe(true); + }); + + it('should resolve a method call on a local var assigned from namespaced .new', () => { + const code = ` +module Notifications + class Dispatcher + def dispatch(user) + mailer = Notifications::Mailer.new(user) + mailer.send_welcome! + end + end +end +`; + const result = extractFromSource('lib/notifications/dispatcher.rb', code); + + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'Notifications::Mailer::send_welcome!')).toBe(true); + }); + + it('should extract .new as a calls reference to the class', () => { + const code = ` +module CachedCounting + def perform_increment!(key, count) + CacheWriter.new(key: key, count: count) + end +end +`; + const result = extractFromSource('concerns/cached_counting.rb', code); + + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'CacheWriter')).toBe(true); + }); + + it('should extract .new on a fully-qualified namespaced class', () => { + const code = ` +module Discourse + class AuthProvider + def authenticate(params) + Discourse::Auth::TokenValidator.new(params) + end + end +end +`; + const result = extractFromSource('lib/auth.rb', 
code); + + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'Discourse::Auth::TokenValidator')).toBe(true); + }); + + it('should extract a method call chained directly on a .new expression', () => { + const code = ` +module Discourse + class AuthProvider + def authenticate(params) + Discourse::Auth::TokenValidator.new(params).validate! + end + end +end +`; + const result = extractFromSource('lib/auth.rb', code); + + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'Discourse::Auth::TokenValidator')).toBe(true); + expect(calls.some((c) => c.referenceName === 'validate!')).toBe(true); + }); + }); + describe('C/C++ imports', () => { it('should extract system include', () => { const code = `#include `; diff --git a/src/extraction/languages/ruby.ts b/src/extraction/languages/ruby.ts index b54261656..231bd0840 100644 --- a/src/extraction/languages/ruby.ts +++ b/src/extraction/languages/ruby.ts @@ -38,6 +38,38 @@ export const rubyExtractor: LanguageExtractor = { ctx.popScope(); return true; // handled }, + buildLocalScope: (body, source) => { + // Pre-scan the method body for `var = SomeClass.new(...)` assignments so that + // `var.method` calls in that body can be emitted as `SomeClass::method` — a name + // the resolver can match directly rather than falling back to `var.method`. + // The scan ignores statement order: per variable, the last such assignment found wins. + // Map value includes the `::` separator so the engine stays agnostic: + // creator = Payments::Processor.new → "creator" → "Payments::Processor::" + const scope = new Map(); + const scan = (node: SyntaxNode): void => { + if (node.type === 'assignment') { + const left = node.childForFieldName('left') ?? node.namedChild(0); + const right = node.childForFieldName('right') ?? 
node.namedChild(1); + // Only track: plain local variable on the left, `.new` call on the right + if (left?.type === 'identifier' && right?.type === 'call') { + const method = right.childForFieldName('method'); + if (method && getNodeText(method, source) === 'new') { + // Receiver must be a bare constant (`Foo`) or namespaced (`Foo::Bar`) + const receiver = right.namedChild(0); + if (receiver?.type === 'constant' || receiver?.type === 'scope_resolution') { + scope.set(getNodeText(left, source), `${getNodeText(receiver, source)}::`); + } + } + } + } + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child) scan(child); + } + }; + scan(body); + return scope; + }, extractBareCall: (node, _source) => { // Ruby bare method calls (no parens, no receiver) parse as plain identifiers. // e.g., `reset` in a method body is `identifier "reset"` not a `call` node. diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index 6c04fbaeb..02ec11317 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -213,6 +213,22 @@ export interface LanguageExtractor { */ extractBareCall?: (node: SyntaxNode, source: string) => string | undefined; + /** + * Pre-scan a function body and return a map of local variable name → qualified + * call prefix (including separator) for variables whose type is statically known + * from a constructor assignment. + * + * The engine stores this map for the duration of the body traversal. When it + * encounters a call whose receiver matches a key, it emits `prefix + methodName` + * instead of `receiver.method`, producing a directly resolvable reference. + * + * The value must include the language's member-access separator so the engine + * stays separator-agnostic. 
Examples: + * Ruby `creator = Payments::Processor.new(…)` → `"Payments::Processor::"` + * Python `obj = payments.Processor(…)` → `"payments.Processor."` + */ + buildLocalScope?: (body: SyntaxNode, source: string) => Map; + /** * Node types representing a file-level package/namespace declaration * (e.g. Kotlin `package_header`, Java `package_declaration`). When set, diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index c6eb93ac9..3fa0faa59 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -141,6 +141,7 @@ export class TreeSitterExtractor { private extractor: LanguageExtractor | null = null; private nodeStack: string[] = []; // Stack of parent node IDs private methodIndex: Map | null = null; // lookup key → node ID for Pascal defProc lookup + private localVarTypes = new Map(); // var name → call prefix (with separator), populated by extractor.buildLocalScope constructor(filePath: string, source: string, language?: Language) { this.filePath = filePath; @@ -1851,7 +1852,35 @@ export class TreeSitterExtractor { // Scoped call: Module::function() calleeName = getNodeText(func, this.source); } else { - calleeName = getNodeText(func, this.source); + // Grammars that use receiver + method fields (e.g. Ruby) rather than a + // single function field. namedChild(0) is the receiver. 
When a `method` + // field exists: + // - identifier receiver: emit resolved prefix (if known) + method, else "receiver.method" + // - constant/scope_resolution receiver: emit class name only + // (resolver promotes .new calls on classes to `instantiates`) + // - complex receiver (chained call etc.): emit just the method name + const methodNode = getChildByField(node, 'method'); + if (methodNode) { + const methodName = getNodeText(methodNode, this.source); + if (func.type === 'identifier') { + const receiverName = getNodeText(func, this.source); + const RUBY_SKIP = new Set(['self', 'super', 'nil']); + if (RUBY_SKIP.has(receiverName)) { + calleeName = methodName; + } else { + const resolvedPrefix = this.localVarTypes.get(receiverName); + calleeName = resolvedPrefix + ? `${resolvedPrefix}${methodName}` + : `${receiverName}.${methodName}`; + } + } else if (func.type === 'constant' || func.type === 'scope_resolution') { + calleeName = getNodeText(func, this.source); + } else { + calleeName = methodName; + } + } else { + calleeName = getNodeText(func, this.source); + } } } } @@ -2108,6 +2137,12 @@ export class TreeSitterExtractor { private visitFunctionBody(body: SyntaxNode, _functionId: string): void { if (!this.extractor) return; + // Ask the extractor to pre-scan the body for statically-typed locals (e.g. Ruby + // `var = SomeClass.new(...)`). Nested bodies each get their own scope; restoring + // on exit lets the outer body's remaining nodes still resolve. + const savedLocalVarTypes = this.localVarTypes; + this.localVarTypes = this.extractor?.buildLocalScope?.(body, this.source) ?? new Map(); + const visitForCallsAndStructure = (node: SyntaxNode): void => { const nodeType = node.type; @@ -2191,6 +2226,7 @@ export class TreeSitterExtractor { }; visitForCallsAndStructure(body); + this.localVarTypes = savedLocalVarTypes; } /**