diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7a874611..38eaa584 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -658,6 +658,12 @@ importers: specifier: workspace:* version: link:../../lib/provider + provider/pprof: + dependencies: + '@openctx/provider': + specifier: workspace:* + version: link:../../lib/provider + provider/prometheus: dependencies: '@openctx/provider': diff --git a/provider/pprof/README.md b/provider/pprof/README.md new file mode 100644 index 00000000..3b34b798 --- /dev/null +++ b/provider/pprof/README.md @@ -0,0 +1,69 @@ +# [pprof](https://github.com/google/pprof) context provider for OpenCtx + +[OpenCtx](https://openctx.org) provider that annotates Go functions with their associated CPU time and memory allocations based on the CPU/memory profiles. + +As profiling reports are usually not stored in a centralized remote location (the way docs or logs are) and only exist on your machine, this provider only supports the local VS Code client. It also does not provide annotations for test files. + +When enabled, the pprof provider will: + +1. Search the workspace to find a profiling report and, optionally, the Go binary that produced it. +1. Get `pprof -top` nodes for the current package. +1. Create an annotation for each function/method in the current file denoting its resource consumption. +1. Pass a detailed `pprof -list` breakdown to `annotation.item.ai` to be consumed by Cody. + +## Usage + +Add the following to your `settings.json`: + +```json +"openctx.providers": { + // ...other providers... + "https://openctx.org/npm/@openctx/provider-pprof": true +}, +``` + +The pprof provider has reasonable defaults, so no additional configuration is necessary if you follow the standard naming conventions for pprof reports and Go binaries, e.g. that a CPU profile report has the `.pprof` extension. + +Most of the time, however, you'll want to adjust the config to suit your preferences.
+ +## Configuration + +The default configuration looks like this: + +```jsonc +{ + "reportGlob": "**/*.pprof", + // "binaryGlob" is not set by default: the provider looks for a binary whose name matches the name of its parent directory + "rootDirectoryMarkers": ["go.mod", ".git"], + "top": { // Options to control `pprof -top` output + "excludeInline": true, // Add `-noinlines` + // "nodeCount" is not set by default; when set, adds `-nodecount=x` + "sort": "cum" // Set `-cum` or `-flat` + } +} +``` + +## Limitations + +`pprof` can collect stack traces for a number of [different profiles](https://pkg.go.dev/runtime/pprof#Profile): + +``` +goroutine - stack traces of all current goroutines +heap - a sampling of memory allocations of live objects +allocs - a sampling of all past memory allocations +threadcreate - stack traces that led to the creation of new OS threads +block - stack traces that led to blocking on synchronization primitives +mutex - stack traces of holders of contended mutexes +``` + +This provider only supports the `heap` and CPU profiles[^1]. + +## Development + +- [Source code](https://sourcegraph.com/github.com/sourcegraph/openctx/-/tree/provider/pprof) +- [Docs](https://openctx.org/docs/providers/pprof) +- License: Apache 2.0 + +____ + +[^1]: The CPU profile is not available as a `runtime/pprof.Profile` and has a special API. diff --git a/provider/pprof/_log.ts b/provider/pprof/_log.ts new file mode 100644 index 00000000..ce0eaf65 --- /dev/null +++ b/provider/pprof/_log.ts @@ -0,0 +1,35 @@ +/** + * _log is a local debugging util that writes logs to a log file at $HOME/log/openctx/provider-pprof.log. + * I could not find a way to log things to VSCode Output, so I came up with this workaround. + * + * This file is not imported anywhere, so no directories will be created on your machine + * with `pprof` provider enabled. + * + * It's only a temporary fixture -- there's probably a better solution to this.
/**
 * Local debugging helper that appends timestamped lines to
 * `<home>/log/openctx/provider-pprof.log`.
 *
 * Importing this module has side effects: it creates the log directory when it is missing
 * and truncates the log file, so every session starts with an empty log.
 */
import { appendFileSync, closeSync, mkdirSync, openSync } from 'node:fs'
import { homedir } from 'node:os'
import { join } from 'node:path'

// `process.env.HOME` may be unset, which used to produce a relative "undefined/log/openctx"
// directory under the current working directory. `homedir()` resolves the home directory instead.
const logDir = join(homedir(), 'log', 'openctx')
const logFile = join(logDir, 'provider-pprof.log')

// With `recursive: true` this is a no-op when the directory already exists.
mkdirSync(logDir, { recursive: true })
// Opening with the 'w' flag creates the file or truncates an existing one.
closeSync(openSync(logFile, 'w'))

/**
 * DEBUG appends one line to the log file, prefixed with the current UTC time.
 * To watch the logs run:
 *
 * ```
 * tail -f $HOME/log/openctx/provider-pprof.log
 * ```
 *
 * @param message first value to log
 * @param args additional values; all values are stringified and separated by single spaces
 */
export default function DEBUG(message?: unknown, ...args: unknown[]): void {
    const now = new Date().toUTCString()
    // Join message and args with spaces; previously the message ran straight into the first argument.
    const text = [message, ...args].map(part => String(part)).join(' ')
    appendFileSync(logFile, `[${now}] ${text}\n`)
}
interface Settings {
    /**
     * Glob pattern to match the profile report.
     *
     * Note that forward slashes _do not need_ to be escaped in the patterns provided in `settings.json`.
     *
     * @default "**\/*.pprof"
     * @example "**\/cmd\/*.pb.gz" (limit to a subdirectory)
     */
    reportGlob?: string

    /**
     * Glob pattern to match the Go binary from which the report was generated.
     *
     * By default `binaryGlob` is not set. The provider will try to locate the binary by searching
     * for an executable file whose name matches that of its parent directory.
     * This is how a binary produced by `go build .` would be conventionally named.
     */
    binaryGlob?: string

    /**
     * The provider will not traverse the file tree past the directory containing `rootDirectoryMarkers`
     * when searching for the profile report and the binary.
     *
     * @default [".git", "go.mod"]
     */
    rootDirectoryMarkers?: string[]

    /**
     * Options to control `pprof -top` output.
     *
     * @default top: { excludeInline: true, sort: 'cum' }
     * @example top: { excludeInline: false, sort: 'flat', nodeCount: 10 }
     */
    top?: Pick<TopOptions, 'excludeInline' | 'nodeCount' | 'sort'>
}

const fileScheme = /^file:\/{2}/

/** Matches a `pkg.Type.Method` name, i.e. a method with a value receiver as pprof prints it. */
const valueReceiverMethod = /^(\w+)\.(\w+)\.(\w+)$/

/**
 * searchDirFromUri returns the directory of the annotated file as a plain filesystem path.
 *
 * `file://` URIs are percent-encoded (a space is `%20`), so the path is decoded after the scheme
 * is stripped. Plain paths are returned untouched, and so are URIs with malformed escapes.
 * NOTE(review): Windows drive-letter URIs (`file:///C:/...`) keep their leading slash, as before.
 */
function searchDirFromUri(uri: string): string {
    const dir = dirname(uri)
    if (!fileScheme.test(dir)) {
        return dir
    }
    const path = dir.replace(fileScheme, '')
    try {
        return decodeURIComponent(path)
    } catch {
        // Not a valid percent-encoded sequence: use the raw path.
        return path
    }
}

/**
 * lookupKeys translates a function name printed by pprof into the keys under which
 * `parseGolang` may have registered that function.
 *
 * - pprof qualifies names with the import path (`container/list.(*List).Init`), while the
 *   parser only knows the package name, so everything up to the last `/` is dropped.
 * - Go names methods with a value receiver `pkg.Type.Method`, while the parser registers
 *   them as `pkg.(Type).Method`, so that spelling is offered as a second candidate.
 */
function lookupKeys(pprofName: string): string[] {
    const local = pprofName.slice(pprofName.lastIndexOf('/') + 1)
    const keys = [local]
    const method = valueReceiverMethod.exec(local)
    if (method) {
        keys.push(`${method[1]}.(${method[2]}).${method[3]}`)
    }
    return keys
}

/**
 * An [OpenCtx](https://openctx.org) provider that annotates every function declaration with
 * the CPU time and memory allocations associated with it.
 *
 * Only Go files are supported.
 */
const pprof: Provider<Settings> = {
    meta(params: MetaParams, settings: Settings): MetaResult {
        return {
            name: 'pprof',
            annotations: {
                selectors: [{ path: '**/*.go' }],
            },
        }
    },

    annotations(params: AnnotationsParams, settings: Settings): AnnotationsResult {
        // Test files do not need pprof annotations.
        if (params.uri.endsWith('_test.go')) {
            return []
        }

        const tool = getPprof()
        if (tool === null) {
            // TODO: log that no command line tool was found. Ideally, do it once on init.
            return []
        }

        const sources = findPprofSources(searchDirFromUri(params.uri), {
            reportGlob: settings.reportGlob || '**/*.pprof',
            rootDirectoryMarkers: settings.rootDirectoryMarkers || ['.git', 'go.mod'],
            binaryGlob: settings.binaryGlob,
            // TODO: pass workspaceRoot once it's made available
            // workspaceRoot: workspaceRoot,
        })
        if (!sources.report) {
            return []
        }
        tool.setSources(sources)

        const content = parseGolang(params.content)
        if (!content) {
            return []
        }

        const top = tool.top({ ...settings.top, package: content.package })
        if (top === null) {
            return []
        }

        const sort = settings.top?.sort ?? 'cum'
        const anns: Annotation[] = []
        top.nodes.forEach((node: Node, i: number) => {
            // Only functions declared in the current file get an annotation.
            const func = lookupKeys(node.function)
                .map(key => content.funcs[key])
                .find(candidate => candidate !== undefined)
            if (!func) {
                return
            }

            // `-list` is queried with the name exactly as pprof printed it.
            const list = tool.list(node.function)
            const item: Item = {
                title: `pprof ${top.type}: cum ${node.cum}${top.unit}, ${node.cumPerc}% (#${
                    i + 1
                }, sort=${sort})`,
                ...(list
                    ? { ai: { content: "Output of 'pprof -list' command for this function:\n" + list.raw } }
                    : {}),
            }

            anns.push({
                uri: params.uri,
                range: func.range,
                item: item,
            })
        })
        return anns
    },
}

export default pprof
const packageRegex = /^package (\w+)/m
// The name is followed by "(" or, for generic functions, by the "[" of the type parameter list.
const funcRegex = /^func (\w+)[([]/
// The receiver name is optional in Go: both `func (t *Thing) M()` and `func (*Thing) M()` are valid.
const methodRegex = /^func \((?:\w+ )?(\*?\w+)\) (\w+)\(/

/** Column at which a top-level function's name starts. */
const funcNameColumn = 'func '.length

export interface Contents {
    package: string

    /** The key for each function is its fully-qualified name, e.g. example.MyFunc or example.(*Thing).MyMethod,
     * as they are unique within the file. Methods with a value receiver are keyed as example.(Thing).MyMethod.
     */
    funcs: Record<string, Func>
}

/** Func is a Go function or method with additional metadata to help locate it in the file and filter for it in `pprof`. */
export interface Func {
    name: string
    /** Position of the function name within the file (zero-based line and character). */
    range: Range
    /** Receiver type for methods, including the leading `*` for pointer receivers. */
    receiver?: string
}

/**
 * parseGolang extracts the package name and the top-level function/method declarations from a Go source file.
 *
 * Only declarations that start at column 0 with `func ` are recognised. Generic functions
 * (`func Map[T any](...)`) and methods with unnamed receivers (`func (*Thing) Close()`) are included.
 *
 * @param source contents of a Go file
 * @returns the parsed contents, or `null` if the file has no `package` clause
 */
export function parseGolang(source: string): Contents | null {
    const pkg = packageRegex.exec(source)?.[1]
    if (!pkg) {
        return null
    }

    const funcs: Record<string, Func> = {}
    const nameRange = (line: number, start: number, name: string): Func['range'] => ({
        start: { line, character: start },
        end: { line, character: start + name.length },
    })

    for (const [i, line] of source.split('\n').entries()) {
        const fn = funcRegex.exec(line)
        if (fn) {
            const name = fn[1]
            funcs[`${pkg}.${name}`] = { name, range: nameRange(i, funcNameColumn, name) }
            continue
        }

        const method = methodRegex.exec(line)
        if (method) {
            const [matched, receiver, name] = method
            // The match ends with the "(" that directly follows the method name.
            const start = matched.length - 1 - name.length
            funcs[`${pkg}.(${receiver}).${name}`] = {
                name,
                range: nameRange(i, start, name),
                receiver,
            }
        }
    }

    return { package: pkg, funcs }
}
type ListOutput, + Pprof, + type PprofTool, + type TopOptions, + type TopOutput, + findReportPath, + getPprof, +} from './pprof.js' + +vi.mock('child_process', () => ({ execSync: vi.fn() })) +const execSyncMock = vi.mocked(execSync) + +vi.mock('fs', async () => { + const fs = await vi.importActual('fs') + return { ...fs, readdirSync: vi.fn(), accessSync: vi.fn() } +}) +const readdirSyncMock = vi.mocked(readdirSync) +const accessSyncMock = vi.mocked(accessSync) + +describe('pprof', () => { + beforeEach(() => { + vi.clearAllMocks() + vi.resetAllMocks() + }) + + test('get pprof (go installed)', () => { + execSyncMock.mockImplementation(whichCommand('go', '/usr/local/go/bin/og', 'pprof')) + + const pprof = getPprof() + + expect(execSync).toHaveBeenCalled() + expect(pprof).not.toBeNull() + }) + + test('get pprof (standalone pprof installed)', () => { + execSyncMock.mockImplementation(whichCommand('pprof', '/usr/local/go/bin/pprof', 'go')) + + const pprof = getPprof() + + expect(execSync).toHaveBeenCalled() + expect(pprof).not.toBeNull() + }) + + test('get pprof (not installed)', () => { + execSyncMock.mockReturnValueOnce(buffer('go not found')) + + const pprof = getPprof() + + expect(execSync).toHaveBeenCalled() + expect(pprof).toBeNull() + }) + + test('find report (exists)', () => { + readdirSyncMock.mockReturnValue(['report.pprof'] as unknown as Dirent[]) + + const sources = findReportPath('/path/to/current', { + reportGlob: '**/to/*.pprof', + workspaceRoot: '/path', + }) + + expect(sources.report).toBe('/path/to/report.pprof') + }) + + test('find report with rootDirectoryMarkers (does not exist)', () => { + readdirSyncMock.mockImplementation(((s: string): string[] => { + switch (s) { + case '/': + throw new Error('should not check after root directory') + case '/path': + // Contains root directory markers + return ['README.md', '.git'] + default: + return [] + } + }) as unknown as typeof readdirSync) + + const sources = findReportPath('/path/to/current', { + 
reportGlob: '**/*.pprof', + rootDirectoryMarkers: ['.git'], + }) + + expect(sources.report).toBeUndefined() + }) + + test('find report with workspaceRoot (does not exist)', () => { + readdirSyncMock.mockReturnValue(['report.pprof'] as unknown as Dirent[]) + + const sources = findReportPath('/path/to/current', { + reportGlob: '/other/path/**/*.pprof', + workspaceRoot: '/path', + }) + + expect(sources.report).toBeUndefined() + }) + + test('find binary (exists in the same directory)', () => { + readdirSyncMock.mockImplementation(((s: string): string[] => { + switch (s) { + case '/root/mybinary': + return ['report.pprof', 'mybinary'] + default: + // File 'mybinary' exists in every directory, but only /root/mybinary/mybinary is executable + return ['README.md', 'mybinary'] + } + }) as unknown as typeof readdirSync) + accessSyncMock.mockImplementation((file: PathLike, mode?: number): void => { + if (!(file as string).endsWith('/mybinary')) { + throw new Error('not a binary') + } + }) + + const sources = findReportPath('/root/mybinary/is/here', { + reportGlob: '**/*.pprof', + workspaceRoot: '/root', + }) + + expect(sources.binary).toBe('/root/mybinary/mybinary') + expect(accessSync).toHaveBeenCalled() + }) + + test('find binary (matches binaryGlob)', () => { + readdirSyncMock.mockImplementation(((s: string): string[] => { + return s === '/root/cmd/here/comes' ? ['mybinary.exe', 'mybinary.yaml', 'nothing'] : [] + }) as unknown as typeof readdirSync) + + const sources = findReportPath('/root/cmd/here/comes/nothing', { + reportGlob: '**/*.pprof', + binaryGlob: '**/cmd/**/*.exe', + workspaceRoot: '/root', + }) + + expect(sources.binary).toBe('/root/cmd/here/comes/mybinary.exe') + }) + + type TopCmdTest = { name: string; tool: PprofTool; opts: TopOptions; binary?: string; want: string } + + test.each([ + { + name: 'defaults', + tool: 'go tool pprof', + opts: { package: 'main' }, + want: `go tool pprof -top -show="main\\." 
-cum -noinlines report.pprof`, + }, + { + name: 'include binary', + tool: 'go tool pprof', + opts: { package: 'main' }, + binary: './my-binary', + want: `go tool pprof -top -show="main\\." -cum -noinlines ./my-binary report.pprof`, + }, + { + name: 'sort flat', + tool: 'go tool pprof', + opts: { package: 'main', sort: 'flat' }, + binary: './my-binary', + want: `go tool pprof -top -show="main\\." -flat -noinlines ./my-binary report.pprof`, + }, + { + name: 'include inline', + tool: 'go tool pprof', + opts: { package: 'main', excludeInline: false }, + want: `go tool pprof -top -show="main\\." -cum report.pprof`, + }, + { + name: 'limit node count', + tool: 'go tool pprof', + opts: { package: 'main', nodeCount: 2 }, + want: `go tool pprof -top -show="main\\." -cum -noinlines -nodecount=2 report.pprof`, + }, + ])('top command ($name)', (tt: TopCmdTest) => { + execSyncMock.mockReturnValueOnce(buffer('')) + const pprof = new Pprof(tt.tool) + pprof.setSources({ report: 'report.pprof', binary: tt.binary }) + + pprof.top(tt.opts) + + expect(execSyncMock).toHaveBeenCalledOnce() + expect(execSync).toHaveBeenCalledWith(tt.want) + }) + + type TopParseTest = { + name: string + stdout: string + want: TopOutput + } + test.each([ + { + name: 'cpu', + stdout: `File: pprof-example +Type: cpu +Time: Jun 1, 2024 at 10:56pm (CEST) +Duration: 8.39s, Total samples = 13.02s (155.11%) +Active filters: + show=^main\. 
+Showing nodes accounting for 6.25s, 48.00% of 13.02s total +Dropped 2 nodes (cum <= 0.07s) +Showing top 3 nodes out of 7 + flat flat% sum% cum cum% + 0.03s 0.23% 0.23% 6.38s 49.00% main.main + 5.59s 42.93% 43.16% 6.21s 47.70% main.Run + 0.63s 4.84% 48.00% 0.63s 4.84% pkg/list.(*L).Init`, + want: { + file: 'pprof-example', + type: 'cpu', + unit: 's', + nodes: [ + { function: 'main.main', flat: 0.03, flatPerc: 0.23, cum: 6.38, cumPerc: 49 }, + { function: 'main.Run', flat: 5.59, flatPerc: 42.93, cum: 6.21, cumPerc: 47.7 }, + { + function: 'pkg/list.(*L).Init', + flat: 0.63, + flatPerc: 4.84, + cum: 0.63, + cumPerc: 4.84, + }, + ], + }, + }, + { + name: 'memory', + stdout: `File: pprof-example +Type: inuse_space +Time: May 31, 2024 at 7:35pm (CEST) +Active filters: + show=main\. +Showing nodes accounting for 116.43MB, 100% of 116.43MB total + flat flat% sum% cum cum% + 0.43MB 0.23% 0.23% 6.38MB 49.00% main.main + 77.4MB 42.93% 43.16% 6.21MB 47.70% main.Run + 10.0MB 4.84% 48.00% 10.0MB 4.84% pkg/list.(*L).Init`, + want: { + file: 'pprof-example', + type: 'inuse_space', + unit: 'MB', + nodes: [ + { function: 'main.main', flat: 0.43, flatPerc: 0.23, cum: 6.38, cumPerc: 49 }, + { function: 'main.Run', flat: 77.4, flatPerc: 42.93, cum: 6.21, cumPerc: 47.7 }, + { + function: 'pkg/list.(*L).Init', + flat: 10.0, + flatPerc: 4.84, + cum: 10.0, + cumPerc: 4.84, + }, + ], + }, + }, + ])('parsing top output ($name)', (tt: TopParseTest) => { + const tool: PprofTool = 'go tool pprof' + const topOptions: TopOptions = { package: 'main' } + execSyncMock.mockReturnValueOnce(buffer(tt.stdout)) + + const pprof = new Pprof(tool) + pprof.setSources({ report: '/path/to/report.pprof' }) + + const top = pprof.top(topOptions) + + expect(top).toStrictEqual(tt.want) + }) + + test('list', () => { + const stdout = `Total: 116.43MB +ROUTINE ======================== main.buildDiamond in /Users/johndoe/go/src/local/pprof-example/main.go + 0 49.57MB (flat, cum) 42.57% of Total + . . 
108:func buildDiamond(cfgraph *CFG, start int) int { + . . 109: bb0 := start + . 11.50MB 110: NewBasicBlockEdge(cfgraph, bb0, bb0+1) + . 21.06MB 111: NewBasicBlockEdge(cfgraph, bb0, bb0+2) + . 13.50MB 112: NewBasicBlockEdge(cfgraph, bb0+1, bb0+3) + . 3.50MB 113: NewBasicBlockEdge(cfgraph, bb0+2, bb0+3) + . . 114: + . . 115: return bb0 + 3 + . . 116:} + . . 117: + . . 118:func buildConnect(cfgraph *CFG, start int, end int) {` + execSyncMock.mockReturnValueOnce(buffer(stdout)) + const pprof = new Pprof('pprof') + pprof.setSources({ report: 'report.mprof', binary: './mybinary' }) + + const list = pprof.list('example.(*Thing).Do') + + expect(execSyncMock).toHaveBeenCalledOnce() + expect(execSync).toHaveBeenCalledWith( + `pprof -list "example\\.\\(\\*Thing\\)\\.Do" ./mybinary report.mprof`, + ) + expect(list).toStrictEqual({ raw: stdout }) + }) +}) + +function buffer(s: string): Buffer { + return Buffer.from(s, 'utf-8') +} + +/** + * whichCommand helper returns a mock implementation for `execSync` that expects some kind of lookup command, + * e.g. `which`, and returns "not found" for binaries that should not be found in the mock invocation. 
/**
 * topNodeRegex matches a single node row in the output of the `pprof -top` command.
 *
 * An example output would look like this (first row for reference only):
 *
 * ```
 *       flat  flat%   sum%        cum   cum%
 *      5.59s 42.93% 43.16%      6.21s 47.70%  container/list.(*List).Init
 *          0     0% 43.16%    49.57MB 42.57%  github.com/go-chi/chi.(*Mux).ServeHTTP
 * ```
 *
 * Named groups: `flat`, `unit` (unit of the flat value), `flatPerc`, `cum`, `cumUnit`, `cumPerc`, `func`.
 *
 * - Units are optional, because pprof prints a bare `0` for zero values; `unit`/`cumUnit` are then empty strings.
 * - `func` stops before `[`, so instantiated generic functions are reported without their type arguments.
 */
const topNodeRegex =
    /^ +(?<flat>[\d.]+)(?<unit>[a-zA-Z]*) +(?<flatPerc>[\d.]+)% +(?:[\d.]+)% +(?<cum>[\d.]+)(?<cumUnit>[a-zA-Z]*) +(?<cumPerc>[\d.]+)% +(?<func>[\w.()*\/-]+)/
/**
 * PprofTool is a CLI tool for reading and visualizing profile reports.
 * `pprof` is bundled with every `go` binary, but may also be installed as a standalone tool.
 */
export type PprofTool = 'go tool pprof' | 'pprof'

/**
 * getPprof looks up a usable pprof executable with `which`, preferring the one bundled with `go`.
 * @returns a `Pprof` wrapper for the first tool found, or `null` if neither `go` nor `pprof` is available
 */
export function getPprof(): Pprof | null {
    const candidates: { lookup: string; tool: PprofTool }[] = [
        { lookup: 'which go', tool: 'go tool pprof' },
        { lookup: 'which pprof', tool: 'pprof' },
    ]

    for (const { lookup, tool } of candidates) {
        try {
            const stdout = execSync(lookup).toString('utf-8').trim()
            if (!stdout.endsWith('not found')) {
                return new Pprof(tool)
            }
        } catch {
            // A failing lookup command means this executable is unavailable; try the next candidate.
        }
    }

    return null
}

interface SearchOptions {
    /** Glob that the full path of the profile report must match. */
    reportGlob: string
    /** When set, the search stops at this directory and `rootDirectoryMarkers` are ignored. */
    workspaceRoot?: string
    /** The search stops at the first directory that contains any of these entries. */
    rootDirectoryMarkers?: string[]
    /** Glob for the Go binary; when not set, `matchGoBinary` is used. */
    binaryGlob?: string
}

/** PprofSources are passed to pprof command. */
interface PprofSources {
    report?: string
    binary?: string
}

/**
 * findReportPath walks up the directory tree from `currentDir`, looking for a profile report and,
 * optionally, the Go binary that produced it.
 *
 * The walk stops at the first directory that contains a report, at the configured root
 * (`workspaceRoot` or a directory holding one of `rootDirectoryMarkers`), at a directory that
 * cannot be read, or when there is no parent directory left.
 *
 * @param currentDir directory to start the search from
 * @param options globs and root markers that steer the search
 * @returns the sources found; either field may be missing
 */
export function findReportPath(currentDir: string, options: SearchOptions): PprofSources {
    const matchReport = matchGlob(options.reportGlob)
    const matchBinary = options.binaryGlob ? matchGlob(options.binaryGlob) : matchGoBinary
    const markers = options.rootDirectoryMarkers ?? []

    const reachedRoot = (dir: string, entries: string[]): boolean => {
        if (options.workspaceRoot !== undefined) {
            return options.workspaceRoot === dir
        }
        return entries.some(entry => markers.includes(entry))
    }

    const sources: PprofSources = {}
    let searchDir = currentDir

    while (true) {
        let contents: string[]
        try {
            contents = readdirSync(searchDir)
        } catch {
            break
        }

        for (const file of contents) {
            const fullPath = join(searchDir, file)
            if (!sources.report && matchReport(fullPath)) {
                sources.report = fullPath
            }

            // The search favours the binary that's closest to the report file,
            // as `sources.binary` will be overwritten with the more recent matches.
            if (matchBinary(fullPath)) {
                sources.binary = fullPath
            }
        }

        // Note that by breaking the loop after finding the report we assume that the binary
        // is located in the same directory or in one of the directories we've searched before,
        // which is a rather fair assumption.
        if (sources.report || reachedRoot(searchDir, contents)) {
            break
        }

        // `dirname` returns its argument once there is nothing left to strip ('/' for absolute
        // paths, '.' for relative ones). Stopping there keeps relative paths from looping forever.
        const parent = dirname(searchDir)
        if (parent === searchDir) {
            break
        }
        searchDir = parent
    }

    return sources
}

/**
 * matchGoBinary is a fallback matcher for finding Go binary to pass as a source to `pprof`.
 *
 * It uses a heuristic of relying on Go's convention to name binaries as their parent directories.
 * For example, running `go build .` in `/project/cmd/thing` directory will produce `/project/cmd/thing/thing` binary.
 *
 * Go build automatically assigns "execute" permission to the binary, so `matchGoBinary` will only match executable files
 * in order to differentiate from normal files that are called the same as their parent directory.
 * NOTE(review): directories usually carry the execute bit too, so a directory named after its parent also matches.
 * @param file Full path to the file
 * @returns true if the file is named after its parent directory, has no extension and is executable
 */
function matchGoBinary(file: string): boolean {
    const { base: name, ext, dir: dirFull } = parse(file)
    const namedAfterParent = name === basename(dirFull) && ext === ''
    return namedAfterParent && isExecutable(file)
}

/** isExecutable checks that a file has the "execute" permission. */
function isExecutable(file: string): boolean {
    try {
        accessSync(file, constants.X_OK)
        return true
    } catch {
        return false
    }
}
export interface TopOptions {
    /** package limits the entries to the current package (`-show="package\."`) */
    package: string

    /** nodeCount limits the number of results returned (`-nodecount=x`). Skipped if not set. */
    nodeCount?: number

    /** excludeInline excludes inline function calls from the output (`-noinlines`) */
    excludeInline?: boolean

    /** sort controls how nodes are sorted in the output (`-flat` or `-cum`) */
    sort?: 'flat' | 'cum'
}

export interface TopOutput {
    /** Name of the binary used to generate the report. If no binary was passed to `pprof` command, this field is `undefined`. */
    file?: string
    type: string
    unit: string
    nodes: Node[]
}

export interface Node {
    function: string

    /** Absolute CPU time / memory associated with this function only. */
    flat: number

    /** Relative CPU time / memory associated with this function only. */
    flatPerc: number

    /** CPU time / memory associated with the function and its descendants. */
    cum: number

    /** Relative CPU time / memory associated with the function and its descendants. */
    cumPerc: number
}

export interface ListOutput {
    /** Raw output of `pprof -list`. */
    raw: string
}

/** Arguments made only of these characters are passed to the shell as they are. */
const shellSafeArg = /^[\w@%+=:,.\/-]+$/
/** Characters that keep a special meaning inside double quotes in a POSIX shell. */
const shellSpecialInQuotes = /["\\$`]/g

/**
 * shellArg prepares a file path for interpolation into a shell command line.
 * Paths that contain spaces or other shell-significant characters are wrapped in double quotes,
 * so that they reach `pprof` as a single argument. Plain paths are returned unchanged.
 * NOTE(review): the escaping assumes a POSIX shell, like the `which` lookups used to find pprof.
 */
function shellArg(arg: string): string {
    if (shellSafeArg.test(arg)) {
        return arg
    }
    return '"' + arg.replace(shellSpecialInQuotes, '\\$&') + '"'
}

/**
 * Pprof is a wrapper for working with `pprof` CLI.
 */
export class Pprof {
    private tool: PprofTool
    private sources: PprofSources

    constructor(tool: PprofTool) {
        this.tool = tool
        this.sources = {}
    }

    /** setSources sets the report (and, optionally, the binary) that subsequent commands read. */
    setSources(sources: PprofSources): void {
        this.sources = sources
    }

    /**
     * top runs `pprof -top` for a package and parses the per-function table.
     * @returns parsed output, or `null` if no report is set, the command fails or nothing matches
     */
    top(options: TopOptions): TopOutput | null {
        if (!this.sources.report) {
            return null
        }

        const out = this.run(this.topCmd(options), 'Show expression matched no samples')
        return out === null ? null : this.parseTop(out)
    }

    /**
     * run executes a pprof command and returns its trimmed stdout.
     * @param cmd complete command line
     * @param noResultMarker text that pprof prints when the filter matched nothing
     * @returns `null` if the command fails or its output reports a missing file or an empty result
     */
    private run(cmd: string, noResultMarker: string): string | null {
        let out: string
        try {
            out = execSync(cmd).toString('utf-8').trim()
        } catch {
            return null
        }

        if (out.includes('no such file or directory') || out.includes(noResultMarker)) {
            return null
        }
        return out
    }

    /** sourceArgs returns the command-line arguments for the binary (if any) and the report. */
    private sourceArgs(): string[] {
        const { report, binary } = this.sources
        const args: string[] = []
        // Standalone `pprof` is not able to parse a Go binary, so it ignores it altogether.
        // Should we omit it from the command in case this.tool === 'pprof' ?
        if (binary) {
            args.push(shellArg(binary))
        }
        if (report) {
            args.push(shellArg(report))
        }
        return args
    }

    private topCmd(options: TopOptions): string {
        const sort = options.sort ?? 'cum'
        const excludeInline = options.excludeInline ?? true

        const parts = [this.tool, '-top', `-show="${options.package}\\."`, `-${sort}`]
        if (excludeInline) {
            parts.push('-noinlines')
        }
        if (options.nodeCount) {
            parts.push(`-nodecount=${options.nodeCount}`)
        }
        return [...parts, ...this.sourceArgs()].join(' ')
    }

    private parseTop(output: string): TopOutput | null {
        const binaryName = /^File: ([\w-\\/]+)/m.exec(output)
        const reportType = /^Type: (\w+)/m.exec(output)

        // Find the table with per-function stats and discard the headers
        const startPos = output.search(/ +flat +flat% +sum% +cum +cum%/)
        if (startPos === -1) {
            return null
        }

        let unit: string | null = null
        const nodes: Node[] = []
        for (const line of output.substring(startPos).split('\n').slice(1)) {
            const groups = topNodeRegex.exec(line)?.groups
            if (!groups) {
                continue
            }

            // Zero values are printed without a unit, so use the first unit seen in either column.
            if (!unit) {
                unit = groups.unit || groups.cumUnit || null
            }

            // Should we include the raw output here the way we do in list()?
            // It may include entries for other functions in the same package
            // which are declared in different files, which might be a useful
            // information to Cody.
            nodes.push({
                function: groups.func,
                flat: Number.parseFloat(groups.flat),
                flatPerc: Number.parseFloat(groups.flatPerc),
                cum: Number.parseFloat(groups.cum),
                cumPerc: Number.parseFloat(groups.cumPerc),
            })
        }

        return {
            type: reportType ? reportType[1] ?? 'cpu' : '',
            file: binaryName ? binaryName[1] : undefined,
            unit: unit ?? 's',
            nodes: nodes,
        }
    }

    /**
     * list fetches the detailed line-by-line breakdown of the function's resource consumption.
     * @param funcRegex fully-qualified function name. That includes both the package name and the name of the struct if the function is a method.
     * E.g. `example.MyFunction` or `example.(*ReceiverStruct).MyMethod`.
     * @returns raw `-list` output, or `null` if no report is set, the command fails or nothing matches
     */
    public list(funcRegex: string): ListOutput | null {
        if (!this.sources.report) {
            return null
        }

        const out = this.run(this.listCmd(funcRegex), 'no matches found for regexp')
        return out === null ? null : { raw: out }
    }

    private listCmd(funcRegex: string): string {
        return [this.tool, '-list', `"${escapeSpecial(funcRegex)}"`, ...this.sourceArgs()].join(' ')
    }
}

/**
 * Escape all special regex characters in a string.
 * @param s string
 * @returns string in which every regex metacharacter is preceded by a backslash
 */
function escapeSpecial(s: string): string {
    return s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&')
}