-
Notifications
You must be signed in to change notification settings - Fork 453
Feat/eng 2971 #721
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feat/eng 2971 #721
Changes from all commits
3d6f962
6d74596
1d4375f
a943aac
bdd019c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,23 +1,54 @@ | ||
| /* eslint-disable camelcase */ | ||
| import {readFile as readFileAsync} from 'node:fs/promises' | ||
| import {relative as relativePath} from 'node:path' | ||
|
|
||
| import type {AnalyticsEventName} from '../../../shared/analytics/event-names.js' | ||
| import type {CurateRunCompletedProps} from '../../../shared/analytics/events/curate-run-completed.js' | ||
| import type {PropsArg} from '../../../shared/analytics/events/index.js' | ||
| import type {QueryCompletedProps} from '../../../shared/analytics/events/query-completed.js' | ||
| import type {TaskType} from '../../../shared/analytics/task-types.js' | ||
| import type {LlmToolResultEvent} from '../../core/domain/transport/schemas.js' | ||
| import type {TaskInfo} from '../../core/domain/transport/task-info.js' | ||
| import type {IAnalyticsClient} from '../../core/interfaces/analytics/i-analytics-client.js' | ||
| import type {ITaskLifecycleHook} from '../../core/interfaces/process/i-task-lifecycle-hook.js' | ||
| import type {QueryResultMetadata} from './query-log-handler.js' | ||
|
|
||
| import {AnalyticsEventNames} from '../../../shared/analytics/event-names.js' | ||
| import {TaskTypes} from '../../../shared/analytics/task-types.js' | ||
| import {parseFrontmatter} from '../../core/domain/knowledge/markdown-writer.js' | ||
| import {extractCurateOperations} from '../../utils/curate-result-parser.js' | ||
| import {processLog} from '../../utils/process-logger.js' | ||
| import {CURATE_TASK_TYPES} from './curate-log-handler.js' | ||
| import {QUERY_TASK_TYPES} from './query-log-handler.js' | ||
|
|
||
| /** | ||
| * Translate the daemon's runtime task type string to the canonical | ||
| * analytics wire value. The daemon still dispatches the pre-ENG-2925 | ||
| * name `'curate-html-direct'`; analytics emits the post-rename | ||
| * `'curate-tool-mode'`. Once the rename PR lands, this becomes a | ||
| * no-op identity and can be inlined. | ||
| */ | ||
| function toAnalyticsTaskType(daemonType: string): TaskType { | ||
| if (daemonType === 'curate-html-direct') return TaskTypes.CURATE_TOOL_MODE | ||
| return daemonType as TaskType | ||
| } | ||
|
|
||
| /** | ||
| * Convert an absolute filesystem path to a project-relative path for the | ||
| * analytics wire. Falls back to the input unchanged when projectPath is | ||
| * unset (e.g., search tasks scoped to the daemon root). Keeps emits free | ||
| * of `/Users/{name}` PII while still letting PMs reason about which file | ||
| * inside a project an operation touched. | ||
| */ | ||
| function toRelativePath(filePath: string, projectPath?: string): string { | ||
| if (!projectPath) return filePath | ||
| const rel = relativePath(projectPath, filePath) | ||
| // `path.relative` returns '' when paths are identical — defensively | ||
| // surface a leaf token rather than emit a zero-length wire string that | ||
| // would fail `z.string().min(1)`. | ||
| return rel === '' ? '.' : rel | ||
| } | ||
|
Comment on lines
+43
to
+50
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (concern): The PII-removal claim is only partly true. In real curate runs the affected files almost always live inside Two options worth considering:
Also: the |
||
|
|
||
| // `CURATE_TASK_TYPES` is exported as a readonly tuple; wrap in a Set<string> | ||
| // for cast-free `.has()` lookups against TaskInfo.type (string). | ||
| const CURATE_TASK_TYPE_SET: ReadonlySet<string> = new Set(CURATE_TASK_TYPES) | ||
|
|
@@ -67,6 +98,8 @@ type CurateCounters = { | |
| type CurateTaskAnalyticsState = { | ||
| counters: CurateCounters | ||
| flavor: 'curate' | ||
| /** Captured at onTaskCreate so onToolResult emits can relativize op.filePath. */ | ||
| projectPath?: string | ||
| taskType: CurateTaskTypeLiteral | ||
| } | ||
|
|
||
|
|
@@ -142,36 +175,57 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
|
|
||
| async onTaskCancelled(taskId: string, task: TaskInfo): Promise<void> { | ||
| await this.dispatchTerminal(taskId, task, 'cancelled') | ||
| this.emitTaskFailed(taskId, task) | ||
| } | ||
|
|
||
| async onTaskCompleted(taskId: string, _result: string, task: TaskInfo): Promise<void> { | ||
| const state = this.tasks.get(taskId) | ||
| if (!state) return | ||
| if (state) { | ||
| // Drain any in-flight per-op processing so CURATE_OPERATION_APPLIED emits | ||
| // land BEFORE the run-completion emit on the wire. The chain never | ||
| // rejects (see `onToolResult`), so this await is safe. | ||
| await this.pendingByTask.get(taskId) | ||
|
|
||
| if (state.flavor === 'curate') { | ||
| const outcome = state.counters.failed > 0 ? 'partial' : 'completed' | ||
| this.emit( | ||
| AnalyticsEventNames.CURATE_RUN_COMPLETED, | ||
| this.buildCurateRunPayload({outcome, state, task, taskId}), | ||
| ) | ||
| } else { | ||
| this.emit( | ||
| AnalyticsEventNames.QUERY_COMPLETED, | ||
| await this.buildQueryCompletedPayload({outcome: 'completed', state, task, taskId}), | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| // Drain any in-flight per-op processing so CURATE_OPERATION_APPLIED emits | ||
| // land BEFORE the run-completion emit on the wire. The chain never | ||
| // rejects (see `onToolResult`), so this await is safe. | ||
| await this.pendingByTask.get(taskId) | ||
|
|
||
| if (state.flavor === 'curate') { | ||
| const outcome = state.counters.failed > 0 ? 'partial' : 'completed' | ||
| this.emit( | ||
| AnalyticsEventNames.CURATE_RUN_COMPLETED, | ||
| this.buildCurateRunPayload({outcome, state, task, taskId}), | ||
| ) | ||
| } else { | ||
| this.emit( | ||
| AnalyticsEventNames.QUERY_COMPLETED, | ||
| await this.buildQueryCompletedPayload({outcome: 'completed', state, task, taskId}), | ||
| ) | ||
| } | ||
| // M14.3 generic funnel emit. Fires for EVERY task type AFTER any | ||
| // per-flavor M12 emit (terminal-event-last convention). | ||
| this.emit(AnalyticsEventNames.TASK_COMPLETED, { | ||
| duration_ms: this.durationMs(task), | ||
| task_id: taskId, | ||
| task_type: toAnalyticsTaskType(task.type), | ||
| }) | ||
| } | ||
|
|
||
| async onTaskCreate(task: TaskInfo): Promise<void> { | ||
| // M14.3 generic funnel-entry emit. Fires for EVERY task type BEFORE | ||
| // the M12 per-flavor state init so the entry event lands even if | ||
| // state setup throws downstream. | ||
| this.emit(AnalyticsEventNames.TASK_CREATED, { | ||
| has_files: (task.files?.length ?? 0) > 0, | ||
| has_folder: typeof task.folderPath === 'string' && task.folderPath.length > 0, | ||
| task_id: task.taskId, | ||
| task_type: toAnalyticsTaskType(task.type), | ||
| }) | ||
|
|
||
| if (isCurateLiteral(task.type)) { | ||
| this.tasks.set(task.taskId, { | ||
| counters: {added: 0, deleted: 0, failed: 0, merged: 0, pendingReview: 0, updated: 0}, | ||
| flavor: 'curate', | ||
| projectPath: task.projectPath, | ||
| taskType: task.type, | ||
| }) | ||
| return | ||
|
|
@@ -184,6 +238,7 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
|
|
||
| async onTaskError(taskId: string, _errorMessage: string, task: TaskInfo): Promise<void> { | ||
| await this.dispatchTerminal(taskId, task, 'error') | ||
| this.emitTaskFailed(taskId, task) | ||
| } | ||
|
|
||
| async onToolResult(taskId: string, payload: LlmToolResultEvent): Promise<void> { | ||
|
|
@@ -246,7 +301,7 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
| outcome, | ||
| pending_review_count: state.counters.pendingReview, | ||
| task_id: taskId, | ||
| task_type: state.taskType, | ||
| task_type: toAnalyticsTaskType(state.taskType), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -303,17 +358,24 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
|
|
||
| // M12.3: harvest per-path frontmatter on the same async read path used | ||
| // for curate emits. Entries whose file is unreadable / has no frontmatter | ||
| // carry `absolute_path` alone (the three array fields stay absent). | ||
| // `Promise.all` preserves input-array order in the result regardless of | ||
| // which read settles first. | ||
| // carry empty keywords / tags / related_paths arrays — the wire shape | ||
| // is uniform regardless of read success. `Promise.all` preserves | ||
| // input-array order in the result regardless of which read settles first. | ||
| const readPathsWithMetadata = await Promise.all( | ||
| cappedPaths.map(async (p) => { | ||
| const fm = await this.readFrontmatterFields(p) | ||
| return { | ||
| absolute_path: p, | ||
| ...(fm.keywords ? {keywords: fm.keywords} : {}), | ||
| ...(fm.related ? {related: fm.related} : {}), | ||
| ...(fm.tags ? {tags: fm.tags} : {}), | ||
| keywords: fm.keywords ?? [], | ||
| // M14 review tightening: each related entry is structured so a | ||
| // later FU can populate the linked file's own keywords/tags | ||
| // without changing the wire shape. | ||
| related_paths: (fm.related ?? []).map((r) => ({ | ||
| keywords: [], | ||
| relative_path: r, | ||
| tags: [], | ||
| })), | ||
| relative_path: toRelativePath(p, task.projectPath), | ||
| tags: fm.tags ?? [], | ||
| } | ||
| }), | ||
| ) | ||
|
|
@@ -331,7 +393,7 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
| read_tool_call_count: readToolCallCount, | ||
| search_call_count: searchCallCount, | ||
| task_id: taskId, | ||
| task_type: 'query', | ||
| task_type: toAnalyticsTaskType(task.type), | ||
| ...(tier === undefined ? {} : {tier}), | ||
| } | ||
| } | ||
|
|
@@ -371,6 +433,20 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * M14.3 generic terminal-failure emit. Fired by both onTaskError and | ||
| * onTaskCancelled AFTER dispatchTerminal so M12 per-flavor failure | ||
| * emits land first on the wire. Cancellation maps to task_failed | ||
| * (not a distinct event) per the schema's docblock. | ||
| */ | ||
| private emitTaskFailed(taskId: string, task: TaskInfo): void { | ||
| this.emit(AnalyticsEventNames.TASK_FAILED, { | ||
| duration_ms: this.durationMs(task), | ||
| task_id: taskId, | ||
| task_type: toAnalyticsTaskType(task.type), | ||
| }) | ||
| } | ||
|
|
||
| private async processToolResult(taskId: string, payload: LlmToolResultEvent): Promise<void> { | ||
| const state = this.tasks.get(taskId) | ||
| if (!state || state.flavor !== 'curate') return | ||
|
|
@@ -423,20 +499,21 @@ export class AnalyticsHook implements ITaskLifecycleHook { | |
|
|
||
| // M12.3: read post-op frontmatter for ADD / UPDATE / MERGE-target / | ||
| // UPSERT. DELETE skips the read (file is gone). Frontmatter fields | ||
| // stay absent when the read fails (ENOENT, EACCES, malformed YAML). | ||
| // default to empty arrays when the read fails (ENOENT, EACCES, | ||
| // malformed YAML) so the wire shape stays uniform. | ||
| // eslint-disable-next-line no-await-in-loop -- emit order MUST match op order | ||
| const frontmatter = op.type === 'DELETE' ? {} : await this.readFrontmatterFields(op.filePath) | ||
|
|
||
| this.emit(AnalyticsEventNames.CURATE_OPERATION_APPLIED, { | ||
| absolute_path: op.filePath, | ||
| ...(op.confidence ? {confidence: op.confidence} : {}), | ||
| ...(op.impact ? {impact: op.impact} : {}), | ||
| ...(frontmatter.keywords ? {keywords: frontmatter.keywords} : {}), | ||
| keywords: frontmatter.keywords ?? [], | ||
| knowledge_path: op.path, | ||
| needs_review: op.needsReview ?? false, | ||
| operation_type: op.type, | ||
| ...(frontmatter.related ? {related: frontmatter.related} : {}), | ||
| ...(frontmatter.tags ? {tags: frontmatter.tags} : {}), | ||
| relative_path: toRelativePath(op.filePath, state.projectPath), | ||
| tags: frontmatter.tags ?? [], | ||
|
Comment on lines
+510
to
+516
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (concern): When A downstream dashboard that reconstructs counts from Two reasonable fixes:
The comment at line 497 explicitly says these ops are rare, so (1) is the simplest reconciliation. |
||
| task_id: taskId, | ||
| }) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,24 +5,28 @@ import {z} from 'zod' | |
| * Per-event schema for `curate_operation_applied`. | ||
| * | ||
| * Emitted by the daemon's `AnalyticsHook` (M12.2) once per successful curate | ||
| * operation. Each operation carries the affected file's absolute path, its | ||
| * knowledge-tree address, review/impact metadata, and (M12.3) the file's | ||
| * current-state frontmatter values for tags / keywords / related. | ||
| * operation. Each operation carries the affected file's project-relative | ||
| * path, its knowledge-tree address, review/impact metadata, and (M12.3) the | ||
| * file's current-state frontmatter values for tags / keywords / related. | ||
| * | ||
| * All three frontmatter arrays are optional and absent on DELETE operations | ||
| * (the file is gone post-op) and on read failures (defensive). | ||
| * Review tightening (M14 follow-up): | ||
| * - `absolute_path` → `relative_path` for privacy + portability across hosts | ||
| * - `keywords` / `tags` are now required arrays (default empty) so consumers | ||
| * don't have to special-case the "field absent" shape | ||
| * - `related` stays optional and absent on DELETE / read-failure (file is | ||
| * gone or unreadable, no related-link source to harvest from) | ||
| */ | ||
| export const CurateOperationAppliedSchema = z | ||
| .object({ | ||
| absolute_path: z.string().min(1), | ||
| confidence: z.enum(['high', 'low']).optional(), | ||
| impact: z.enum(['high', 'low']).optional(), | ||
| keywords: z.array(z.string().max(256)).max(50).optional(), | ||
| keywords: z.array(z.string().max(256)).max(50), | ||
| knowledge_path: z.string().min(1), | ||
| needs_review: z.boolean(), | ||
| operation_type: z.enum(['ADD', 'UPDATE', 'DELETE', 'MERGE', 'UPSERT']), | ||
| related: z.array(z.string().max(256)).max(50).optional(), | ||
| tags: z.array(z.string().max(256)).max(50).optional(), | ||
| relative_path: z.string().min(1), | ||
| tags: z.array(z.string().max(256)).max(50), | ||
| task_id: z.string().min(1), | ||
| }) | ||
| .strict() | ||
|
Comment on lines
19
to
32
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (blocking) — wire-format breaking change: This rev does three things on the same emit:
If the analytics backend (telemetry-dev / prod) was previously consuming Please confirm in the PR body:
The same review tightening applies to |
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion (concern, non-blocking): the
daemonType as TaskTypecast bypasses validation. If a new daemonTaskTypeSchemavalue is added (e.g. a futurevc-commit) but not mirrored inTASK_TYPE_VALUES, this silently emits a payload that will be rejected by the per-event schemas downstream (since they usez.enum(TASK_TYPE_VALUES)). PerCLAUDE.md: "Avoidas Typeassertions — use type guards or proper typing instead."A safer pattern:
…and have
emitskip the call whentask_typeis undefined (with aprocessLogfor visibility). Today the daemon'sTaskTypeSchemaonly diverges bycurate-html-direct, so the practical fallout is bounded — but the comment promises this becomes a "no-op identity" once rename lands, while in practice it would still need the safety net.