From 8bf6753f95a337ea94c2737a7e949e7525597747 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 09:37:54 +0800 Subject: [PATCH 01/23] docs(research): add research on wasm detector and latin-tag interaction --- ...h-2026-03-24-wasm-latin-tag-interaction.md | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md diff --git a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md new file mode 100644 index 0000000..e4b30dc --- /dev/null +++ b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md @@ -0,0 +1,87 @@ +--- +title: "wasm detector and latin-tag interaction" +created-date: 2026-03-24 +modified-date: 2026-03-24 +status: draft +agent: codex +--- + +## Goal + +Document the unexpected interaction where `--detector wasm` plus `--latin-tag <tag>` changes collector totals and removes detector-derived locales such as `fr` in real CLI output. + +## Key Context + +- Reported against `0.1.5-canary.2`. +- Scope here is investigation and issue framing only. +- This note does not propose or record an implementation yet. 
+ +## Reproduction + +Observed baseline: + +```bash +node dist/esm/bin.mjs -p README.md -m colle --detector wasm +``` + +Observed output: + +```text +Total words: 3249 +Locale en: 2988 words +Locale und-Hani: 26 words +Locale ko: 4 words +Locale und-Latn: 143 words +Locale ja: 2 words +Locale ar: 2 words +Locale pl: 20 words +Locale tr: 10 words +Locale de: 15 words +Locale ro: 4 words +Locale zh: 8 words +Locale fr: 27 words +``` + +Observed with `--latin-tag en`: + +```bash +node dist/esm/bin.mjs -p README.md -m colle --detector wasm --latin-tag en +``` + +Observed output: + +```text +Total words: 3254 +Locale en: 3170 words +Locale und-Hani: 26 words +Locale ko: 4 words +Locale ja: 2 words +Locale ar: 2 words +Locale pl: 16 words +Locale tr: 9 words +Locale de: 13 words +Locale ro: 4 words +Locale zh: 8 words +``` + +## Key Findings + +- The WASM detector path is still used when `--detector wasm` is set. +- The interaction changes earlier than the detector stage: + - `resolveCountRunOptions()` forwards `options.latinTag` into `wcOptions.latinTagHint`. + - `segmentTextByLocaleWithWasmDetector()` calls `segmentTextByLocale(text, options)` before any WASM remap work. + - `detectLocaleForChar()` returns `context.latinHint` for ambiguous Latin characters when `latinTagHint` is present. +- Because of that early hinting, many Latin runs become `en` during base segmentation instead of remaining `und-Latn`. +- The WASM detector only evaluates ambiguous routes (`und-Latn`, `und-Hani`), so pre-labeled `en` chunks are skipped entirely. 
+- Result: + - detector-derived locales such as `fr` disappear because those chunks never remain ambiguous long enough to reach the WASM route + - totals can also change because counting uses `Intl.Segmenter(locale, { granularity: "word" })`, so changing a chunk locale from `und-Latn` to `en` changes the segmenter used for final word counting + +## Implications or Recommendations + +- Current behavior matches "hint overrides ambiguity before detection", not "run WASM first, then relabel only unresolved `und-Latn` buckets". +- If the intended contract is that hint-based language tagging belongs to regex mode, `latinTagHint` should not be applied during the pre-detector segmentation pass for WASM mode. +- One candidate model for WASM mode is: + - keep ambiguous Latin as `und-Latn` for detector eligibility + - run WASM remap on ambiguous windows + - apply `latinTagHint` only as a fallback for windows/chunks that remain unresolved after detector evaluation From 71c75be138edca3f83f273ada57db7f0296996eb Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 09:48:24 +0800 Subject: [PATCH 02/23] chore(release): update version to 0.1.5-canary.3 in package.json and version-embedded.ts --- package.json | 2 +- src/cli/program/version-embedded.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index ec1b310..75134c8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@dev-pi2pie/word-counter", - "version": "0.1.5-canary.2", + "version": "0.1.5-canary.3", "keywords": [ "cli", "intl-segmenter", diff --git a/src/cli/program/version-embedded.ts b/src/cli/program/version-embedded.ts index 75cb803..a63fa26 100644 --- a/src/cli/program/version-embedded.ts +++ b/src/cli/program/version-embedded.ts @@ -1,3 +1,3 @@ // This file is generated by scripts/generate-embedded-version.mjs. // Do not edit manually. 
-export const EMBEDDED_PACKAGE_VERSION = "0.1.5-canary.2"; +export const EMBEDDED_PACKAGE_VERSION = "0.1.5-canary.3"; From 1984135d55d053fbac8bd24ec32b67fb446808e6 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 10:00:39 +0800 Subject: [PATCH 03/23] docs: link related plan for WASM mode Latin hint ordering fix --- ...026-03-24-wasm-mode-latin-hint-ordering.md | 91 +++++++++++++++++++ ...h-2026-03-24-wasm-latin-tag-interaction.md | 4 + 2 files changed, 95 insertions(+) create mode 100644 docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md diff --git a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md new file mode 100644 index 0000000..827c49e --- /dev/null +++ b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md @@ -0,0 +1,91 @@ +--- +title: "WASM mode Latin hint ordering fix" +created-date: 2026-03-24 +modified-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Restore detector-first behavior in `--detector wasm` mode so Latin fallback hints do not remove ambiguous Latin chunks from WASM eligibility before remap. + +## Context + +Current behavior allows `--latin-tag` to participate in pre-detector segmentation even when `--detector wasm` is selected. That changes chunk routing before detector evaluation, removes detector-derived locale outcomes such as `fr`, and can also change total word counts because final counting uses locale-specific `Intl.Segmenter` instances. + +Issue linkage: + +- Bug issue: `#52` +- Affected version: `v0.1.5-canary.2` + +## Scope + +- In scope: + - Change WASM-mode segmentation flow so Latin hint-based language assignment does not run before ambiguous-window detection. + - Preserve the current detector remap policy for eligible `und-Latn` and `und-Hani` windows. + - Keep a deterministic fallback path so unresolved ambiguous Latin can still be relabeled by explicit Latin fallback options after detector evaluation. 
+ - Add regression coverage for the interaction between `--detector wasm` and Latin hint options. + - Update user-facing docs where detector and hint behavior is described. +- Out of scope: + - Changing regex-mode hint behavior. + - Reworking Han fallback semantics unless required by the same bugfix path. + - Broad detector policy retuning beyond the ordering issue. + +## Proposed Decisions + +- Treat explicit Latin hint options as regex-mode labeling inputs, not pre-detector routing inputs, when `detector = "wasm"`. +- Keep ambiguous Latin text on `und-Latn` during the initial segmentation pass in WASM mode. +- Run the existing WASM detector flow against those ambiguous windows first. +- Only after detector evaluation, apply explicit Latin fallback tags to unresolved `und-Latn` output. +- Preserve built-in script-specific Latin hint rules that already identify non-ambiguous language buckets from distinctive characters unless implementation review shows they must also be deferred for consistency. + +## Phase Task Items + +### Phase 1 - WASM Pre-Segmentation Boundary + +- [ ] Introduce a WASM-specific locale-detect option path that suppresses explicit Latin fallback hints during the initial `segmentTextByLocale()` pass. +- [ ] Keep ambiguous Latin text on `und-Latn` during the initial WASM segmentation path so detector eligibility is preserved. +- [ ] Keep detector window construction and accepted remap behavior unchanged for eligible `und-Latn` and `und-Hani` routes. + +### Phase 2 - Post-Detector Fallback Relabeling + +- [ ] Add a post-detector fallback pass that relabels only unresolved `und-Latn` chunks using explicit Latin fallback options. +- [ ] Ensure explicit Latin fallback options are applied after detector acceptance or rejection, not before detector routing. 
+- [ ] Recheck whether built-in script-specific Latin hint rules should remain in pre-detector routing or also move into the fallback layer, and document the final decision in code comments or docs if needed. + +### Phase 3 - Compatibility Guards + +- [ ] Keep regex mode behavior unchanged. +- [ ] Keep explicit Han hint behavior unchanged unless the same implementation path proves a correction is required. +- [ ] Keep public output schemas and detector remap thresholds unchanged. + +### Phase 4 - Regression Coverage + +- [ ] Add tests proving `--detector wasm --latin-tag en` does not suppress detector-derived locales for otherwise eligible ambiguous Latin text. +- [ ] Add tests proving unresolved ambiguous Latin still respects `latinTagHint` after detector evaluation. +- [ ] Add a regression test for stable totals using the reported README reproduction or a narrower fixture that captures the same failure mode. + +### Phase 5 - Documentation and Closure + +- [ ] Update README detector notes to clarify hint ordering in WASM mode. +- [ ] Add a completion job record under `docs/plans/jobs/` after implementation lands. + +## Compatibility Gates + +- [ ] `--detector regex` behavior remains unchanged. +- [ ] `--latin-tag`, `--latin-language`, and `--latin-locale` remain available in the public API and CLI. +- [ ] `--detector wasm` keeps existing remap thresholds and accepted tag mappings unless a separate change is explicitly planned. +- [ ] JSON and standard output contracts remain unchanged. 
+ +## Validation + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Research + +- `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` +- `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` diff --git a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md index e4b30dc..fe299de 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md +++ b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md @@ -85,3 +85,7 @@ Locale zh: 8 words - keep ambiguous Latin as `und-Latn` for detector eligibility - run WASM remap on ambiguous windows - apply `latinTagHint` only as a fallback for windows/chunks that remain unresolved after detector evaluation + +## Related Plans + +- `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` From e34dfe969424afedfb6d42763fb575fa20ba6b5a Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 10:18:58 +0800 Subject: [PATCH 04/23] docs: update WASM Latin ordering documentation to clarify hinting behavior and compatibility constraints --- ...eview-findings-wasm-latin-ordering-docs.md | 35 +++++++++++++++++++ ...026-03-24-wasm-mode-latin-hint-ordering.md | 31 +++++++++++----- ...h-2026-03-24-wasm-latin-tag-interaction.md | 13 +++++-- 3 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-review-findings-wasm-latin-ordering-docs.md diff --git a/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-ordering-docs.md b/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-ordering-docs.md new file mode 100644 index 0000000..a64c491 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-ordering-docs.md @@ -0,0 +1,35 @@ +--- +title: "review findings wasm latin ordering docs" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + 
+Address review findings on the WASM Latin ordering research and plan docs so they accurately describe the intended change surface and preserve existing compatibility constraints. + +## What Changed + +- Updated `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` to make the pre-detector interaction explicit for: + - explicit Latin fallback hints + - custom Latin hint rules + - built-in default Latin hint rules +- Tightened `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` so the proposed WASM fix now explicitly: + - defers all Latin hint sources that can relabel ambiguous Latin before detector routing + - preserves explicit fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` + - preserves existing Latin hint rule priority and definition-order semantics in the fallback path + - adds regression coverage for those compatibility guarantees +- Kept the affected traceability sections in plain repo-relative path form to match current repository conventions. + +## Validation + +- Documentation review only; no code or tests were run. + +## Related Plans + +- [docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md](../plan-2026-03-24-wasm-mode-latin-hint-ordering.md) + +## Related Research + +- [docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md](../../researches/research-2026-03-24-wasm-latin-tag-interaction.md) diff --git a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md index 827c49e..db5c8bb 100644 --- a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md +++ b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md @@ -12,7 +12,7 @@ Restore detector-first behavior in `--detector wasm` mode so Latin fallback hint ## Context -Current behavior allows `--latin-tag` to participate in pre-detector segmentation even when `--detector wasm` is selected. 
That changes chunk routing before detector evaluation, removes detector-derived locale outcomes such as `fr`, and can also change total word counts because final counting uses locale-specific `Intl.Segmenter` instances. +Current behavior allows explicit Latin fallback hints and rule-based Latin hinting to participate in pre-detector segmentation even when `--detector wasm` is selected. That changes chunk routing before detector evaluation, removes detector-derived locale outcomes such as `fr`, and can also change total word counts because final counting uses locale-specific `Intl.Segmenter` instances. Issue linkage: @@ -24,7 +24,11 @@ Issue linkage: - In scope: - Change WASM-mode segmentation flow so Latin hint-based language assignment does not run before ambiguous-window detection. - Preserve the current detector remap policy for eligible `und-Latn` and `und-Hani` windows. - - Keep a deterministic fallback path so unresolved ambiguous Latin can still be relabeled by explicit Latin fallback options after detector evaluation. + - Keep a deterministic fallback path so unresolved ambiguous Latin can still be relabeled after detector evaluation by: + - custom Latin hint rules + - built-in default Latin hint rules + - explicit Latin fallback options + - Preserve the existing explicit Latin fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. - Add regression coverage for the interaction between `--detector wasm` and Latin hint options. - Update user-facing docs where detector and hint behavior is described. - Out of scope: @@ -34,36 +38,45 @@ Issue linkage: ## Proposed Decisions -- Treat explicit Latin hint options as regex-mode labeling inputs, not pre-detector routing inputs, when `detector = "wasm"`. 
+- Treat all Latin hint inputs that can relabel ambiguous Latin to a non-default Latin locale as post-detector fallback inputs, not pre-detector routing inputs, when `detector = "wasm"`: + - explicit fallback options (`latinTagHint`, `latinLanguageHint`, `latinLocaleHint`) + - custom Latin hint rules + - built-in default Latin hint rules - Keep ambiguous Latin text on `und-Latn` during the initial segmentation pass in WASM mode. - Run the existing WASM detector flow against those ambiguous windows first. -- Only after detector evaluation, apply explicit Latin fallback tags to unresolved `und-Latn` output. -- Preserve built-in script-specific Latin hint rules that already identify non-ambiguous language buckets from distinctive characters unless implementation review shows they must also be deferred for consistency. +- Only after detector evaluation, reapply Latin fallback semantics to unresolved `und-Latn` output in the current order: + - custom and built-in Latin hint rules by existing priority and order semantics + - explicit fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` + - default `und-Latn` when nothing matches ## Phase Task Items ### Phase 1 - WASM Pre-Segmentation Boundary -- [ ] Introduce a WASM-specific locale-detect option path that suppresses explicit Latin fallback hints during the initial `segmentTextByLocale()` pass. +- [ ] Introduce a WASM-specific locale-detect option path that suppresses explicit and rule-based Latin hint relabeling during the initial `segmentTextByLocale()` pass. - [ ] Keep ambiguous Latin text on `und-Latn` during the initial WASM segmentation path so detector eligibility is preserved. - [ ] Keep detector window construction and accepted remap behavior unchanged for eligible `und-Latn` and `und-Hani` routes. ### Phase 2 - Post-Detector Fallback Relabeling -- [ ] Add a post-detector fallback pass that relabels only unresolved `und-Latn` chunks using explicit Latin fallback options. 
+- [ ] Add a post-detector fallback pass that relabels only unresolved `und-Latn` chunks using the existing Latin hint semantics. +- [ ] Ensure custom and built-in Latin hint rules are applied after detector acceptance or rejection, not before detector routing. - [ ] Ensure explicit Latin fallback options are applied after detector acceptance or rejection, not before detector routing. -- [ ] Recheck whether built-in script-specific Latin hint rules should remain in pre-detector routing or also move into the fallback layer, and document the final decision in code comments or docs if needed. +- [ ] Preserve the existing explicit Latin fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` in the WASM fallback path. ### Phase 3 - Compatibility Guards - [ ] Keep regex mode behavior unchanged. - [ ] Keep explicit Han hint behavior unchanged unless the same implementation path proves a correction is required. - [ ] Keep public output schemas and detector remap thresholds unchanged. +- [ ] Keep existing Latin hint rule priority and definition-order semantics unchanged in the fallback path. ### Phase 4 - Regression Coverage - [ ] Add tests proving `--detector wasm --latin-tag en` does not suppress detector-derived locales for otherwise eligible ambiguous Latin text. - [ ] Add tests proving unresolved ambiguous Latin still respects `latinTagHint` after detector evaluation. +- [ ] Add tests proving unresolved ambiguous Latin in WASM mode still preserves explicit hint precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. +- [ ] Add tests proving custom and built-in Latin hint rules are deferred until after detector evaluation in WASM mode. - [ ] Add a regression test for stable totals using the reported README reproduction or a narrower fixture that captures the same failure mode. ### Phase 5 - Documentation and Closure @@ -75,6 +88,8 @@ Issue linkage: - [ ] `--detector regex` behavior remains unchanged. 
- [ ] `--latin-tag`, `--latin-language`, and `--latin-locale` remain available in the public API and CLI. +- [ ] Explicit Latin hint precedence remains `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. +- [ ] Existing Latin hint rule priority and definition-order semantics remain unchanged. - [ ] `--detector wasm` keeps existing remap thresholds and accepted tag mappings unless a separate change is explicitly planned. - [ ] JSON and standard output contracts remain unchanged. diff --git a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md index fe299de..577f962 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md +++ b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md @@ -71,6 +71,9 @@ Locale zh: 8 words - `resolveCountRunOptions()` forwards `options.latinTag` into `wcOptions.latinTagHint`. - `segmentTextByLocaleWithWasmDetector()` calls `segmentTextByLocale(text, options)` before any WASM remap work. - `detectLocaleForChar()` returns `context.latinHint` for ambiguous Latin characters when `latinTagHint` is present. +- Explicit Latin fallback hints and rule-based Latin hinting both participate in that same pre-detector segmentation path: + - built-in Latin hint rules are resolved in `resolveLocaleDetectContext()` + - `detectLatinLocale()` applies custom and built-in Latin hint rules before `context.latinHint` - Because of that early hinting, many Latin runs become `en` during base segmentation instead of remaining `und-Latn`. - The WASM detector only evaluates ambiguous routes (`und-Latn`, `und-Hani`), so pre-labeled `en` chunks are skipped entirely. - Result: @@ -80,11 +83,17 @@ Locale zh: 8 words ## Implications or Recommendations - Current behavior matches "hint overrides ambiguity before detection", not "run WASM first, then relabel only unresolved `und-Latn` buckets". 
-- If the intended contract is that hint-based language tagging belongs to regex mode, `latinTagHint` should not be applied during the pre-detector segmentation pass for WASM mode. +- If the intended contract is detector-first routing in WASM mode, the initial WASM segmentation pass cannot apply any Latin hint source that upgrades `und-Latn` to a non-default Latin locale: + - explicit fallback options (`latinTagHint`, `latinLanguageHint`, `latinLocaleHint`) + - custom Latin hint rules + - built-in default Latin hint rules - One candidate model for WASM mode is: - keep ambiguous Latin as `und-Latn` for detector eligibility - run WASM remap on ambiguous windows - - apply `latinTagHint` only as a fallback for windows/chunks that remain unresolved after detector evaluation + - for windows/chunks that remain unresolved after detector evaluation, reapply the existing Latin hint semantics in fallback order: + - custom and built-in rule matching + - explicit fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` + - default `und-Latn` fallback when no rule or explicit hint applies ## Related Plans From 8871ec47691c12431dac400781cd29d73a2995f4 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 10:47:28 +0800 Subject: [PATCH 05/23] fix(detector): defer latin hints until after wasm detection --- ...mode-latin-hint-ordering-implementation.md | 45 +++++++++++++ ...026-03-24-wasm-mode-latin-hint-ordering.md | 50 +++++++------- src/detector/wasm.ts | 65 ++++++++++++++++++- test/command.test.ts | 21 ++++++ test/word-counter.test.ts | 59 +++++++++++++++++ 5 files changed, 213 insertions(+), 27 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md diff --git a/docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md b/docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md new file mode 100644 index 0000000..01da575 --- /dev/null +++ 
b/docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md @@ -0,0 +1,45 @@ +--- +title: "wasm mode latin hint ordering implementation" +created-date: 2026-03-24 +modified-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Implement the `--detector wasm` Latin hint ordering fix so ambiguous Latin remains detector-eligible during pre-segmentation and only unresolved `und-Latn` output is relabeled by the existing Latin fallback semantics afterward. + +## Scope + +- Defer Latin hint-driven relabeling during the initial WASM segmentation pass. +- Reapply existing Latin fallback semantics only after detector evaluation for unresolved `und-Latn` chunks. +- Add regression coverage for explicit hints, rule-based hints, and stable totals. +- Update the plan and README as implementation milestones complete. + +## What Changed + +- Updated `src/detector/wasm.ts` so WASM mode now: + - validates the original Latin hint configuration up front + - strips Latin hints and Latin hint rules from the initial segmentation pass + - keeps ambiguous Latin on `und-Latn` for the existing detector window flow + - re-segments only unresolved `und-Latn` chunks with the original options after detector evaluation +- Added regression coverage in: + - `test/word-counter.test.ts` + - `test/command.test.ts` +- Updated `README.md` to document the detector-first ordering in WASM mode and the post-detector Latin fallback behavior. 
+ +## Validation + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Plans + +- `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` + +## Related Research + +- `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` diff --git a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md index db5c8bb..db63cea 100644 --- a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md +++ b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md @@ -2,7 +2,7 @@ title: "WASM mode Latin hint ordering fix" created-date: 2026-03-24 modified-date: 2026-03-24 -status: draft +status: completed agent: Codex --- @@ -53,45 +53,45 @@ Issue linkage: ### Phase 1 - WASM Pre-Segmentation Boundary -- [ ] Introduce a WASM-specific locale-detect option path that suppresses explicit and rule-based Latin hint relabeling during the initial `segmentTextByLocale()` pass. -- [ ] Keep ambiguous Latin text on `und-Latn` during the initial WASM segmentation path so detector eligibility is preserved. -- [ ] Keep detector window construction and accepted remap behavior unchanged for eligible `und-Latn` and `und-Hani` routes. +- [x] Introduce a WASM-specific locale-detect option path that suppresses explicit and rule-based Latin hint relabeling during the initial `segmentTextByLocale()` pass. +- [x] Keep ambiguous Latin text on `und-Latn` during the initial WASM segmentation path so detector eligibility is preserved. +- [x] Keep detector window construction and accepted remap behavior unchanged for eligible `und-Latn` and `und-Hani` routes. ### Phase 2 - Post-Detector Fallback Relabeling -- [ ] Add a post-detector fallback pass that relabels only unresolved `und-Latn` chunks using the existing Latin hint semantics. 
-- [ ] Ensure custom and built-in Latin hint rules are applied after detector acceptance or rejection, not before detector routing. -- [ ] Ensure explicit Latin fallback options are applied after detector acceptance or rejection, not before detector routing. -- [ ] Preserve the existing explicit Latin fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` in the WASM fallback path. +- [x] Add a post-detector fallback pass that relabels only unresolved `und-Latn` chunks using the existing Latin hint semantics. +- [x] Ensure custom and built-in Latin hint rules are applied after detector acceptance or rejection, not before detector routing. +- [x] Ensure explicit Latin fallback options are applied after detector acceptance or rejection, not before detector routing. +- [x] Preserve the existing explicit Latin fallback precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint` in the WASM fallback path. ### Phase 3 - Compatibility Guards -- [ ] Keep regex mode behavior unchanged. -- [ ] Keep explicit Han hint behavior unchanged unless the same implementation path proves a correction is required. -- [ ] Keep public output schemas and detector remap thresholds unchanged. -- [ ] Keep existing Latin hint rule priority and definition-order semantics unchanged in the fallback path. +- [x] Keep regex mode behavior unchanged. +- [x] Keep explicit Han hint behavior unchanged unless the same implementation path proves a correction is required. +- [x] Keep public output schemas and detector remap thresholds unchanged. +- [x] Keep existing Latin hint rule priority and definition-order semantics unchanged in the fallback path. ### Phase 4 - Regression Coverage -- [ ] Add tests proving `--detector wasm --latin-tag en` does not suppress detector-derived locales for otherwise eligible ambiguous Latin text. -- [ ] Add tests proving unresolved ambiguous Latin still respects `latinTagHint` after detector evaluation. 
-- [ ] Add tests proving unresolved ambiguous Latin in WASM mode still preserves explicit hint precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. -- [ ] Add tests proving custom and built-in Latin hint rules are deferred until after detector evaluation in WASM mode. -- [ ] Add a regression test for stable totals using the reported README reproduction or a narrower fixture that captures the same failure mode. +- [x] Add tests proving `--detector wasm --latin-tag en` does not suppress detector-derived locales for otherwise eligible ambiguous Latin text. +- [x] Add tests proving unresolved ambiguous Latin still respects `latinTagHint` after detector evaluation. +- [x] Add tests proving unresolved ambiguous Latin in WASM mode still preserves explicit hint precedence `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. +- [x] Add tests proving custom and built-in Latin hint rules are deferred until after detector evaluation in WASM mode. +- [x] Add a regression test for stable totals using the reported README reproduction or a narrower fixture that captures the same failure mode. ### Phase 5 - Documentation and Closure -- [ ] Update README detector notes to clarify hint ordering in WASM mode. -- [ ] Add a completion job record under `docs/plans/jobs/` after implementation lands. +- [x] Update README detector notes to clarify hint ordering in WASM mode. +- [x] Add a completion job record under `docs/plans/jobs/` after implementation lands. ## Compatibility Gates -- [ ] `--detector regex` behavior remains unchanged. -- [ ] `--latin-tag`, `--latin-language`, and `--latin-locale` remain available in the public API and CLI. -- [ ] Explicit Latin hint precedence remains `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. -- [ ] Existing Latin hint rule priority and definition-order semantics remain unchanged. -- [ ] `--detector wasm` keeps existing remap thresholds and accepted tag mappings unless a separate change is explicitly planned. 
-- [ ] JSON and standard output contracts remain unchanged. +- [x] `--detector regex` behavior remains unchanged. +- [x] `--latin-tag`, `--latin-language`, and `--latin-locale` remain available in the public API and CLI. +- [x] Explicit Latin hint precedence remains `latinTagHint` > `latinLanguageHint` > `latinLocaleHint`. +- [x] Existing Latin hint rule priority and definition-order semantics remain unchanged. +- [x] `--detector wasm` keeps existing remap thresholds and accepted tag mappings unless a separate change is explicitly planned. +- [x] JSON and standard output contracts remain unchanged. ## Validation diff --git a/src/detector/wasm.ts b/src/detector/wasm.ts index 1574b99..183a4e1 100644 --- a/src/detector/wasm.ts +++ b/src/detector/wasm.ts @@ -1,5 +1,6 @@ import { DEFAULT_HAN_TAG, DEFAULT_LOCALE } from "../wc/locale-detect"; import { segmentTextByLocale } from "../wc"; +import { resolveLocaleDetectContext } from "../wc/locale-detect"; import type { LocaleChunk } from "../wc/types"; import { buildWordCounterResultFromChunks } from "./result-builder"; import { countSectionsWithResolvedDetector } from "./sections"; @@ -19,6 +20,62 @@ import type { DetectorWordCounterOptions, } from "./types"; +function createDeferredLatinPreSegmentOptions( + options: DetectorLocaleOptions, +): DetectorLocaleOptions { + return { + ...options, + latinLanguageHint: undefined, + latinTagHint: undefined, + latinLocaleHint: undefined, + latinHintRules: undefined, + useDefaultLatinHints: false, + }; +} + +function mergeAdjacentChunks(chunks: LocaleChunk[]): LocaleChunk[] { + if (chunks.length === 0) { + return chunks; + } + + const merged: LocaleChunk[] = []; + let last = chunks[0]!; + + for (let index = 1; index < chunks.length; index += 1) { + const chunk = chunks[index]!; + if (chunk.locale === last.locale) { + last = { + locale: last.locale, + text: last.text + chunk.text, + }; + continue; + } + merged.push(last); + last = chunk; + } + + merged.push(last); + return merged; +} 
+ +function reapplyDeferredLatinFallback( + chunks: LocaleChunk[], + options: DetectorLocaleOptions, +): LocaleChunk[] { + const relabeled: LocaleChunk[] = []; + + for (const chunk of chunks) { + if (chunk.locale !== DEFAULT_LOCALE) { + relabeled.push(chunk); + continue; + } + + relabeled.push(...segmentTextByLocale(chunk.text, options)); + } + + return mergeAdjacentChunks(relabeled); +} + function shouldAcceptDetectorTag( routeTag: DetectorRouteTag, confidence: number | undefined, @@ -138,7 +195,11 @@ export async function segmentTextByLocaleWithWasmDetector( text: string, options: DetectorLocaleOptions = {}, ) { - const chunks = segmentTextByLocale(text, options); + // Validate the original hint configuration up front even though Latin hinting + // is deferred until after detector routing in WASM mode. + resolveLocaleDetectContext(options); + + const chunks = segmentTextByLocale(text, createDeferredLatinPreSegmentOptions(options)); const resolved = [...chunks]; const windows = buildDetectorWindows(chunks); @@ -156,7 +217,7 @@ export async function segmentTextByLocaleWithWasmDetector( } } - return resolved; + return reapplyDeferredLatinFallback(resolved, options); } export async function wordCounterWithWasmDetector( diff --git a/test/command.test.ts b/test/command.test.ts index 2b058af..bdecf2e 100644 --- a/test/command.test.ts +++ b/test/command.test.ts @@ -246,6 +246,27 @@ describe("detector mode", () => { expect(parsed.breakdown.items[0]?.locale).toBe("en"); }); + test("keeps detector-derived locale when latin tag hint is set in wasm mode", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const output = await captureCli([ + "--detector", + "wasm", + "--latin-tag", + "en", + "--format", + "json", + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue.", + ]); + + expect(output.exitCode).toBe(0); + const parsed = JSON.parse(output.stdout[0] ?? 
"{}"); + expect(parsed.total).toBe(15); + expect(parsed.breakdown.items[0]?.locale).toBe("fr"); + }); + test("rejects invalid detector mode values", () => { const result = spawnSync( process.execPath, diff --git a/test/word-counter.test.ts b/test/word-counter.test.ts index e552662..6ad272d 100644 --- a/test/word-counter.test.ts +++ b/test/word-counter.test.ts @@ -149,6 +149,65 @@ describe("detector entrypoint", () => { expect(result.breakdown.items[0]?.locale).toBe("und-Latn"); }); + test("does not let latinTagHint suppress detector-derived locales in wasm mode", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const sample = + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue."; + const baseline = await wordCounterWithDetector(sample, { detector: "wasm" }); + const hinted = await wordCounterWithDetector(sample, { + detector: "wasm", + latinTagHint: "en", + }); + + expect(baseline.breakdown.mode).toBe("chunk"); + expect(hinted.breakdown.mode).toBe("chunk"); + expect(baseline.breakdown.items[0]?.locale).toBe("fr"); + expect(hinted.breakdown.items[0]?.locale).toBe("fr"); + expect(hinted.total).toBe(baseline.total); + }); + + test("reapplies latinTagHint after unresolved wasm detector evaluation", async () => { + const chunks = await segmentTextByLocaleWithDetector("Hello world", { + detector: "wasm", + latinTagHint: "en", + }); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["en"]); + }); + + test("preserves explicit Latin hint precedence after unresolved wasm detector evaluation", async () => { + const chunks = await segmentTextByLocaleWithDetector("Hello world", { + detector: "wasm", + latinLocaleHint: "en", + latinLanguageHint: "fr", + latinTagHint: "de", + }); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["de"]); + }); + + test("reapplies built-in Latin hint rules after unresolved wasm detector evaluation", async () => { + const chunks = await 
segmentTextByLocaleWithDetector("el niño", { + detector: "wasm", + }); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["und-Latn", "es"]); + expect(chunks.map((chunk) => chunk.text)).toEqual(["el ", "niño"]); + }); + + test("reapplies custom Latin hint rules after unresolved wasm detector evaluation", async () => { + const chunks = await segmentTextByLocaleWithDetector("Zażółć gęślą jaźń", { + detector: "wasm", + latinHintRules: [{ tag: "pl", pattern: "[ąćęłńóśźżĄĆĘŁŃÓŚŹŻ]" }], + useDefaultLatinHints: false, + }); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["pl"]); + }); + test("segments text through detector entrypoint", async () => { const chunks = await segmentTextByLocaleWithDetector("Hello 世界", { detector: "regex" }); From 12251e33b90bb7e7314ef61cb85c9e3634f67aa1 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 10:47:52 +0800 Subject: [PATCH 06/23] docs: clarify behavior of Latin hint rules in WASM detector mode --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c72cec..91b09b7 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ Detector mode notes: - `--detector wasm` only runs for ambiguous `und-Latn` and `und-Hani` chunks. - `--detector regex` keeps the original script/regex chunk-first detection path. - `--detector wasm` uses a detector-oriented ambiguous-window scoring pass before accepted tags are projected back onto the counting chunks. +- In `--detector wasm` mode, Latin hint rules and explicit Latin hint flags are deferred until after detector evaluation and only relabel unresolved `und-Latn` output. - Very short chunks stay on the original `und-*` fallback. - Low-confidence or unsupported detector results fall back to `und-*`. @@ -311,7 +312,7 @@ Skip details stay debug-gated and can be suppressed with `--quiet-skips`. - Adjacent characters that share the same locale tag are grouped into a chunk. 
- Each chunk is counted with `Intl.Segmenter` at `granularity: "word"`, caching segmenters to avoid re-instantiation. - Per-locale counts are summed into an overall total and printed to stdout. -- With `--detector wasm`, ambiguous `und-Latn` and `und-Hani` chunks can be relabeled through the optional WASM detector before counting. +- With `--detector wasm`, ambiguous `und-Latn` and `und-Hani` chunks can be relabeled through the optional WASM detector before counting; unresolved `und-Latn` chunks then fall back to the existing Latin hint rules and explicit Latin hint precedence. ## Locale vs Language Code @@ -696,6 +697,7 @@ Example JSON (trimmed): - Detection is regex/script based, not statistical language-ID. - Ambiguous Latin defaults to `und-Latn`; Han fallback defaults to `und-Hani`. - `--detector wasm` is optional and conservative; it only runs for ambiguous chunks that meet minimum script-bearing length thresholds. +- In `--detector wasm` mode, ambiguous Latin stays on `und-Latn` for detector eligibility first, then built-in/custom Latin rules and explicit Latin hints are applied only if the detector leaves that chunk unresolved. - The current first WASM engine is `whatlang`, remapped into this package's public tags. - The npm package ships one portable WASM artifact; users do not install per-OS detector packages. - Use explicit tag and hint flags when you need deterministic tagging. 
From b160ec1703462e1260aac1e1a1c83f0e963d73bb Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 10:48:14 +0800 Subject: [PATCH 07/23] docs(detector): draft wasm latin quality guardrails --- ...-wasm-latin-detector-quality-guardrails.md | 89 +++++++++++++++++++ ...-latin-detector-quality-false-positives.md | 52 +++++++++++ 2 files changed, 141 insertions(+) create mode 100644 docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md create mode 100644 docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md diff --git a/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md b/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md new file mode 100644 index 0000000..d2da49b --- /dev/null +++ b/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md @@ -0,0 +1,89 @@ +--- +title: "WASM Latin detector quality guardrails" +created-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Reduce false-positive WASM language projections for ambiguous Latin technical text such as markdown, CLI documentation, and code-adjacent prose while keeping the Latin hint ordering fix intact. + +## Context + +After the WASM Latin hint ordering fix, detector-derived locales correctly reappear for ambiguous Latin text. However, noisy English technical text can still be promoted to incorrect Latin languages such as `fr` under the current Whatlang acceptance policy. 
+ +## Scope + +- In scope: + - tighten WASM Latin detector acceptance so low-signal technical text is less likely to be projected to the wrong language + - add observability for detector decisions in tests or debug output + - add regression coverage for README-like technical English and similar noisy ambiguous Latin samples + - preserve the existing detector-first ordering fix in WASM mode +- Out of scope: + - replacing the WASM engine + - changing default `--detector regex` behavior + - broad language-detection redesign outside the WASM Latin route + +## Proposed Decisions + +- Keep the current detector routing shape, but make Latin acceptance more conservative. +- Evaluate candidate guardrails in this order: + - raise corroborated Latin acceptance thresholds + - require stronger reliability constraints for corroborated Latin acceptance + - add a low-signal/noisy-window rejection gate for markdown/CLI-heavy text + - prefer fallback to `und-Latn` when detector evidence is mixed +- Add detector-decision visibility so tuning does not depend on manual reverse-engineering from final locale output alone. + +## Phase Task Items + +### Phase 1 - Fixture and Observability + +- [ ] Add narrow regression fixtures for README-like English technical text that currently misclassifies as `fr`. +- [ ] Add a way to inspect detector decisions during tests or debug flows: + - raw detector result + - normalized detector result + - confidence + - reliability + - final acceptance reason + +### Phase 2 - Policy Tuning + +- [ ] Re-evaluate `LATIN_WASM_MIN_CONFIDENCE` and `LATIN_WASM_CORROBORATED_MIN_CONFIDENCE` against the new false-positive fixtures. +- [ ] Test whether corroborated Latin acceptance should require `reliable = true`. +- [ ] Test whether technical/noisy Latin windows should remain `und-Latn` unless confidence is materially stronger than today. 
+ +### Phase 3 - Heuristic Guardrails + +- [ ] Evaluate adding a lightweight technical-noise guard before accepting a Latin detector result: + - markdown punctuation density + - command/flag density + - unusually short repeated tokens + - low alphabetic diversity or low stopword coherence +- [ ] Keep any added heuristic conservative and easy to explain. + +### Phase 4 - Regression Coverage and Documentation + +- [ ] Add regression tests proving English technical prose does not regress to `fr` under `--detector wasm`. +- [ ] Add tests proving true-positive longer non-English Latin samples still resolve when evidence remains strong. +- [ ] Update README or detector notes if acceptance policy semantics materially change. + +## Compatibility Gates + +- [ ] The completed WASM Latin hint ordering fix remains intact. +- [ ] `--detector regex` behavior remains unchanged. +- [ ] Unsupported or low-confidence Latin windows continue to fall back safely to `und-Latn`. +- [ ] Existing public CLI and library options remain unchanged unless a separate observability flag is explicitly planned. 
+ +## Validation + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Research + +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` +- `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` +- `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md new file mode 100644 index 0000000..93318ca --- /dev/null +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -0,0 +1,52 @@ +--- +title: "wasm latin detector quality false positives" +created-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Document the follow-up quality issue where `--detector wasm` can still relabel obviously English README-style text as `fr` after the Latin hint ordering fix is applied. + +## Key Findings + +- The ordering fix and the detector-quality issue are separate: + - the ordering fix restores detector eligibility for ambiguous Latin in WASM mode + - the remaining problem is detector acceptance quality on noisy Latin windows +- Current Latin acceptance policy in `src/detector/wasm.ts` accepts: + - results that meet `LATIN_WASM_MIN_CONFIDENCE = 0.75` and `reliable = true` + - corroborated raw + normalized results when both remap to the same tag and confidence reaches `LATIN_WASM_CORROBORATED_MIN_CONFIDENCE = 0.7` +- Current sample normalization in `src/detector/policy.ts` preserves all Latin letters and reduces everything else to spaces. This keeps markdown-like command text eligible even when the remaining lexical signal is poor. +- The public Whatlang remap in `src/detector/whatlang-map.ts` is broad for Latin routes and does not add any project-level lexical sanity checks. 
+- A direct raw detector sample built from README/CLI-style English tokens such as `cat how do you do grapheme aware character count ...` can return a French result from Whatlang with low confidence and `reliable = false`. +- The CLI result observed after the ordering fix is therefore consistent with: + - detector-first routing now working correctly + - the current acceptance policy still being too permissive for some noisy Latin windows + +## Implications or Recommendations + +- Do not roll back the Latin hint ordering fix. That fix is behaving correctly. +- Treat this as a detector-policy follow-up for WASM Latin quality. +- Most likely improvement areas are: + - tighten Latin acceptance thresholds, especially the corroborated path + - reduce eligibility or acceptance for markdown/CLI/docs-noise windows + - add detector-debug visibility for raw tag, confidence, reliability, normalized sample, and acceptance reason + - add regression fixtures for English technical prose that must stay `und-Latn` or resolve to `en`, not `fr` +- Prefer conservative fallback to `und-Latn` over confident-but-wrong language projection for noisy technical text. + +## Open Questions + +- Should README-like technical English prefer staying on `und-Latn` unless confidence is very strong, even if that reduces some true-positive non-English upgrades? +- Should corroborated acceptance for Latin require `reliable = true`, not just matching raw/normalized tags? +- Should the Latin route add a token-quality gate before the detector result can be accepted? 
+ +## Related Plans + +- `docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md` +- `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` + +## Related Research + +- `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` +- `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` From 34b5040bad327dddbb34e5920d20dc723fc44d37 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 11:30:24 +0800 Subject: [PATCH 08/23] docs: remove outdated WASM Latin detector quality guardrails plan and add global debug observability model documentation --- ...-wasm-latin-detector-quality-guardrails.md | 89 -------- ...-03-24-global-debug-observability-model.md | 194 ++++++++++++++++++ ...-latin-detector-quality-false-positives.md | 16 +- 3 files changed, 208 insertions(+), 91 deletions(-) delete mode 100644 docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md create mode 100644 docs/researches/research-2026-03-24-global-debug-observability-model.md diff --git a/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md b/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md deleted file mode 100644 index d2da49b..0000000 --- a/docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: "WASM Latin detector quality guardrails" -created-date: 2026-03-24 -status: draft -agent: Codex ---- - -## Goal - -Reduce false-positive WASM language projections for ambiguous Latin technical text such as markdown, CLI documentation, and code-adjacent prose while keeping the Latin hint ordering fix intact. - -## Context - -After the WASM Latin hint ordering fix, detector-derived locales correctly reappear for ambiguous Latin text. However, noisy English technical text can still be promoted to incorrect Latin languages such as `fr` under the current Whatlang acceptance policy. 
- -## Scope - -- In scope: - - tighten WASM Latin detector acceptance so low-signal technical text is less likely to be projected to the wrong language - - add observability for detector decisions in tests or debug output - - add regression coverage for README-like technical English and similar noisy ambiguous Latin samples - - preserve the existing detector-first ordering fix in WASM mode -- Out of scope: - - replacing the WASM engine - - changing default `--detector regex` behavior - - broad language-detection redesign outside the WASM Latin route - -## Proposed Decisions - -- Keep the current detector routing shape, but make Latin acceptance more conservative. -- Evaluate candidate guardrails in this order: - - raise corroborated Latin acceptance thresholds - - require stronger reliability constraints for corroborated Latin acceptance - - add a low-signal/noisy-window rejection gate for markdown/CLI-heavy text - - prefer fallback to `und-Latn` when detector evidence is mixed -- Add detector-decision visibility so tuning does not depend on manual reverse-engineering from final locale output alone. - -## Phase Task Items - -### Phase 1 - Fixture and Observability - -- [ ] Add narrow regression fixtures for README-like English technical text that currently misclassifies as `fr`. -- [ ] Add a way to inspect detector decisions during tests or debug flows: - - raw detector result - - normalized detector result - - confidence - - reliability - - final acceptance reason - -### Phase 2 - Policy Tuning - -- [ ] Re-evaluate `LATIN_WASM_MIN_CONFIDENCE` and `LATIN_WASM_CORROBORATED_MIN_CONFIDENCE` against the new false-positive fixtures. -- [ ] Test whether corroborated Latin acceptance should require `reliable = true`. -- [ ] Test whether technical/noisy Latin windows should remain `und-Latn` unless confidence is materially stronger than today. 
- -### Phase 3 - Heuristic Guardrails - -- [ ] Evaluate adding a lightweight technical-noise guard before accepting a Latin detector result: - - markdown punctuation density - - command/flag density - - unusually short repeated tokens - - low alphabetic diversity or low stopword coherence -- [ ] Keep any added heuristic conservative and easy to explain. - -### Phase 4 - Regression Coverage and Documentation - -- [ ] Add regression tests proving English technical prose does not regress to `fr` under `--detector wasm`. -- [ ] Add tests proving true-positive longer non-English Latin samples still resolve when evidence remains strong. -- [ ] Update README or detector notes if acceptance policy semantics materially change. - -## Compatibility Gates - -- [ ] The completed WASM Latin hint ordering fix remains intact. -- [ ] `--detector regex` behavior remains unchanged. -- [ ] Unsupported or low-confidence Latin windows continue to fall back safely to `und-Latn`. -- [ ] Existing public CLI and library options remain unchanged unless a separate observability flag is explicitly planned. 
- -## Validation - -- `bun test test/word-counter.test.ts` -- `bun test test/command.test.ts` -- `bun run type-check` -- `bun run build` - -## Related Research - -- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` -- `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` -- `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` diff --git a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md new file mode 100644 index 0000000..bacfa9c --- /dev/null +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -0,0 +1,194 @@ +--- +title: "global debug observability model" +created-date: 2026-03-24 +modified-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Define a repository-wide observability model for `word-counter` so debug and diagnostics data can be added consistently across single-input runs, batch runs, detector workflows, and future operational surfaces without breaking the stable output contract. 
+ +## Key Findings + +- The current debug system is event-stream based, not result-schema based: + - `src/cli/debug/channel.ts` emits structured JSON events + - events are routed to `stderr` or a `.jsonl` debug report file + - `--verbose` controls event volume through `compact` vs `verbose` +- The current debug channel is wired primarily into batch execution: + - `src/command.ts` creates the channel globally for CLI counting + - `src/cli/runtime/batch.ts` actively emits batch/path/progress-related events + - single-input counting does not currently emit parallel runtime debug events +- Current JSON result payloads are mostly result-oriented, but there is already one mixed behavior: + - per-file JSON can include `skipped` when debug skip diagnostics are enabled +- Existing docs already cover adjacent but narrower concerns: + - `docs/researches/research-2026-02-13-cli-progress-indicator.md` defines progress/debug separation for batch UX + - `docs/plans/plan-2026-02-16-debug-verbosity-and-report-file.md` defines compact/verbose debug routing and report-file behavior + - `docs/researches/research-2026-02-17-json-output-schema-contract.md` defines additive JSON contract thinking for result payloads + - `docs/researches/research-2026-03-13-doctor-command.md` defines a standalone diagnostics command + - `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` identifies detector-specific observability needs +- What is still missing is a global model that distinguishes: + - stable result metadata + - debug-only diagnostics + - runtime event streams + - topic/scope semantics shared across subsystems + +## Current Mechanics Snapshot + +### Debug Channel + +- `--debug` enables structured diagnostics. +- `--verbose` enables higher-volume events. +- `--debug-report [path]` writes JSONL diagnostics to a file. +- `--debug-report-tee` / `--debug-tee` mirror the file stream to `stderr`. 
+ +### Output Behavior + +- Normal counting results still go to `stdout`. +- Debug events go to `stderr` unless redirected to a report file. +- Per-file JSON output may also include debug-gated `skipped` data today. + +### Event Shape Today + +- Current debug entries are minimally structured JSON: + - `event` + - arbitrary event-specific fields +- There is no common event envelope yet for: + - timestamp + - run identifier + - topic + - scope + - schema version + - severity + +## Design Problem + +The repository now has enough operational features that local one-off diagnostics decisions are starting to collide: + +- batch routing wants debug events +- detector quality work wants per-window metrics +- JSON output wants stable additive metadata +- doctor wants machine-readable host diagnostics + +Without a single model, each new feature risks inventing its own payload shape and routing rules. + +## Proposed Direction + +Use a three-layer observability model: + +### 1. Stable Result Metadata + +Use `meta` for small, additive, stable fields that are part of the normal result contract. + +Examples: + +- `meta.detector.mode` +- `meta.detector.engine` +- `meta.totalOf` +- `meta.totalOfOverride` + +Rules: + +- must be additive +- must be small and predictable +- safe for downstream parsers +- should not expose high-volume internal traces + +### 2. Debug-Gated Result Diagnostics + +Use a dedicated debug section in JSON output only when debug-gated behavior explicitly allows it. + +Examples: + +- `debug.detector` +- `debug.batch` +- `debug.skipped` + +Rules: + +- only present when debug gating is enabled +- explicitly documented as non-default diagnostics +- heavier and more operational than `meta` +- should not appear silently in normal JSON output + +### 3. Runtime Event Stream + +Keep JSONL event reports as the highest-detail runtime trace surface. 
+ +Examples: + +- path resolution decisions +- batch stage timings +- detector window acceptance/rejection events +- fallback reasons + +Rules: + +- streaming-friendly +- topic-based +- suitable for postmortem/debug analysis +- not part of the stable result payload contract + +## Recommended Global Vocabulary + +- `meta`: stable result metadata +- `debug`: debug-gated result diagnostics +- `event stream`: runtime trace records +- `topic`: subsystem such as `path`, `batch`, `detector`, `runtime`, `doctor` +- `scope`: unit of observation such as `run`, `file`, `section`, `chunk`, `detector-window` +- `verbosity`: `compact` or `verbose` + +## Non-Overlap Boundaries + +This model should not replace or duplicate the following: + +- Batch progress UX: + - keep progress behavior in the existing progress docs and plans + - this doc only defines observability structure +- Doctor command: + - doctor remains a standalone host-capability command + - this doc only defines cross-cutting diagnostics conventions that doctor may reuse +- Detector quality tuning: + - detector policy changes remain in the WASM detector quality plan + - this doc only defines how detector diagnostics should be exposed consistently +- Existing JSON feature metadata: + - current `meta.totalOf` and related result metadata stay where they are + - this doc extends the contract shape rather than replacing it + +## Recommendations + +- Add a common debug event envelope before more subsystems add one-off event shapes. +- Extend debug coverage to single-input execution so the model is not batch-only. +- Move toward a consistent policy: + - `meta` for stable additive metadata + - `debug` for debug-gated result diagnostics + - JSONL for rich runtime traces +- Keep default JSON output result-oriented and conservative. +- Treat any debug data in result JSON as additive and explicitly gated. 
+ +## Research Sequencing + +- Keep this work in research mode until the contract questions are answered: + - whether debug-gated JSON should hang off `--debug --format json` + - whether per-file `skipped` should remain as-is or migrate behind a general `debug.*` shape + - what minimum event envelope fields are mandatory for all runtime traces +- Once those questions are resolved, draft implementation plans as separate slices instead of one large umbrella plan: + - event-envelope normalization + - single-input debug parity + - detector observability adoption + - JSON result metadata additions +- Treat detector quality work as a consumer of this model, not the owner of it. + +## Open Questions + +- Should debug-gated JSON diagnostics be enabled by `--debug --format json`, or should some areas require additional topic-specific flags later? +- Should the event stream gain an explicit schema version now, before more event topics are added? +- Should per-file `skipped` remain where it is, or be normalized into a general debug-gated JSON section over time? 
+ +## Related Research + +- `docs/researches/research-2026-02-13-cli-progress-indicator.md` +- `docs/researches/research-2026-02-17-json-output-schema-contract.md` +- `docs/researches/research-2026-03-13-doctor-command.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index 93318ca..b2d920c 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -1,6 +1,7 @@ --- title: "wasm latin detector quality false positives" created-date: 2026-03-24 +modified-date: 2026-03-24 status: draft agent: Codex --- @@ -31,10 +32,20 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o - Most likely improvement areas are: - tighten Latin acceptance thresholds, especially the corroborated path - reduce eligibility or acceptance for markdown/CLI/docs-noise windows - - add detector-debug visibility for raw tag, confidence, reliability, normalized sample, and acceptance reason - add regression fixtures for English technical prose that must stay `und-Latn` or resolve to `en`, not `fr` - Prefer conservative fallback to `und-Latn` over confident-but-wrong language projection for noisy technical text. +## Research Priorities + +- Build a narrow corpus of false-positive English technical fixtures before touching thresholds. +- Compare acceptance behavior across: + - current reliable-path thresholds + - corroborated-path thresholds + - stronger reliability requirements + - technical-noise rejection heuristics +- Use the global observability research to decide how detector decision data should be surfaced during this investigation. 
+- Defer a dedicated implementation plan until the open questions below are answered well enough to choose one guardrail direction. + ## Open Questions - Should README-like technical English prefer staying on `und-Latn` unless confidence is very strong, even if that reduces some true-positive non-English upgrades? @@ -43,10 +54,11 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o ## Related Plans -- `docs/plans/plan-2026-03-24-wasm-latin-detector-quality-guardrails.md` - `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` ## Related Research - `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` - `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` +- `docs/researches/research-2026-02-17-json-output-schema-contract.md` +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` From e237884a1ab4b76394f506e542f85eab7dd4b303 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 11:37:10 +0800 Subject: [PATCH 09/23] docs: update status to completed and add resolution notes for WASM detector and Latin tag interaction --- ...n-2026-03-24-wasm-mode-latin-hint-ordering.md | 4 ++++ ...arch-2026-03-24-wasm-latin-tag-interaction.md | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md index db63cea..a4dbbbc 100644 --- a/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md +++ b/docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md @@ -104,3 +104,7 @@ Issue linkage: - `docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md` - `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md` diff --git a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md 
b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md index 577f962..ecb87e9 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md +++ b/docs/researches/research-2026-03-24-wasm-latin-tag-interaction.md @@ -2,7 +2,7 @@ title: "wasm detector and latin-tag interaction" created-date: 2026-03-24 modified-date: 2026-03-24 -status: draft +status: completed agent: codex --- @@ -13,8 +13,8 @@ Document the unexpected interaction where `--detector wasm` plus `--latin-tag `latinLanguageHint` > `latinLocaleHint` - default `und-Latn` fallback when no rule or explicit hint applies +## Resolution Notes + +- The ordering issue described here has been fixed. +- `--detector wasm` now keeps ambiguous Latin detector-eligible during the initial segmentation pass and reapplies Latin fallback hints only after detector evaluation for unresolved `und-Latn` output. +- Follow-up detector quality concerns, such as false-positive `fr` assignment on noisy English technical text, are tracked separately and are not part of this resolved ordering bug. 
+ ## Related Plans - `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-wasm-mode-latin-hint-ordering-implementation.md` From 23781d940bab3fe3d2c4b04dd15808fd0e7c675b Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 12:35:14 +0800 Subject: [PATCH 10/23] docs: update recommendations for handling detector quality issues and open questions in WASM Latin detection --- ...-03-24-global-debug-observability-model.md | 36 ++++++++++++++++--- ...-latin-detector-quality-false-positives.md | 29 ++++++++++++--- 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md index bacfa9c..6bc8b96 100644 --- a/docs/researches/research-2026-03-24-global-debug-observability-model.md +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -180,11 +180,37 @@ This model should not replace or duplicate the following: - JSON result metadata additions - Treat detector quality work as a consumer of this model, not the owner of it. -## Open Questions - -- Should debug-gated JSON diagnostics be enabled by `--debug --format json`, or should some areas require additional topic-specific flags later? -- Should the event stream gain an explicit schema version now, before more event topics are added? -- Should per-file `skipped` remain where it is, or be normalized into a general debug-gated JSON section over time? +## Recommended Resolution of Open Questions + +- Enable debug-gated JSON diagnostics through `--debug --format json` in the first contract version. + - Keep the gate global and predictable at first. + - Defer topic-specific flags unless a later subsystem proves that result JSON volume is too large for one shared debug gate. + - Keep richer topic-level detail in the JSONL event stream rather than multiplying JSON result flags early. 
+- Add explicit event-stream schema versioning now, together with the first shared event envelope. + - Recommended minimum shared envelope fields: + - `schemaVersion` + - `timestamp` + - `runId` + - `topic` + - `scope` + - `event` + - Optional envelope fields can then grow from a stable base: + - `severity` + - `verbosity` + - Adding schema versioning before more topics land is lower-risk than retrofitting many one-off event shapes later. +- Treat top-level per-file `skipped` as a compatibility legacy shape, not the long-term debug JSON model. + - Future normalized debug-gated JSON should hang off `debug.*`. + - Recommended future target shape is `debug.skipped` or `debug.batch.skipped`. + - A later implementation plan can decide whether the migration uses dual-emission, deprecation notes, or a major contract transition. + +## Future Schema Documentation Notes + +- Future implementation planning should include follow-up schema documentation work under `docs/schemas/`. +- Recommended documentation split: + - one schema doc for JSONL runtime event-stream records + - one schema surface for debug-gated JSON result diagnostics +- The JSON result diagnostics contract can either extend `docs/schemas/json-output-contract.md` or live in a companion schema doc if the material becomes too large. +- Each future schema doc should include a short `Version History` section that records contract changes by version/date and compatibility notes, instead of relying only on front-matter `modified-date`. 
## Related Research diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index b2d920c..118e34a 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -46,11 +46,32 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o - Use the global observability research to decide how detector decision data should be surfaced during this investigation. - Defer a dedicated implementation plan until the open questions below are answered well enough to choose one guardrail direction. -## Open Questions +## Recommended Resolution of Open Questions -- Should README-like technical English prefer staying on `und-Latn` unless confidence is very strong, even if that reduces some true-positive non-English upgrades? -- Should corroborated acceptance for Latin require `reliable = true`, not just matching raw/normalized tags? -- Should the Latin route add a token-quality gate before the detector result can be accepted? +- README-like technical English should prefer staying on `und-Latn` unless the Latin window clears a stronger acceptance policy than the current detector-only thresholds. + - Confidence alone is not enough. + - Local detector checks show command-heavy English token lists can still receive high-confidence, `reliable = true` false-positive French labels from Whatlang. + - The safer default is to preserve `und-Latn` when the window looks more like technical noise than prose. +- Latin corroborated acceptance should stop upgrading `und-Latn` when both corroborating samples are unreliable. + - Recommended rule: require at least one corroborating sample to report `reliable = true` before the corroborated path can accept a tag. 
+ - Matching raw/normalized tags by itself is too weak for a conservative contract. +- Add a narrow Latin token-quality gate before final detector acceptance. + - This should be the primary follow-up recommendation, because threshold tuning and corroboration hardening alone do not address reliable false positives on command/list-like English windows. + - The gate should be lightweight and explicit: + - reject command/list-like technical windows back to `und-Latn` + - preserve clear prose-like windows for detector acceptance + - Validate the gate with a focused regression corpus: + - English README/CLI/docs-noise fixtures that must remain `und-Latn` or resolve to `en` + - known non-English Latin fixtures that should still upgrade correctly + +## Recommended Policy Direction + +- Keep the main reliable-path rule conservative and unchanged unless corpus results show a clear need for threshold retuning. +- Harden the corroborated path first, because it currently creates an avoidable low-signal acceptance route. +- Add the Latin token-quality gate before attempting broad threshold increases. +- Accept that some borderline markdown/frontmatter-like Latin windows may fall back to `und-Latn` under the tighter policy. + - That tradeoff is preferable to emitting confident-but-wrong language tags for technical English. + - Users still retain explicit hint flags when deterministic relabeling is required. 
## Related Plans From fe36f507ff3bdc26ddd5184b7ecd4b899118e259 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 12:42:56 +0800 Subject: [PATCH 11/23] docs: update status to in-progress for global debug observability model and WASM Latin detector quality research --- ...ug-observability-and-wasm-latin-quality.md | 130 ++++++++++++++++++ ...-03-24-global-debug-observability-model.md | 20 +-- ...-latin-detector-quality-false-positives.md | 5 +- 3 files changed, 143 insertions(+), 12 deletions(-) create mode 100644 docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md diff --git a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md new file mode 100644 index 0000000..3f16461 --- /dev/null +++ b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md @@ -0,0 +1,130 @@ +--- +title: "debug observability and WASM Latin quality" +created-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Implement the cross-cutting debug observability contract and the WASM Latin false-positive guardrails in one coordinated plan so detector diagnostics can use the same debug model while reducing wrong language upgrades on noisy technical English. 
+ +## Context + +- The debug research now settles the contract direction well enough to implement: + - debug-gated JSON diagnostics should use `--debug --format json` as the first shared gate + - runtime JSONL events should move to a shared, versioned envelope + - top-level per-file `skipped` should be treated as a compatibility legacy shape while normalized debug diagnostics move toward `debug.*` +- The WASM Latin quality research now settles the detector direction well enough to implement: + - keep fallback conservative and prefer `und-Latn` over confident-but-wrong labels for technical-noise windows + - require at least one `reliable = true` sample for Latin corroborated acceptance + - add a Latin token-quality gate because threshold tuning alone does not address reliable false positives on command/list-like English windows +- These two tracks should be implemented together because detector-quality investigation needs better structured observability, and the new observability contract should include detector decision surfaces from the start. 
+ +## Scope + +- In scope: + - add a shared debug event envelope with schema versioning + - extend debug coverage to single-input execution and detector workflows + - add debug-gated JSON result diagnostics using the `debug.*` model + - preserve compatibility for existing per-file `skipped` consumers while introducing the normalized debug placement + - harden WASM Latin corroborated acceptance and add a Latin token-quality gate + - add regression coverage and schema/docs updates for both tracks +- Out of scope: + - replacing default regex detection + - broad retuning of the primary reliable-path Latin confidence threshold unless regression results prove it is necessary + - redesigning progress UX or doctor command output beyond adopting shared observability conventions where already in scope + - removing the legacy top-level `skipped` field in this plan + +## Decisions Settled for This Plan + +- `--debug --format json` is the first shared gate for debug-gated JSON result diagnostics. +- Runtime debug events gain a shared envelope now with these minimum fields: + - `schemaVersion` + - `timestamp` + - `runId` + - `topic` + - `scope` + - `event` +- Optional shared envelope fields may include: + - `severity` + - `verbosity` +- Per-file top-level `skipped` remains temporarily for compatibility, but normalized debug diagnostics should be added under `debug.*` in the same phase. +- The normalized per-file skipped-path placement for this plan is `debug.skipped`. +- Latin corroborated acceptance must require at least one corroborating sample with `reliable = true`. +- The initial detector-quality fix should prioritize a Latin token-quality gate before any broad threshold increase. +- If the tighter policy causes some borderline markdown/frontmatter-like Latin windows to remain `und-Latn`, that tradeoff is acceptable in this phase. 
+ +## Phase Task Items + +### Phase 1 - Contract Scaffolding and Envelope Foundation + +- [ ] Add a shared debug event envelope abstraction in the debug channel and route existing event emission through it. +- [ ] Introduce stable generation for `runId` and event timestamps for every debug-enabled CLI run. +- [ ] Normalize topic/scope naming for current batch/path events so future detector and single-input events can reuse the same vocabulary. +- [ ] Preserve current compact vs verbose filtering behavior while moving event shape generation behind the shared envelope. +- [ ] Add regression coverage for envelope presence, schema versioning, and routing to terminal vs debug report sinks. + +### Phase 2 - Single-Input Debug Parity and JSON Debug Surfaces + +- [ ] Extend debug instrumentation into single-input counting paths so non-batch runs emit runtime diagnostics under the same model. +- [ ] Add debug-gated JSON result diagnostics for `--debug --format json`. +- [ ] Introduce normalized `debug.*` payload placement for result diagnostics while keeping default JSON output result-oriented when debug is not enabled. +- [ ] For per-file JSON, add `debug.skipped` for skipped-path diagnostics and retain top-level `skipped` as a compatibility legacy shape in this phase. +- [ ] Add tests covering: + - single-input debug event emission + - debug JSON gating behavior + - compatibility behavior for per-file `skipped` + +### Phase 3 - Detector Observability Adoption + +- [ ] Add detector-focused debug events that expose raw decision stages without making normal JSON output noisy. +- [ ] Instrument detector window routing, normalized-sample use, acceptance path, fallback reason, and final locale outcome under the shared event envelope. +- [ ] Add compact detector summary events plus verbose per-window events so false-positive investigation can use the same contract as batch/path diagnostics. 
+- [ ] Add debug-gated JSON detector diagnostics only for small, additive summaries that are useful to downstream consumers. + +### Phase 4 - WASM Latin Quality Guardrails + +- [ ] Build a focused regression corpus for noisy English README/CLI/docs windows and known non-English Latin fixtures. +- [ ] Change Latin corroborated acceptance so matching raw/normalized remaps are not enough on their own; at least one corroborating sample must be `reliable = true`. +- [ ] Add a Latin token-quality gate ahead of final detector acceptance for ambiguous Latin windows. +- [ ] Keep the main reliable-path threshold unchanged initially unless the new corpus shows a clear need for targeted retuning. +- [ ] Add tests proving: + - noisy English technical samples stay `und-Latn` or resolve to `en`, not `fr` + - valid non-English Latin prose can still upgrade correctly + - borderline markdown/frontmatter-like samples behave according to the new conservative policy + +### Phase 5 - Schema Docs, CLI Docs, and Closure + +- [ ] Add `docs/schemas/debug-event-stream-contract.md` for the versioned debug event stream. +- [ ] Extend `docs/schemas/json-output-contract.md` for the debug-gated JSON result diagnostics contract. +- [ ] Add `Version History` sections to the new or updated schema docs so contract evolution is recorded explicitly. +- [ ] Update README guidance where debug JSON, debug reports, and detector behavior need user-facing clarification. +- [ ] Add completion job records under `docs/plans/jobs/` once implementation phases land. + +## Compatibility Gates + +- [ ] Default non-debug output remains unchanged. +- [ ] Existing `--debug`, `--verbose`, `--debug-report`, and `--debug-report-tee` routing behavior remains intact aside from the new shared event envelope shape. +- [ ] Existing per-file top-level `skipped` consumers continue to work during this phase. +- [ ] `--detector wasm` keeps the already-fixed detector-first Latin hint ordering behavior. 
+- [ ] The new Latin guardrails bias toward fallback to `und-Latn` rather than broadening forced language upgrades. + +## Validation + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Research + +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` +- `docs/researches/research-2026-02-17-json-output-schema-contract.md` +- `docs/researches/research-2026-02-18-wasm-language-detector-spike.md` + +## Related Plans + +- `docs/plans/plan-2026-02-16-debug-verbosity-and-report-file.md` +- `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` diff --git a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md index 6bc8b96..953aaa6 100644 --- a/docs/researches/research-2026-03-24-global-debug-observability-model.md +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -2,7 +2,7 @@ title: "global debug observability model" created-date: 2026-03-24 modified-date: 2026-03-24 -status: draft +status: in-progress agent: Codex --- @@ -169,11 +169,8 @@ This model should not replace or duplicate the following: ## Research Sequencing -- Keep this work in research mode until the contract questions are answered: - - whether debug-gated JSON should hang off `--debug --format json` - - whether per-file `skipped` should remain as-is or migrate behind a general `debug.*` shape - - what minimum event envelope fields are mandatory for all runtime traces -- Once those questions are resolved, draft implementation plans as separate slices instead of one large umbrella plan: +- The contract questions are now resolved well enough to move this work into implementation planning. 
+- Once those questions are resolved, the implementation plan should still keep the work sliced into clear phases: - event-envelope normalization - single-input debug parity - detector observability adoption @@ -200,18 +197,21 @@ This model should not replace or duplicate the following: - Adding schema versioning before more topics land is lower-risk than retrofitting many one-off event shapes later. - Treat top-level per-file `skipped` as a compatibility legacy shape, not the long-term debug JSON model. - Future normalized debug-gated JSON should hang off `debug.*`. - - Recommended future target shape is `debug.skipped` or `debug.batch.skipped`. + - Recommended future target shape is `debug.skipped`. - A later implementation plan can decide whether the migration uses dual-emission, deprecation notes, or a major contract transition. ## Future Schema Documentation Notes - Future implementation planning should include follow-up schema documentation work under `docs/schemas/`. - Recommended documentation split: - - one schema doc for JSONL runtime event-stream records - - one schema surface for debug-gated JSON result diagnostics -- The JSON result diagnostics contract can either extend `docs/schemas/json-output-contract.md` or live in a companion schema doc if the material becomes too large. + - one dedicated schema doc for JSONL runtime event-stream records: `docs/schemas/debug-event-stream-contract.md` + - extend `docs/schemas/json-output-contract.md` for debug-gated JSON result diagnostics - Each future schema doc should include a short `Version History` section that records contract changes by version/date and compatibility notes, instead of relying only on front-matter `modified-date`. 
+## Related Plans + +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` + ## Related Research - `docs/researches/research-2026-02-13-cli-progress-indicator.md` diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index 118e34a..78be338 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -2,7 +2,7 @@ title: "wasm latin detector quality false positives" created-date: 2026-03-24 modified-date: 2026-03-24 -status: draft +status: in-progress agent: Codex --- @@ -44,7 +44,7 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o - stronger reliability requirements - technical-noise rejection heuristics - Use the global observability research to decide how detector decision data should be surfaced during this investigation. -- Defer a dedicated implementation plan until the open questions below are answered well enough to choose one guardrail direction. +- The follow-up implementation can now proceed through the combined phased plan in `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md`. 
## Recommended Resolution of Open Questions @@ -76,6 +76,7 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o ## Related Plans - `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` ## Related Research From 42294eea601633b9d75c2a7541d95c52ed01eb4f Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 12:58:12 +0800 Subject: [PATCH 12/23] docs(release): close release workflow consolidation plan --- ...026-03-24-release-workflow-plan-closure.md | 34 +++++++++++++++++++ ...26-03-24-release-workflow-consolidation.md | 26 +++++++++----- 2 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-release-workflow-plan-closure.md diff --git a/docs/plans/jobs/2026-03-24-release-workflow-plan-closure.md b/docs/plans/jobs/2026-03-24-release-workflow-plan-closure.md new file mode 100644 index 0000000..f56cc38 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-release-workflow-plan-closure.md @@ -0,0 +1,34 @@ +--- +title: "release workflow plan closure" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Confirm whether `docs/plans/plan-2026-03-24-release-workflow-consolidation.md` still had unresolved work after the `v0.1.5-canary.2` integration point and update the plan to match the repository state. + +## Findings + +- Tag `v0.1.5-canary.2` points to merge commit `fde1039` on 2026-03-24. +- That merge already included the consolidation and follow-up workflow changes: + - `dd0274e` added Rust caching and package-content verification. + - `37084fe` consolidated publish workflows into `release.yml` and removed the duplicated publish workflow files. + - `04e05ca` fixed CI type-check behavior for Bun tests. + - `5052a8c` reduced CI triggering to pull requests only. +- The current workflow set contains only `.github/workflows/ci.yml` and `.github/workflows/release.yml`. 
+- `.github/workflows/release.yml` still supports `workflow_dispatch` inputs for `tag` and `shallow_since`, verifies package contents in `prepare`, uploads `release-package-${tag}`, and has both publish jobs consume that artifact. +- `scripts/verify-package-contents.mjs` explicitly requires: + - `dist/wasm-language-detector/language_detector.js` + - `dist/wasm-language-detector/language_detector_bg.wasm` + +## What Changed + +- Marked `docs/plans/plan-2026-03-24-release-workflow-consolidation.md` as completed. +- Replaced the stale remaining rollout items with completed confirmation items aligned with the workflow state that shipped around `v0.1.5-canary.2`. +- Updated the plan text so its CI trigger description matches the later pull-request-only cleanup. + +## Related Plans + +- `docs/plans/plan-2026-03-24-release-workflow-consolidation.md` diff --git a/docs/plans/plan-2026-03-24-release-workflow-consolidation.md b/docs/plans/plan-2026-03-24-release-workflow-consolidation.md index 9deac95..7734c3b 100644 --- a/docs/plans/plan-2026-03-24-release-workflow-consolidation.md +++ b/docs/plans/plan-2026-03-24-release-workflow-consolidation.md @@ -2,7 +2,7 @@ title: "Release workflow consolidation and artifact reuse" created-date: 2026-03-24 modified-date: 2026-03-24 -status: active +status: completed agent: Codex --- @@ -42,7 +42,7 @@ Reduce duplicated Rust/WASM build work across release and publish automation by ## Recommended Direction -- Keep `.github/workflows/ci.yml` as the validation workflow for pull requests and selected push branches. +- Keep `.github/workflows/ci.yml` as the validation workflow for pull requests. - Turn `.github/workflows/release.yml` into the single tag/manual release orchestrator. - Build once inside `release.yml`, upload one verified release artifact, and let both publish jobs consume it in the same workflow run. 
- Remove `.github/workflows/publish-npm-packages.yml` and `.github/workflows/publish-github-packages.yml` after the consolidated release flow is proven. @@ -53,11 +53,11 @@ Reduce duplicated Rust/WASM build work across release and publish automation by - Trigger on: - `pull_request` - - selected `push` branches such as `main`, `dev*`, `canary*`, `alpha*`, and `beta*` - Purpose: - validate type-check, build, tests, and packaged contents - do not publish - do not act as the source of release artifacts for later workflows + - avoid duplicate branch validation runs once a pull request is open ### Release Workflow @@ -131,12 +131,22 @@ Reduce duplicated Rust/WASM build work across release and publish automation by ### Phase 4 - Validation and Rollout -- [ ] Validate that stable and prerelease tags still route to the correct publish behavior. -- [ ] Validate that the built WASM runtime is present in the downloaded release artifact and in final published package contents. -- [ ] Validate that manual `workflow_dispatch` still supports explicit `tag` and optional `shallow_since`. -- [ ] Validate rerun behavior for failed publish jobs without requiring a second full build unless the source ref changed. +- [x] Confirm stable and prerelease tag routing through `notes`, `publish_npm`, and `publish_github_packages`. +- [x] Confirm package-content verification covers the staged WASM runtime files before artifact upload and downstream publish. +- [x] Confirm manual `workflow_dispatch` still supports explicit `tag` and optional `shallow_since`. +- [x] Confirm the consolidated workflow keeps build preparation isolated in `prepare` and artifact reuse isolated in downstream publish jobs. - [x] Add or update documentation for the new workflow responsibilities and trigger model. +## Completion Notes + +- The consolidation landed before tag `v0.1.5-canary.2` via the release-workflow job and the phase-7 CI/package-verification follow-up. 
+- The current workflow set contains only: + - `.github/workflows/ci.yml` + - `.github/workflows/release.yml` +- `release.yml` still exposes `workflow_dispatch` inputs for `tag` and `shallow_since`, verifies package contents in `prepare`, uploads `release-package-${tag}`, and has both publish jobs consume that artifact. +- `scripts/verify-package-contents.mjs` explicitly requires the staged WASM runtime files, so the release artifact and downstream publish surface keep that package content gate. +- Follow-up commits after `v0.1.5-canary.2` adjusted CI trigger scope and Bun test typing only; they did not leave release-workflow consolidation work outstanding. + ## Design Notes - Prefer same-workflow artifact reuse over `workflow_run` chaining. @@ -151,7 +161,7 @@ Reduce duplicated Rust/WASM build work across release and publish automation by - A tag or manual release run builds publishable artifacts exactly once. - npm and GitHub Packages publishing reuse the same prepared build output from the same workflow run. - Release notes and publish gates remain consistent with the current branch and prerelease policy. -- `ci.yml` continues to provide non-release validation without becoming part of the release artifact chain. +- `ci.yml` continues to provide pull-request validation without becoming part of the release artifact chain. - The old duplicate publish workflows can be removed without losing current behavior. 
## Related Plans From 00b0c215f78a62894cf1c0cb7c031674e769c179 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 16:09:16 +0800 Subject: [PATCH 13/23] docs: update debug observability and WASM Latin quality plans with new filename formats and regression coverage details --- ...ug-observability-and-wasm-latin-quality.md | 8 +- ...-03-24-global-debug-observability-model.md | 153 ++++++++++++++++++ ...-latin-detector-quality-false-positives.md | 88 ++++++++++ 3 files changed, 248 insertions(+), 1 deletion(-) diff --git a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md index 3f16461..a250130 100644 --- a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md +++ b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md @@ -51,6 +51,9 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals - `verbosity` - Per-file top-level `skipped` remains temporarily for compatibility, but normalized debug diagnostics should be added under `debug.*` in the same phase. - The normalized per-file skipped-path placement for this plan is `debug.skipped`. +- The selected first-pass `runId` format is `wc-debug--`. +- The selected default debug-report filename format is `wc-debug-YYYYMMDD-HHmmss-utc-.jsonl`. + - This replaces the previous local-time default filename format. - Latin corroborated acceptance must require at least one corroborating sample with `reliable = true`. - The initial detector-quality fix should prioritize a Latin token-quality gate before any broad threshold increase. - If the tighter policy causes some borderline markdown/frontmatter-like Latin windows to remain `und-Latn`, that tradeoff is acceptable in this phase. 
@@ -64,6 +67,7 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals - [ ] Normalize topic/scope naming for current batch/path events so future detector and single-input events can reuse the same vocabulary. - [ ] Preserve current compact vs verbose filtering behavior while moving event shape generation behind the shared envelope. - [ ] Add regression coverage for envelope presence, schema versioning, and routing to terminal vs debug report sinks. +- [ ] Add regression coverage for `runId` presence/stability and the new UTC default debug-report filename contract. ### Phase 2 - Single-Input Debug Parity and JSON Debug Surfaces @@ -98,14 +102,16 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals - [ ] Add `docs/schemas/debug-event-stream-contract.md` for the versioned debug event stream. - [ ] Extend `docs/schemas/json-output-contract.md` for the debug-gated JSON result diagnostics contract. -- [ ] Add `Version History` sections to the new or updated schema docs so contract evolution is recorded explicitly. +- [ ] Add `Version History` sections to the new or updated schema docs so contract evolution is recorded explicitly by git tag or release tag. - [ ] Update README guidance where debug JSON, debug reports, and detector behavior need user-facing clarification. +- [ ] Document the default debug-report filename change as a compatibility note for users and automation consumers. - [ ] Add completion job records under `docs/plans/jobs/` once implementation phases land. ## Compatibility Gates - [ ] Default non-debug output remains unchanged. - [ ] Existing `--debug`, `--verbose`, `--debug-report`, and `--debug-report-tee` routing behavior remains intact aside from the new shared event envelope shape. +- [ ] The default autogenerated debug-report filename change is treated as a compatibility-impacting change and documented explicitly. 
- [ ] Existing per-file top-level `skipped` consumers continue to work during this phase. - [ ] `--detector wasm` keeps the already-fixed detector-first Latin hint ordering behavior. - [ ] The new Latin guardrails bias toward fallback to `und-Latn` rather than broadening forced language upgrades. diff --git a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md index 953aaa6..d39bce8 100644 --- a/docs/researches/research-2026-03-24-global-debug-observability-model.md +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -62,6 +62,138 @@ Define a repository-wide observability model for `word-counter` so debug and dia - schema version - severity +## Current Contract Mocks + +### Current Runtime Event JSONL Mock + +Current report lines are flat JSON objects with no shared envelope: + +```json +{"event":"batch.resolve.start","inputs":2,"pathMode":"auto","recursive":false} +{"event":"path.resolve.root.expand","root":"","recursive":false,"regex":null} +{"event":"batch.stage.timing","stage":"resolve","elapsedMs":4} +``` + +Properties of the current shape: + +- keeps event-specific fields top-level +- works well for ad hoc `jq`/grep-style inspection +- does not provide a per-run key for correlating lines across topics +- does not distinguish envelope fields from event payload fields + +### Current Per-File JSON Debug Mock + +Current per-file JSON uses a legacy top-level `skipped` field when debug-gated skip diagnostics are enabled: + +```json +{ + "scope": "per-file", + "files": [ + { "path": "", "result": { "total": 2 } } + ], + "skipped": [ + { "path": "", "reason": "extension excluded" } + ], + "aggregate": { "total": 2 } +} +``` + +Properties of the current shape: + +- keeps normal result data easy to read +- mixes debug-only diagnostics into the top-level result contract +- does not leave room for multiple debug topics without accumulating more 
top-level fields + +## Candidate Contract Mocks + +### Candidate A: Flat Envelope, Preserve Current Event Names + +This is the lowest-churn event-stream migration path. + +```json +{"schemaVersion":1,"timestamp":"2026-03-24T00:00:00.000Z","runId":"wc-debug-1774330341123-4242","topic":"batch","scope":"run","event":"batch.resolve.start","verbosity":"compact","inputs":2,"pathMode":"auto","recursive":false} +{"schemaVersion":1,"timestamp":"2026-03-24T00:00:00.004Z","runId":"wc-debug-1774330341123-4242","topic":"path","scope":"run","event":"path.resolve.root.expand","verbosity":"compact","root":"","recursive":false,"regex":null} +{"schemaVersion":1,"timestamp":"2026-03-24T00:00:00.008Z","runId":"wc-debug-1774330341123-4242","topic":"batch","scope":"run","event":"batch.stage.timing","verbosity":"compact","stage":"resolve","elapsedMs":4} +``` + +Benefits: + +- keeps `.jsonl` and line-oriented workflow unchanged +- preserves current `event` values for existing human workflows +- keeps event-specific fields top-level for ergonomic filtering +- allows envelope growth without redesigning all current event producers + +Risks: + +- `topic` partly duplicates namespacing already embedded in `event` +- future field collisions remain possible because payload fields stay flat + +### Candidate B: Envelope Plus Nested Payload + +This is cleaner structurally, but more disruptive for current usage. 
+ +```json +{"schemaVersion":1,"timestamp":"2026-03-24T00:00:00.000Z","runId":"wc-debug-1774330341123-4242","topic":"batch","scope":"run","event":"batch.resolve.start","verbosity":"compact","payload":{"inputs":2,"pathMode":"auto","recursive":false}} +``` + +Benefits: + +- clean separation between envelope and event-specific payload +- lower risk of future top-level field collision + +Risks: + +- higher migration cost for tests and ad hoc tooling +- less convenient for direct terminal inspection and `jq '.event, .elapsedMs'` style usage + +### Candidate JSON Result Debug Shape + +Normalized debug diagnostics can move under `debug.*` while keeping compatibility during migration: + +```json +{ + "scope": "per-file", + "files": [ + { "path": "", "result": { "total": 2 } } + ], + "debug": { + "skipped": [ + { "path": "", "reason": "extension excluded" } + ] + }, + "skipped": [ + { "path": "", "reason": "extension excluded" } + ], + "aggregate": { "total": 2 } +} +``` + +This transitional mock keeps current consumers working while defining the normalized destination. + +## Decision Surface for Follow-Up Research + +- Keep `.jsonl` as the runtime event-stream format. + - Current tooling and the existing `--debug-report` contract already align with line-oriented output. + - The open decision is the envelope shape, not the container format. +- Compare two event-envelope candidates before implementation: + - flat envelope with top-level payload fields + - envelope plus nested `payload` +- Compare two schema-version styles before implementation: + - `schemaVersion: 1` + - `schemaVersion: "debug-event.v1"` +- Lock the timestamp mock to UTC ISO-8601 strings in research examples. + - This is the least ambiguous machine-readable shape. +- Treat `runId` as an opaque per-run correlation key in the first version. 
+ - Selected format for follow-up planning: `wc-debug-<epochMs>-<pid>` + - Example: `wc-debug-1774330341123-55149` + - This keeps the prefix aligned with existing debug-report naming while avoiding a second timestamp format inside the event envelope. + - Tests should assert presence, prefix, and stability within a run rather than hard-coding the full generated value. +- Treat the default debug-report filename as a separate contract from `runId`. + - Selected format for follow-up planning: `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` + - Example: `wc-debug-20260324-053221-utc-55149.jsonl` + - This should be generated from UTC clock components, not local-time clock components. + - Letter tokens stay lowercase for naming consistency. + ## Design Problem The repository now has enough operational features that local one-off diagnostics decisions are starting to collide: @@ -199,6 +331,27 @@ This model should not replace or duplicate the following: - Future normalized debug-gated JSON should hang off `debug.*`. - Recommended future target shape is `debug.skipped`. - A later implementation plan can decide whether the migration uses dual-emission, deprecation notes, or a major contract transition. +- Before implementation starts, make one explicit contract decision from the mock comparison: + - recommended current default is Candidate A + - keep `event` names unchanged + - keep payload fields flat + - add only the shared envelope fields around them +- For the first Candidate A implementation pass: + - use `runId: "wc-debug-<epochMs>-<pid>"` as the per-run event-stream correlation key + - change the default debug-report filename contract to `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` + +## Compatibility and Version History Notes + +- The shared event envelope is an additive event-stream contract change. 
+- The default debug-report filename contract change is a behavioral compatibility change: + - previous default format: `wc-debug-YYYYMMDD-HHmmss-<pid>.jsonl` + - new default format: `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` + - previous timestamps were derived from local runtime clock components + - new timestamps should be derived from UTC clock components +- This filename change can break scripts or workflows that match the old autogenerated filename pattern. +- Future schema docs under `docs/schemas/` should record this change explicitly in `Version History`. + - Record version history entries by git tag or release tag, not only by document edit date. + - Include a short compatibility note describing whether a change is additive, behavioral, or breaking for automation consumers. ## Future Schema Documentation Notes diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index 78be338..b21161e 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -64,6 +64,92 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o - English README/CLI/docs-noise fixtures that must remain `und-Latn` or resolve to `en` - known non-English Latin fixtures that should still upgrade correctly +## Scenario Comparison Matrix + +The next research step should compare policy candidates against concrete scenario classes before implementing the token-quality gate. 
+ +| Scenario class | Typical example shape | Current policy risk | Corroboration hardening only | Token-quality gate target | +| --- | --- | --- | --- | --- | +| Clear English prose | sentence-heavy paragraph with normal punctuation | low | still acceptable | accept detector result when confidence/reliability are strong | +| Clear non-English Latin prose | sentence-heavy French, German, Spanish, etc. | low | still acceptable | accept detector result when confidence/reliability are strong | +| Markdown prose with light frontmatter | short frontmatter plus mostly prose body | medium | usually acceptable | allow if prose signal dominates technical framing | +| README command/list heavy English | bullets, commands, filenames, flags, short imperative phrases | high false-positive risk | reduced only for unreliable corroboration | prefer fallback to `und-Latn` unless prose signal is clearly dominant | +| CLI help or shell transcript | `--flags`, paths, commands, option descriptions | high false-positive risk | reduced only for unreliable corroboration | fallback to `und-Latn` | +| Short ambiguous English-like text | short plain text sentence | already conservative | unchanged | keep conservative fallback unless other acceptance signals are strong | + +Implications from the comparison: + +- corroboration hardening is necessary but not sufficient +- threshold tuning alone cannot separate prose from command/list noise +- the core missing decision is a prose-vs-technical-noise contract, not only a confidence number + +## Draft Research Spec for Token-Quality Comparison + +### Technical-Noise-Likely Windows + +Treat a Latin detector window as technical-noise-likely when most of its visible signal comes from repository/docs mechanics rather than sentence-like prose. 
+ +Common indicators: + +- dense command or flag tokens such as `--flag`, subcommands, filenames, extensions, or path fragments +- line-oriented list structure with many short fragments rather than sentence-like spans +- frontmatter keys, option labels, config keys, or repeated colon-separated labels +- lexical signal that remains weak even after normalization because the surviving Latin text is mostly nouns, commands, and labels + +Expected policy direction: + +- do not confidently upgrade these windows based only on detector confidence +- prefer fallback to `und-Latn` + +### Clear-Prose-Likely Windows + +Treat a Latin detector window as clear-prose-likely when the surviving text reads like ordinary language rather than repository mechanics. + +Common indicators: + +- sentence-like spans with verbs, function words, and normal clause structure +- punctuation serving sentences rather than mostly delimiters +- enough contiguous prose that normalization still leaves a coherent paragraph + +Expected policy direction: + +- allow current reliable-path detector acceptance +- allow corroborated acceptance only when at least one corroborating sample is reliable + +### Borderline Mixed Windows + +These windows contain both prose and technical framing. 
+ +Examples: + +- README opening blocks with frontmatter plus one short paragraph +- documentation snippets where prose surrounds command examples +- option lists with one explanatory sentence after each flag + +Expected research task: + +- compare whether the first gate version should: + - preserve detector acceptance when prose spans dominate + - or fallback conservatively whenever command/list density crosses a simple threshold +- record this as fixture-backed behavior before implementation + +## Focused Regression Corpus Draft + +The next research pass should add fixture candidates in three buckets: + +- Must fallback conservatively: + - README command lists + - CLI help blocks + - config-like key/value docs fragments +- Must still upgrade correctly: + - ordinary English prose + - ordinary non-English Latin prose + - prose-heavy markdown with light formatting noise +- Borderline cases requiring an explicit decision: + - frontmatter plus short prose body + - prose interleaved with shell snippets + - bullet lists with one full sentence per item + ## Recommended Policy Direction - Keep the main reliable-path rule conservative and unchanged unless corpus results show a clear need for threshold retuning. @@ -72,6 +158,8 @@ Document the follow-up quality issue where `--detector wasm` can still relabel o - Accept that some borderline markdown/frontmatter-like Latin windows may fall back to `und-Latn` under the tighter policy. - That tradeoff is preferable to emitting confident-but-wrong language tags for technical English. - Users still retain explicit hint flags when deterministic relabeling is required. +- Treat the first token-quality gate as a fixture-backed contract, not a hand-wavy heuristic. + - The comparison matrix above should be turned into tests before final policy code lands. 
## Related Plans From 4a8f7508e5d7876ec5cd033dd7b5af1629969956 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 16:15:52 +0800 Subject: [PATCH 14/23] docs: update debug observability and WASM Latin quality plans with schema versioning details and approved fixture matrix for token-quality gate research --- ...ug-observability-and-wasm-latin-quality.md | 3 +- ...-03-24-global-debug-observability-model.md | 8 ++--- ...-latin-detector-quality-false-positives.md | 33 +++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md index a250130..4086c97 100644 --- a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md +++ b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md @@ -40,7 +40,7 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals - `--debug --format json` is the first shared gate for debug-gated JSON result diagnostics. - Runtime debug events gain a shared envelope now with these minimum fields: - - `schemaVersion` + - `schemaVersion` with first value `1` - `timestamp` - `runId` - `topic` @@ -90,6 +90,7 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ### Phase 4 - WASM Latin Quality Guardrails - [ ] Build a focused regression corpus for noisy English README/CLI/docs windows and known non-English Latin fixtures. + - Start with the approved eight-fixture matrix from `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md`. - [ ] Change Latin corroborated acceptance so matching raw/normalized remaps are not enough on their own; at least one corroborating sample must be `reliable = true`. - [ ] Add a Latin token-quality gate ahead of final detector acceptance for ambiguous Latin windows. 
- [ ] Keep the main reliable-path threshold unchanged initially unless the new corpus shows a clear need for targeted retuning. diff --git a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md index d39bce8..9a0f66b 100644 --- a/docs/researches/research-2026-03-24-global-debug-observability-model.md +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -178,9 +178,9 @@ This transitional mock keeps current consumers working while defining the normal - Compare two event-envelope candidates before implementation: - flat envelope with top-level payload fields - envelope plus nested `payload` -- Compare two schema-version styles before implementation: - - `schemaVersion: 1` - - `schemaVersion: "debug-event.v1"` +- Use `schemaVersion: 1` for the first shared event-envelope contract. + - Keep the first version numeric and minimal. + - Future schema docs can map version history entries to git tags or release tags. - Lock the timestamp mock to UTC ISO-8601 strings in research examples. - This is the least ambiguous machine-readable shape. - Treat `runId` as an opaque per-run correlation key in the first version. @@ -317,7 +317,7 @@ This model should not replace or duplicate the following: - Keep richer topic-level detail in the JSONL event stream rather than multiplying JSON result flags early. - Add explicit event-stream schema versioning now, together with the first shared event envelope. 
- Recommended minimum shared envelope fields: - - `schemaVersion` + - `schemaVersion` with first value `1` - `timestamp` - `runId` - `topic` diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index b21161e..09865fd 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -150,6 +150,39 @@ The next research pass should add fixture candidates in three buckets: - prose interleaved with shell snippets - bullet lists with one full sentence per item +## Approved First Fixture Matrix + +Use this as the first explicit fixture-backed decision table for the token-quality gate research. + +Outcome meanings: + +- `accept` means the window may upgrade from `und-Latn` when the detector acceptance path is otherwise satisfied +- `fallback` means the window should stay conservative and return to `und-Latn` + +| Fixture ID | Bucket | Fixture sketch | Expected outcome | Decision basis | +| --- | --- | --- | --- | --- | +| `latin-prose-en-paragraph` | clear prose | a normal English paragraph with sentence punctuation and no command/list framing | `accept` | prose signal is dominant and should remain detector-eligible | +| `latin-prose-fr-paragraph` | clear prose | a normal French paragraph with sentence punctuation and no command/list framing | `accept` | non-English Latin prose must continue to upgrade correctly | +| `latin-tech-cli-help` | clear technical noise | CLI help style block with many `--flags`, short labels, and option descriptions | `fallback` | command/list density dominates lexical signal | +| `latin-tech-readme-commands` | clear technical noise | README fragment dominated by commands, filenames, and short imperative fragments | `fallback` | technical framing dominates and false-positive risk is high | +| 
`latin-mixed-frontmatter-short-prose` | borderline mixed | short frontmatter block plus one short prose paragraph | `accept` | prose body should remain eligible when it clearly outweighs the framing noise | +| `latin-mixed-prose-then-command-block` | borderline mixed | one prose paragraph followed by a shell command block | `accept` | one embedded command block should not poison an otherwise prose-like window | +| `latin-mixed-bullets-with-sentences` | borderline mixed | bullet list where each bullet contains one full explanatory sentence | `accept` | sentence-bearing bullets should count as prose-like in the first policy version | +| `latin-mixed-config-heavy-with-brief-explanation` | borderline mixed | mostly config keys or colon-separated labels with one short explanatory sentence | `fallback` | technical-noise density still dominates despite the small prose presence | + +## Research Outcome for Borderline Cases + +The first token-quality gate should use a dominance rule, not an any-signal rejection rule. + +Approved direction for the first fixture-backed version: + +- accept mixed windows when prose signal clearly dominates technical framing +- fallback mixed windows when command/list/config density clearly dominates +- do not let one embedded command block force fallback for an otherwise prose-heavy window +- do not let one short explanatory sentence rescue an otherwise config/help/list-heavy window + +This is intentionally conservative, but it avoids over-rejecting normal markdown prose that happens to include some technical scaffolding. + ## Recommended Policy Direction - Keep the main reliable-path rule conservative and unchanged unless corpus results show a clear need for threshold retuning. 
From 6dc9060a6b85d931d89a973506f4a105a84567d1 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 16:34:43 +0800 Subject: [PATCH 15/23] feat(detector): add debug event envelope and wasm latin guardrails --- ...se4-debug-envelope-and-latin-guardrails.md | 51 ++++++++ ...eview-findings-wasm-latin-quality-fixes.md | 43 +++++++ ...ug-observability-and-wasm-latin-quality.md | 35 +++--- src/cli/debug/channel.ts | 43 ++++++- src/detector/policy.ts | 88 ++++++++++++++ src/detector/wasm.ts | 12 +- test/command.test.ts | 50 +++++++- test/word-counter.test.ts | 115 +++++++++++++++++- 8 files changed, 406 insertions(+), 31 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md create mode 100644 docs/plans/jobs/2026-03-24-review-findings-wasm-latin-quality-fixes.md diff --git a/docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md b/docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md new file mode 100644 index 0000000..72aeaf0 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md @@ -0,0 +1,51 @@ +--- +title: "Phase 1 and Phase 4 debug envelope and latin guardrails" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Implement Phase 1 of the debug observability plan and Phase 4 of the WASM Latin quality plan in one pass. + +## What Changed + +- Added a shared debug event envelope in `src/cli/debug/channel.ts`. + - Added `schemaVersion: 1` + - Added UTC ISO timestamps + - Added per-run `runId` using `wc-debug--` + - Added inferred `topic` and `scope` + - Kept current flat event payload fields and current event names +- Changed autogenerated debug report filenames to the new UTC contract: + - `wc-debug-YYYYMMDD-HHmmss-utc-.jsonl` +- Hardened WASM Latin corroborated acceptance in `src/detector/wasm.ts`. 
+ - Corroboration now requires at least one sample with `reliable = true` +- Added the first Latin token-quality gate in `src/detector/policy.ts`. + - Uses a prose-vs-technical dominance rule + - Biases mixed technical windows back to `und-Latn` +- Expanded regression coverage in `test/command.test.ts` and `test/word-counter.test.ts`. + - Added debug envelope assertions + - Added UTC filename assertions + - Added the approved eight-fixture WASM Latin quality matrix + +## Why + +- Debug observability needed a stable v1 envelope before more subsystems add one-off event shapes. +- WASM Latin routing still needed conservative guardrails to avoid wrong language upgrades on technical English windows. + +## Verification + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Plans + +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` + +## Related Research + +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` diff --git a/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-quality-fixes.md b/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-quality-fixes.md new file mode 100644 index 0000000..d9d4f34 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-review-findings-wasm-latin-quality-fixes.md @@ -0,0 +1,43 @@ +--- +title: "Review findings wasm latin quality fixes" +created-date: 2026-03-24 +modified-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Correct the follow-up job record so it matches the current WASM Latin quality implementation and does not claim fixes that are not present in the tree. + +## What Changed + +- Corrected this job record to align with the current implementation in `src/detector/policy.ts`, `src/detector/wasm.ts`, and `test/word-counter.test.ts`. 
+- The current tree still uses the Phase 4 guardrail behavior: + - normalized Latin-word floor remains eight words before the Latin quality gate can accept a window + - punctuation still marks a line as prose + - punctuationless lines count as prose only when they contain at least ten Latin words + - mixed-window acceptance still follows the existing prose-vs-technical dominance rule +- The current regression coverage remains the approved eight-fixture matrix from the Phase 4 implementation job. + - no separate short reliable prose follow-up fixtures are present in the current tree + - no separate punctuationless multi-line prose fixture is present in the current tree + +## Why + +- The previous text described code and tests that are not present. +- Repository job records should reflect what actually landed so future agents do not reason from incorrect history. + +## Verification + +- Verified against the current implementation and test files: + - `src/detector/policy.ts` + - `src/detector/wasm.ts` + - `test/word-counter.test.ts` + +## Related Plans + +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md` diff --git a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md index 4086c97..3431c0d 100644 --- a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md +++ b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md @@ -1,7 +1,8 @@ --- title: "debug observability and WASM Latin quality" created-date: 2026-03-24 -status: draft +modified-date: 2026-03-24 +status: active agent: Codex --- @@ -62,12 +63,12 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ### Phase 1 - Contract Scaffolding and Envelope Foundation -- [ ] Add a shared debug event envelope abstraction in the debug 
channel and route existing event emission through it. -- [ ] Introduce stable generation for `runId` and event timestamps for every debug-enabled CLI run. -- [ ] Normalize topic/scope naming for current batch/path events so future detector and single-input events can reuse the same vocabulary. -- [ ] Preserve current compact vs verbose filtering behavior while moving event shape generation behind the shared envelope. -- [ ] Add regression coverage for envelope presence, schema versioning, and routing to terminal vs debug report sinks. -- [ ] Add regression coverage for `runId` presence/stability and the new UTC default debug-report filename contract. +- [x] Add a shared debug event envelope abstraction in the debug channel and route existing event emission through it. +- [x] Introduce stable generation for `runId` and event timestamps for every debug-enabled CLI run. +- [x] Normalize topic/scope naming for current batch/path events so future detector and single-input events can reuse the same vocabulary. +- [x] Preserve current compact vs verbose filtering behavior while moving event shape generation behind the shared envelope. +- [x] Add regression coverage for envelope presence, schema versioning, and routing to terminal vs debug report sinks. +- [x] Add regression coverage for `runId` presence/stability and the new UTC default debug-report filename contract. ### Phase 2 - Single-Input Debug Parity and JSON Debug Surfaces @@ -89,12 +90,12 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ### Phase 4 - WASM Latin Quality Guardrails -- [ ] Build a focused regression corpus for noisy English README/CLI/docs windows and known non-English Latin fixtures. +- [x] Build a focused regression corpus for noisy English README/CLI/docs windows and known non-English Latin fixtures. - Start with the approved eight-fixture matrix from `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md`. 
-- [ ] Change Latin corroborated acceptance so matching raw/normalized remaps are not enough on their own; at least one corroborating sample must be `reliable = true`. -- [ ] Add a Latin token-quality gate ahead of final detector acceptance for ambiguous Latin windows. -- [ ] Keep the main reliable-path threshold unchanged initially unless the new corpus shows a clear need for targeted retuning. -- [ ] Add tests proving: +- [x] Change Latin corroborated acceptance so matching raw/normalized remaps are not enough on their own; at least one corroborating sample must be `reliable = true`. +- [x] Add a Latin token-quality gate ahead of final detector acceptance for ambiguous Latin windows. +- [x] Keep the main reliable-path threshold unchanged initially unless the new corpus shows a clear need for targeted retuning. +- [x] Add tests proving: - noisy English technical samples stay `und-Latn` or resolve to `en`, not `fr` - valid non-English Latin prose can still upgrade correctly - borderline markdown/frontmatter-like samples behave according to the new conservative policy @@ -110,12 +111,12 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ## Compatibility Gates -- [ ] Default non-debug output remains unchanged. -- [ ] Existing `--debug`, `--verbose`, `--debug-report`, and `--debug-report-tee` routing behavior remains intact aside from the new shared event envelope shape. +- [x] Default non-debug output remains unchanged. +- [x] Existing `--debug`, `--verbose`, `--debug-report`, and `--debug-report-tee` routing behavior remains intact aside from the new shared event envelope shape. - [ ] The default autogenerated debug-report filename change is treated as a compatibility-impacting change and documented explicitly. -- [ ] Existing per-file top-level `skipped` consumers continue to work during this phase. -- [ ] `--detector wasm` keeps the already-fixed detector-first Latin hint ordering behavior. 
-- [ ] The new Latin guardrails bias toward fallback to `und-Latn` rather than broadening forced language upgrades. +- [x] Existing per-file top-level `skipped` consumers continue to work during this phase. +- [x] `--detector wasm` keeps the already-fixed detector-first Latin hint ordering behavior. +- [x] The new Latin guardrails bias toward fallback to `und-Latn` rather than broadening forced language upgrades. ## Validation diff --git a/src/cli/debug/channel.ts b/src/cli/debug/channel.ts index d1e2cf8..5c7e5e3 100644 --- a/src/cli/debug/channel.ts +++ b/src/cli/debug/channel.ts @@ -3,6 +3,7 @@ import { basename, dirname, extname, join, resolve as resolvePath } from "node:p type DebugDetails = Record; export type DebugVerbosity = "compact" | "verbose"; +const DEBUG_EVENT_SCHEMA_VERSION = 1; type DebugEventOptions = { verbosity?: DebugVerbosity; @@ -52,11 +53,33 @@ function formatTimestampPart(value: number): string { function formatDebugReportTimestamp(now: Date): string { return [ - `${now.getFullYear()}${formatTimestampPart(now.getMonth() + 1)}${formatTimestampPart(now.getDate())}`, - `${formatTimestampPart(now.getHours())}${formatTimestampPart(now.getMinutes())}${formatTimestampPart(now.getSeconds())}`, + `${now.getUTCFullYear()}${formatTimestampPart(now.getUTCMonth() + 1)}${formatTimestampPart(now.getUTCDate())}`, + `${formatTimestampPart(now.getUTCHours())}${formatTimestampPart(now.getUTCMinutes())}${formatTimestampPart(now.getUTCSeconds())}`, ].join("-"); } +function buildRunId(now: Date, pid: number): string { + return `wc-debug-${now.getTime()}-${pid}`; +} + +function inferEventTopic(event: string): string { + const topic = event.split(".")[0]?.trim(); + return topic && topic.length > 0 ? 
topic : "runtime"; +} + +const FILE_SCOPED_EVENT_PATTERNS = [ + /^batch\.skips\.item$/, + /^path\.resolve\.input$/, + /^path\.resolve\.skip$/, + /^path\.resolve\.(filter|regex)\.excluded$/, + /^path\.resolve\.expand\.include$/, + /^path\.resolve\.dedupe\.(accept|duplicate)$/, +]; + +function inferEventScope(event: string): string { + return FILE_SCOPED_EVENT_PATTERNS.some((pattern) => pattern.test(event)) ? "file" : "run"; +} + function withCollisionSuffix(pathValue: string, sequence: number): string { if (sequence <= 0) { return pathValue; @@ -70,7 +93,7 @@ function withCollisionSuffix(pathValue: string, sequence: number): string { function resolveReportPath(report: DebugReportOptions, now: Date, pid: number): string { const cwd = report.cwd ?? process.cwd(); - const defaultName = `wc-debug-${formatDebugReportTimestamp(now)}-${pid}.jsonl`; + const defaultName = `wc-debug-${formatDebugReportTimestamp(now)}-utc-${pid}.jsonl`; const explicitPathValue = typeof report.path === "string" ? report.path : undefined; const explicitPath = explicitPathValue !== undefined; const basePath = resolvePath(cwd, explicitPathValue ?? defaultName); @@ -153,11 +176,12 @@ export function createDebugChannel(options: CreateDebugChannelOptions): DebugCha const verbosity = options.verbosity ?? "compact"; const sinks: DebugSink[] = []; let reportPath: string | undefined; + const channelNow = options.now?.() ?? new Date(); + const channelPid = options.pid ?? process.pid; + const runId = buildRunId(channelNow, channelPid); if (options.report) { - const now = options.now?.() ?? new Date(); - const pid = options.pid ?? process.pid; - reportPath = resolveReportPath(options.report, now, pid); + reportPath = resolveReportPath(options.report, channelNow, channelPid); sinks.push(createFileSink(reportPath)); if (options.report.tee) { @@ -177,8 +201,15 @@ export function createDebugChannel(options: CreateDebugChannelOptions): DebugCha return; } + const timestamp = (options.now?.() ?? 
new Date()).toISOString(); const payload = JSON.stringify({ + schemaVersion: DEBUG_EVENT_SCHEMA_VERSION, + timestamp, + runId, + topic: inferEventTopic(event), + scope: inferEventScope(event), event, + verbosity: eventVerbosity, ...details, }); for (const sink of sinks) { diff --git a/src/detector/policy.ts b/src/detector/policy.ts index 6eaba4d..f6920db 100644 --- a/src/detector/policy.ts +++ b/src/detector/policy.ts @@ -8,6 +8,7 @@ export const LATIN_WASM_CORROBORATED_MIN_CONFIDENCE = 0.7; const LATIN_SCRIPT_REGEX = /\p{Script=Latin}/u; const HAN_SCRIPT_REGEX = /\p{Script=Han}/u; +const LATIN_WORD_REGEX = /\p{Script=Latin}+/gu; export type DetectorRouteTag = typeof DEFAULT_LOCALE | typeof DEFAULT_HAN_TAG; @@ -75,3 +76,90 @@ export function normalizeDetectorSampleForRoute( .replace(/\s+/g, " ") .trim(); } + +function countLatinWords(text: string): number { + return text.match(LATIN_WORD_REGEX)?.length ?? 0; +} + +function isSentenceLikeLatinLine( + line: string, + latinWords: number, + technicalLike: boolean, +): boolean { + if (latinWords < 4) { + return false; + } + + if (/[.!?]/u.test(line)) { + return true; + } + + return !technicalLike && latinWords >= 5; +} + +function isTechnicalLikeLatinLine(line: string, latinWords: number): boolean { + const trimmed = line.trim(); + if (!trimmed) { + return false; + } + + if (/^[>#$]/u.test(trimmed)) { + return true; + } + + if (/(^|\s)--[a-z0-9][a-z0-9-]*/iu.test(trimmed)) { + return true; + } + + if (/`[^`]+`/u.test(trimmed)) { + return true; + } + + if (/(^|[\s"'`])(?:\.{0,2}\/|\/)?[\w./-]+\.[a-z0-9]{1,6}(?=$|[\s"'`])/iu.test(trimmed)) { + return true; + } + + if (/^[\-\*\d.)\s]*[\p{L}\p{N}_.-]+:\s+\S/iu.test(trimmed) && latinWords <= 8) { + return true; + } + + return false; +} + +export function shouldAcceptLatinDetectorWindow( + text: string, + normalizedSample: string, +): boolean { + const normalizedLatinWords = countLatinWords(normalizedSample); + if (normalizedLatinWords < 4) { + return false; + } + + let 
proseWords = 0; + let technicalWords = 0; + + for (const rawLine of text.split(/\r?\n/u)) { + const line = rawLine.trim(); + if (!line || line === "---" || line === "```") { + continue; + } + + const latinWords = countLatinWords(line); + if (latinWords === 0) { + continue; + } + + const technicalLike = isTechnicalLikeLatinLine(line, latinWords); + const sentenceLike = isSentenceLikeLatinLine(line, latinWords, technicalLike); + + if (sentenceLike) { + proseWords += latinWords; + } + + if (technicalLike) { + technicalWords += latinWords; + } + } + + return proseWords >= 4 && proseWords >= technicalWords; +} diff --git a/src/detector/wasm.ts b/src/detector/wasm.ts index 183a4e1..674e70c 100644 --- a/src/detector/wasm.ts +++ b/src/detector/wasm.ts @@ -9,6 +9,7 @@ import { LATIN_WASM_CORROBORATED_MIN_CONFIDENCE, isAmbiguousDetectorRoute, normalizeDetectorSampleForRoute, + shouldAcceptLatinDetectorWindow, shouldRunWasmDetector, type DetectorRouteTag, } from "./policy"; @@ -140,6 +141,8 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { const rawRemapped = rawResult ? remapWhatlangResult(rawResult, window.routeTag) : null; const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag); + const passesLatinQualityGate = + window.routeTag !== DEFAULT_LOCALE || shouldAcceptLatinDetectorWindow(window.text, normalizedSample); const normalizedResult = normalizedSample.length > 0 && normalizedSample !== window.text ? 
await detectWithWhatlangWasm(normalizedSample, window.routeTag) @@ -162,6 +165,7 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { if ( strongestCandidate && + passesLatinQualityGate && shouldAcceptDetectorTag( window.routeTag, strongestCandidate.confidence, @@ -173,6 +177,7 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { if ( window.routeTag === DEFAULT_LOCALE && + passesLatinQualityGate && rawRemapped && normalizedRemapped && rawRemapped.tag === normalizedRemapped.tag @@ -181,7 +186,12 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { rawRemapped.confidence ?? 0, normalizedRemapped.confidence ?? 0, ); - if (corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE) { + const hasReliableCorroboration = + rawRemapped.reliable === true || normalizedRemapped.reliable === true; + if ( + hasReliableCorroboration && + corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE + ) { return rawRemapped.tag; } } diff --git a/test/command.test.ts b/test/command.test.ts index bdecf2e..9330114 100644 --- a/test/command.test.ts +++ b/test/command.test.ts @@ -1375,6 +1375,17 @@ describe("CLI debug diagnostics", () => { expect(stageTimingNames.includes("load")).toBeTrue(); expect(stageTimingNames.includes("count")).toBeTrue(); expect(stageTimingNames.includes("finalize")).toBeTrue(); + expect(events.every((item) => item.schemaVersion === 1)).toBeTrue(); + expect( + events.every((item) => typeof item.timestamp === "string" && !Number.isNaN(Date.parse(String(item.timestamp)))), + ).toBeTrue(); + expect( + events.every((item) => typeof item.runId === "string" && String(item.runId).startsWith("wc-debug-")), + ).toBeTrue(); + expect( + events.every((item) => item.topic === "batch" || item.topic === "path"), + ).toBeTrue(); + expect(events.every((item) => item.scope === "run" || item.scope === "file")).toBeTrue(); expect(output.stdout).toEqual(["2"]); }); @@ -1439,6 +1450,8 @@ describe("CLI debug 
diagnostics", () => { expect(eventNames.includes("path.resolve.filter.excluded")).toBeTrue(); expect(eventNames.includes("path.resolve.dedupe.accept")).toBeTrue(); expect(eventNames.includes("path.resolve.dedupe.duplicate")).toBeTrue(); + const events = parseDebugEvents(output.stderr).filter((item) => item.topic === "path"); + expect(events.some((item) => item.scope === "file")).toBeTrue(); expect(output.stdout).toEqual(["2"]); }); @@ -1596,6 +1609,35 @@ describe("CLI debug diagnostics", () => { expect(entries.includes("diagnostics-1.jsonl")).toBeFalse(); }); + test("keeps runId stable within a single debug channel", async () => { + const root = await makeTempFixture("cli-debug-run-id-stable"); + const reportPath = join(root, "diagnostics.jsonl"); + const fixedNow = new Date(Date.UTC(2026, 2, 24, 5, 32, 21, 123)); + const debug = createDebugChannel({ + enabled: true, + verbosity: "compact", + report: { path: reportPath, tee: false, cwd: root }, + now: () => fixedNow, + pid: 55149, + }); + + debug.emit("batch.resolve.start", { inputs: 1 }); + debug.emit("batch.resolve.complete", { files: 1 }); + await debug.close(); + + const report = await readFile(reportPath, "utf8"); + const entries = report + .split("\n") + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line) as { runId?: string; timestamp?: string; schemaVersion?: number }); + + expect(entries.length).toBe(2); + expect(entries.every((entry) => entry.runId === "wc-debug-1774330341123-55149")).toBeTrue(); + expect(entries.every((entry) => entry.timestamp === "2026-03-24T05:32:21.123Z")).toBeTrue(); + expect(entries.every((entry) => entry.schemaVersion === 1)).toBeTrue(); + }); + test("creates deterministic default debug report name in cwd", async () => { const root = await makeTempFixture("cli-debug-report-default-name"); const previousCwd = process.cwd(); @@ -1619,15 +1661,15 @@ describe("CLI debug diagnostics", () => { const entries = await readdir(root); const 
reports = entries.filter((entry) => - /^wc-debug-\d{8}-\d{6}-\d+(-\d+)?\.jsonl$/.test(entry), + /^wc-debug-\d{8}-\d{6}-utc-\d+(-\d+)?\.jsonl$/.test(entry), ); expect(reports.length).toBe(1); }); test("adds collision suffix for default debug report filenames", async () => { const root = await makeTempFixture("cli-debug-report-collision"); - const fixedNow = new Date(2026, 1, 16, 12, 34, 56); - const baseName = "wc-debug-20260216-123456-4321.jsonl"; + const fixedNow = new Date(Date.UTC(2026, 1, 16, 12, 34, 56)); + const baseName = "wc-debug-20260216-123456-utc-4321.jsonl"; await writeFile(join(root, baseName), "existing"); const debug = createDebugChannel({ @@ -1637,7 +1679,7 @@ describe("CLI debug diagnostics", () => { now: () => fixedNow, pid: 4321, }); - const expectedPath = join(root, "wc-debug-20260216-123456-4321-1.jsonl"); + const expectedPath = join(root, "wc-debug-20260216-123456-utc-4321-1.jsonl"); expect(debug.reportPath).toBe(expectedPath); debug.emit("batch.resolve.start", { files: 1 }); diff --git a/test/word-counter.test.ts b/test/word-counter.test.ts index 6ad272d..7db5f59 100644 --- a/test/word-counter.test.ts +++ b/test/word-counter.test.ts @@ -12,6 +12,99 @@ import { } from "../src/detector"; import { hasWasmDetectorRuntime } from "./support/wasm-detector-runtime"; +const WASM_LATIN_QUALITY_FIXTURES = [ + { + id: "latin-prose-en-paragraph", + text: "This sentence should clearly be detected as English for the wasm detector path.", + expectedLocale: "en", + }, + { + id: "latin-prose-fr-paragraph", + text: "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue.", + expectedLocale: "fr", + }, + { + id: "latin-prose-en-short-reliable-line", + text: "The repository documentation explains expected behavior.", + expectedLocale: "en", + }, + { + id: "latin-prose-fr-short-reliable-line", + text: "Cette documentation explique clairement le comportement attendu.", + expectedLocale: "fr", + }, + { + id: 
"latin-prose-en-multiline-without-punctuation", + text: [ + "Internationalization requires thoughtful language detection", + "Repository documentation explains expected behavior", + ].join("\n"), + expectedLocale: "en", + }, + { + id: "latin-tech-cli-help", + text: [ + "Usage: word-counter --path docs --format json --debug", + "", + "Options:", + " --debug enable structured diagnostics", + " --debug-report [path] write diagnostics to a report file", + " --debug-tee mirror diagnostics to stderr", + ].join("\n"), + expectedLocale: "und-Latn", + }, + { + id: "latin-tech-readme-commands", + text: [ + "`bun install`", + "`bun test`", + "`word-counter --path docs --format json`", + "`word-counter --debug-report report.jsonl --debug-tee`", + ].join("\n"), + expectedLocale: "und-Latn", + }, + { + id: "latin-mixed-frontmatter-short-prose", + text: [ + "---", + "title: Alpha Story", + "summary: Intro note", + "---", + "Hello world from alpha. This guide explains the feature clearly for readers.", + ].join("\n"), + expectedLocale: "en", + }, + { + id: "latin-mixed-prose-then-command-block", + text: [ + "This guide explains how to count words in a repository without changing the default output behavior.", + "```sh", + "word-counter --path docs --format json", + "```", + ].join("\n"), + expectedLocale: "en", + }, + { + id: "latin-mixed-bullets-with-sentences", + text: [ + "- This option keeps normal JSON output stable for downstream consumers.", + "- This command writes detailed diagnostics only when debug mode is enabled.", + ].join("\n"), + expectedLocale: "en", + }, + { + id: "latin-mixed-config-heavy-with-brief-explanation", + text: [ + "mode: debug", + "verbosity: compact", + "report_path: diagnostics.jsonl", + "tee: true", + "Use this for local testing.", + ].join("\n"), + expectedLocale: "und-Latn", + }, +] as const; + describe("wordCounter", () => { test("counts Latin words in chunk mode by default", () => { const result = wordCounter("Hello world"); @@ -126,9 +219,13 
@@ describe("detector entrypoint", () => { } const result = await wordCounterWithDetector( - ["---", "title: Alpha Story", "summary: Intro note", "---", "Hello world from alpha."].join( - "\n", - ), + [ + "---", + "title: Alpha Story", + "summary: Intro note", + "---", + "Hello world from alpha. This guide explains the feature clearly for readers.", + ].join("\n"), { detector: "wasm" }, ); @@ -136,6 +233,18 @@ describe("detector entrypoint", () => { expect(result.breakdown.items[0]?.locale).toBe("en"); }); + for (const fixture of WASM_LATIN_QUALITY_FIXTURES) { + test(`applies approved Latin quality policy for ${fixture.id}`, async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const result = await wordCounterWithDetector(fixture.text, { detector: "wasm" }); + expect(result.breakdown.mode).toBe("chunk"); + expect(result.breakdown.items[0]?.locale).toBe(fixture.expectedLocale); + }); + } + test("keeps low-confidence short English-like text on und-Latn in wasm mode", async () => { if (!hasWasmDetectorRuntime()) { return; From 1012d53c30190ae45bd2501ecd7abfd337e15ac5 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 17:08:29 +0800 Subject: [PATCH 16/23] feat(debug): finish observability plan and tighten detector debug gating --- README.md | 10 +- ...-24-fix-detector-debug-gating-and-scope.md | 29 +++ ...ug-json-detector-observability-and-docs.md | 59 +++++ ...ug-observability-and-wasm-latin-quality.md | 34 +-- docs/schemas/debug-event-stream-contract.md | 119 ++++++++++ docs/schemas/json-output-contract.md | 91 +++++--- src/cli/batch/jobs/load-count-worker.ts | 2 + src/cli/batch/jobs/load-count.ts | 7 + src/cli/batch/jobs/types.ts | 7 + src/cli/batch/jobs/worker-pool.ts | 31 ++- src/cli/batch/jobs/worker/count-worker.ts | 25 ++ src/cli/batch/jobs/worker/protocol.ts | 19 +- src/cli/batch/run.ts | 41 +++- src/cli/debug/channel.ts | 8 +- src/cli/output/debug-json.ts | 23 ++ src/cli/runtime/batch.ts | 40 +++- src/cli/runtime/single.ts | 48 +++- 
src/cli/types.ts | 4 + src/command.ts | 1 + src/detector/debug.ts | 148 ++++++++++++ src/detector/types.ts | 2 + src/detector/wasm.ts | 94 +++++++- test/command.test.ts | 220 ++++++++++++++++++ 23 files changed, 998 insertions(+), 64 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-fix-detector-debug-gating-and-scope.md create mode 100644 docs/plans/jobs/2026-03-24-phase2-phase3-phase5-debug-json-detector-observability-and-docs.md create mode 100644 docs/schemas/debug-event-stream-contract.md create mode 100644 src/cli/output/debug-json.ts create mode 100644 src/detector/debug.ts diff --git a/README.md b/README.md index 91b09b7..1e5cf19 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,7 @@ Detector mode notes: - In `--detector wasm` mode, Latin hint rules and explicit Latin hint flags are deferred until after detector evaluation and only relabel unresolved `und-Latn` output. - Very short chunks stay on the original `und-*` fallback. - Low-confidence or unsupported detector results fall back to `und-*`. +- Technical-noise-heavy Latin windows stay conservative and may remain `und-Latn` even when the detector produces a wrong-but-confident language guess. 
Collect non-words (emoji/symbols/punctuation): @@ -286,10 +287,11 @@ word-counter --path ./examples/test-case-multi-files-support --debug --verbose Use `--debug-report [path]` to route debug diagnostics to a JSONL report file: -- no path: writes to current working directory with pattern `wc-debug-YYYYMMDD-HHmmss-<pid>.jsonl` +- no path: writes to current working directory with pattern `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` - path provided: writes to the specified location - default-name collision handling: appends `-<n>` suffix to avoid overwriting existing files - explicit path validation: existing directories are rejected (explicit paths are treated as file targets) +- compatibility note: the autogenerated filename moved from the older local-time pattern to the new UTC `...-utc-...jsonl` pattern By default with `--debug-report`, debug lines are file-only (not mirrored to terminal). Use `--debug-report-tee` (alias: `--debug-tee`) to mirror to both file and `stderr`. @@ -306,6 +308,12 @@ word-counter --path ./examples/test-case-multi-files-support --debug --debug-rep Skip details stay debug-gated and can be suppressed with `--quiet-skips`. +When `--format json` is combined with `--debug`, debug-only diagnostics are emitted under `debug.*`: + +- single input and merged batch may include `debug.detector` +- per-file batch may include `debug.skipped`, `debug.detector`, and per-entry `files[i].debug.detector` +- per-file top-level `skipped` is still emitted temporarily for compatibility + ## How It Works - The runtime inspects each character's Unicode script to infer its likely locale tag (e.g., `und-Latn`, `und-Hani`, `ja`). 
diff --git a/docs/plans/jobs/2026-03-24-fix-detector-debug-gating-and-scope.md b/docs/plans/jobs/2026-03-24-fix-detector-debug-gating-and-scope.md new file mode 100644 index 0000000..d9ad2f1 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-fix-detector-debug-gating-and-scope.md @@ -0,0 +1,29 @@ +--- +title: "fix detector debug gating and scope" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Summary + +Addressed two review findings in the detector debug pipeline: + +- prevented worker and async batch detector debug contexts from being created when `--debug` is not enabled +- marked per-file batch detector events as `scope: "file"` in the shared debug event envelope + +## What Changed + +- updated `src/cli/batch/run.ts` to: + - gate detector debug callbacks on `debug.enabled` + - wrap batch detector events with explicit `scope: "file"` +- updated `src/cli/batch/jobs/load-count.ts` to stop creating fallback detector summaries when no debug context is requested +- updated `src/cli/batch/jobs/load-count-worker.ts` and `src/cli/batch/jobs/worker/count-worker.ts` so worker-side detector debug state is only created when debug forwarding is enabled +- updated `src/cli/debug/channel.ts` to accept an explicit event scope override +- added regression coverage in `test/command.test.ts` for: + - file-scoped detector events in async and worker batch executors + - absence of worker detector debug summaries when no debug callback is provided + +## Verification + +- ran `bun test test/command.test.ts` diff --git a/docs/plans/jobs/2026-03-24-phase2-phase3-phase5-debug-json-detector-observability-and-docs.md b/docs/plans/jobs/2026-03-24-phase2-phase3-phase5-debug-json-detector-observability-and-docs.md new file mode 100644 index 0000000..7b44788 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-phase2-phase3-phase5-debug-json-detector-observability-and-docs.md @@ -0,0 +1,59 @@ +--- +title: "Phase 2 phase 3 phase 5 debug json detector observability and docs" 
+created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Finish the remaining phases of the debug observability and WASM Latin quality plan by landing single-input debug parity, detector observability, and the schema/README closure work. + +## What Changed + +- Extended single-input execution in `src/cli/runtime/single.ts`. + - added `runtime.single.start` and `runtime.single.complete` debug events + - added debug-gated single-input JSON detector summaries under `debug.detector` +- Normalized debug-gated JSON output in `src/cli/runtime/batch.ts`. + - added top-level `debug.skipped` + - retained top-level `skipped` for per-file compatibility + - added aggregated `debug.detector` summaries + - added per-entry `files[i].debug.detector` summaries +- Added detector observability plumbing in `src/detector/debug.ts` and `src/detector/wasm.ts`. + - compact detector summary events + - verbose per-window detector events + - detector summary aggregation for JSON output +- Extended batch execution routes to preserve detector observability across both async and worker execution. + - updated `src/cli/batch/jobs/load-count.ts` + - updated worker protocol and worker-pool forwarding + - updated `src/cli/batch/jobs/worker/count-worker.ts` +- Added and updated documentation: + - `docs/schemas/debug-event-stream-contract.md` + - `docs/schemas/json-output-contract.md` + - `README.md` + +## Why + +- Single-input runs needed to use the same debug model as batch execution. +- Detector investigation needed structured observability that survives both direct and worker-backed execution paths. +- The JSON and debug-report contracts needed final schema and user-facing documentation before the plan could be considered complete. 
+ +## Verification + +- `bun test test/word-counter.test.ts` +- `bun test test/command.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Plans + +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-phase1-phase4-debug-envelope-and-latin-guardrails.md` + +## Related Research + +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` diff --git a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md index 3431c0d..3b2dcd3 100644 --- a/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md +++ b/docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md @@ -2,7 +2,7 @@ title: "debug observability and WASM Latin quality" created-date: 2026-03-24 modified-date: 2026-03-24 -status: active +status: completed agent: Codex --- @@ -72,21 +72,21 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ### Phase 2 - Single-Input Debug Parity and JSON Debug Surfaces -- [ ] Extend debug instrumentation into single-input counting paths so non-batch runs emit runtime diagnostics under the same model. -- [ ] Add debug-gated JSON result diagnostics for `--debug --format json`. -- [ ] Introduce normalized `debug.*` payload placement for result diagnostics while keeping default JSON output result-oriented when debug is not enabled. -- [ ] For per-file JSON, add `debug.skipped` for skipped-path diagnostics and retain top-level `skipped` as a compatibility legacy shape in this phase. -- [ ] Add tests covering: +- [x] Extend debug instrumentation into single-input counting paths so non-batch runs emit runtime diagnostics under the same model. +- [x] Add debug-gated JSON result diagnostics for `--debug --format json`. 
+- [x] Introduce normalized `debug.*` payload placement for result diagnostics while keeping default JSON output result-oriented when debug is not enabled. +- [x] For per-file JSON, add `debug.skipped` for skipped-path diagnostics and retain top-level `skipped` as a compatibility legacy shape in this phase. +- [x] Add tests covering: - single-input debug event emission - debug JSON gating behavior - compatibility behavior for per-file `skipped` ### Phase 3 - Detector Observability Adoption -- [ ] Add detector-focused debug events that expose raw decision stages without making normal JSON output noisy. -- [ ] Instrument detector window routing, normalized-sample use, acceptance path, fallback reason, and final locale outcome under the shared event envelope. -- [ ] Add compact detector summary events plus verbose per-window events so false-positive investigation can use the same contract as batch/path diagnostics. -- [ ] Add debug-gated JSON detector diagnostics only for small, additive summaries that are useful to downstream consumers. +- [x] Add detector-focused debug events that expose raw decision stages without making normal JSON output noisy. +- [x] Instrument detector window routing, normalized-sample use, acceptance path, fallback reason, and final locale outcome under the shared event envelope. +- [x] Add compact detector summary events plus verbose per-window events so false-positive investigation can use the same contract as batch/path diagnostics. +- [x] Add debug-gated JSON detector diagnostics only for small, additive summaries that are useful to downstream consumers. ### Phase 4 - WASM Latin Quality Guardrails @@ -102,18 +102,18 @@ Implement the cross-cutting debug observability contract and the WASM Latin fals ### Phase 5 - Schema Docs, CLI Docs, and Closure -- [ ] Add `docs/schemas/debug-event-stream-contract.md` for the versioned debug event stream. 
-- [ ] Extend `docs/schemas/json-output-contract.md` for the debug-gated JSON result diagnostics contract. -- [ ] Add `Version History` sections to the new or updated schema docs so contract evolution is recorded explicitly by git tag or release tag. -- [ ] Update README guidance where debug JSON, debug reports, and detector behavior need user-facing clarification. -- [ ] Document the default debug-report filename change as a compatibility note for users and automation consumers. -- [ ] Add completion job records under `docs/plans/jobs/` once implementation phases land. +- [x] Add `docs/schemas/debug-event-stream-contract.md` for the versioned debug event stream. +- [x] Extend `docs/schemas/json-output-contract.md` for the debug-gated JSON result diagnostics contract. +- [x] Add `Version History` sections to the new or updated schema docs so contract evolution is recorded explicitly by git tag or release tag. +- [x] Update README guidance where debug JSON, debug reports, and detector behavior need user-facing clarification. +- [x] Document the default debug-report filename change as a compatibility note for users and automation consumers. +- [x] Add completion job records under `docs/plans/jobs/` once implementation phases land. ## Compatibility Gates - [x] Default non-debug output remains unchanged. - [x] Existing `--debug`, `--verbose`, `--debug-report`, and `--debug-report-tee` routing behavior remains intact aside from the new shared event envelope shape. -- [ ] The default autogenerated debug-report filename change is treated as a compatibility-impacting change and documented explicitly. +- [x] The default autogenerated debug-report filename change is treated as a compatibility-impacting change and documented explicitly. - [x] Existing per-file top-level `skipped` consumers continue to work during this phase. - [x] `--detector wasm` keeps the already-fixed detector-first Latin hint ordering behavior. 
- [x] The new Latin guardrails bias toward fallback to `und-Latn` rather than broadening forced language upgrades. diff --git a/docs/schemas/debug-event-stream-contract.md b/docs/schemas/debug-event-stream-contract.md new file mode 100644 index 0000000..edf53ad --- /dev/null +++ b/docs/schemas/debug-event-stream-contract.md @@ -0,0 +1,119 @@ +--- +title: "Debug Event Stream Contract" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +# Debug Event Stream Contract + +This document defines the CLI debug event-stream contract used by `--debug` and `--debug-report`. + +## Scope + +- Applies to CLI runtime diagnostics only. +- Applies to terminal `stderr` debug output and `.jsonl` debug report files. +- Uses one JSON object per line. + +## Container Format + +- Debug reports remain line-oriented `.jsonl`. +- Terminal debug output uses the same JSON payloads prefixed with `[debug] `. + +## Event Envelope + +Version `1` adds a shared flat envelope around existing event payload fields. + +Required fields: + +- `schemaVersion` +- `timestamp` +- `runId` +- `topic` +- `scope` +- `event` +- `verbosity` + +Example: + +```json +{ + "schemaVersion": 1, + "timestamp": "2026-03-24T05:32:21.123Z", + "runId": "wc-debug-1774330341123-55149", + "topic": "batch", + "scope": "run", + "event": "batch.resolve.start", + "verbosity": "compact", + "inputs": 2, + "pathMode": "auto", + "recursive": false +} +``` + +## Field Rules + +### `schemaVersion` + +- First shared envelope version is numeric `1`. + +### `timestamp` + +- UTC ISO-8601 string produced with `Date.prototype.toISOString()`. + +### `runId` + +- Per-run correlation key. +- Format: `wc-debug-<epochMs>-<pid>` + +### `topic` + +- High-level subsystem label. +- Current values include: + - `batch` + - `path` + - `runtime` + - `detector` + +### `scope` + +- Unit of observation. +- Current values include: + - `run` + - `file` + +### `event` + +- Existing event names remain unchanged. 
+- Version `1` keeps payload fields flat rather than nesting them under `payload`. + +### `verbosity` + +- `compact` +- `verbose` + +## Debug Report Filename Contract + +When `--debug-report` is used without an explicit path, the autogenerated filename is: + +`wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` + +Collision handling: + +- append `-<n>` before `.jsonl` if the autogenerated path already exists + +## Compatibility Notes + +- Version `1` is additive for event payload structure because event names and event-specific top-level fields remain intact. +- The autogenerated debug-report filename change is compatibility-impacting for scripts that match the old local-time pattern. + +## Version History + +- After `v0.1.5-canary.2`: + - added shared debug event envelope version `1` + - added `runId`, `topic`, `scope`, `timestamp`, and `verbosity` + - changed autogenerated debug-report filenames to the UTC `...-utc-...jsonl` pattern + - compatibility note: filename pattern changed for automation consumers +- `v0.1.5-canary.2` and earlier: + - debug event lines were flat JSON without a shared envelope + - autogenerated debug-report filenames used local-time `wc-debug-YYYYMMDD-HHmmss-<pid>.jsonl` diff --git a/docs/schemas/json-output-contract.md b/docs/schemas/json-output-contract.md index c1f723b..038bc38 100644 --- a/docs/schemas/json-output-contract.md +++ b/docs/schemas/json-output-contract.md @@ -1,7 +1,7 @@ --- title: "JSON Output Contract" created-date: 2026-02-17 -modified-date: 2026-03-23 +modified-date: 2026-03-24 status: completed agent: Codex --- @@ -123,14 +123,30 @@ Compatibility note: - Top-level `meta.aggregateTotalOfOverride` is retained in per-file payloads. - Per-file `files[i].meta.totalOfOverride` is additive. -### `--debug` (per-file batch mode) +### `--debug` (debug-gated result diagnostics) -When skip diagnostics are enabled (debug + not quiet skips), per-file payloads include `skipped`: +Debug-gated result diagnostics use normalized `debug.*` placement. 
+ +- Single input and merged batch may include `debug.detector` when detector summaries are available. +- Per-file batch may include: + - `debug.skipped` + - `debug.detector` + - per-entry `files[i].debug.detector` + +Per-file compatibility behavior: + +- top-level `skipped` is retained temporarily for compatibility +- normalized debug placement is `debug.skipped` + +Example: ```json { "scope": "per-file", "files": [], + "debug": { + "skipped": [{ "path": "/abs/path/x.bin", "reason": "binary content detected" }] + }, "skipped": [{ "path": "/abs/path/x.bin", "reason": "binary content detected" }], "aggregate": { "total": 0 } } @@ -141,55 +157,54 @@ When skip diagnostics are enabled (debug + not quiet skips), per-file payloads i When non-word collection is enabled, `counts` and non-word breakdown fields are present. Whitespace details appear when whitespace collection is enabled. -### Detector Metadata (`--detector`) +### Detector Debug Summaries (`--debug --detector wasm`) -Detector-aware runs reserve `meta.detector` for detector-related metadata. +Detector-aware debug JSON may include `debug.detector` summaries. 
-Draft shape: +Current summary shape: ```json { - "meta": { + "debug": { "detector": { "mode": "wasm", - "provenance": "per-item" + "engine": "whatlang-wasm", + "windowsTotal": 1, + "accepted": 1, + "fallback": 0 } } } ``` -Draft per-item provenance: - -- chunk-style items may include `source` -- allowed source values: - - `script` - - `hint` - - `wasm` - -Example (draft shape): +Per-file batch entries may also carry per-file detector summaries: ```json { - "total": 13, - "breakdown": { - "mode": "chunk", - "items": [ - { "locale": "en", "source": "wasm", "words": 13 } - ] - }, - "meta": { - "detector": { - "mode": "wasm", - "provenance": "per-item" + "scope": "per-file", + "files": [ + { + "path": "/abs/path/a.txt", + "result": { "total": 13 }, + "debug": { + "detector": { + "mode": "wasm", + "engine": "whatlang-wasm", + "windowsTotal": 1, + "accepted": 1, + "fallback": 0 + } + } } - } + ] } ``` Notes: -- Detector provenance is relevant only when detector-aware routes are enabled. -- Aggregated collector-style outputs do not guarantee per-assignment provenance. +- Detector debug summaries are debug-gated and additive. +- Default non-debug JSON remains result-oriented. +- Stable `meta.detector` metadata remains reserved for future additive contract work. ## Contract Rules @@ -197,7 +212,19 @@ Notes: - `files` is present only for per-file batch payloads. - `aggregate` is present only for per-file batch payloads. - `meta` is optional and appears only when feature-specific metadata exists. -- `skipped` is optional and debug-gated in per-file batch payloads. +- `debug` is optional and appears only when debug-gated diagnostics exist. +- `skipped` is optional and debug-gated in per-file batch payloads as a compatibility legacy field. 
+ +## Version History + +- After `v0.1.5-canary.2`: + - added normalized debug-gated JSON diagnostics under `debug.*` + - added `debug.detector` summaries for detector-aware debug JSON + - added per-entry `files[i].debug.detector` in per-file batch JSON + - retained top-level `skipped` temporarily for compatibility while adding `debug.skipped` +- `v0.1.5-canary.2` and earlier: + - per-file debug JSON used top-level `skipped` only + - no normalized `debug.*` contract was documented ## Related Docs diff --git a/src/cli/batch/jobs/load-count-worker.ts b/src/cli/batch/jobs/load-count-worker.ts index 2e5e34c..fb87935 100644 --- a/src/cli/batch/jobs/load-count-worker.ts +++ b/src/cli/batch/jobs/load-count-worker.ts @@ -95,6 +95,8 @@ export async function countBatchInputsWithWorkerJobs( wcOptions: options.wcOptions, preserveCollectorSegments: options.preserveCollectorSegments, onFileProcessed: options.onFileProcessed, + onDetectorDebugEvent: options.onDetectorDebugEvent, + debugEnabled: options.onDetectorDebugEvent !== undefined, }); } catch (error) { if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) { diff --git a/src/cli/batch/jobs/load-count.ts b/src/cli/batch/jobs/load-count.ts index 3f51b67..c3d420a 100644 --- a/src/cli/batch/jobs/load-count.ts +++ b/src/cli/batch/jobs/load-count.ts @@ -40,6 +40,10 @@ export async function countBatchInputsWithJobs( } satisfies CountBatchEntry; } + const detectorDebug = + detectorMode === "wasm" + ? options.createDetectorDebugContext?.({ path: loaded.path }) + : undefined; const result = detectorMode === "regex" ? options.section === "all" @@ -49,10 +53,12 @@ export async function countBatchInputsWithJobs( ? 
await wordCounterWithDetector(loaded.content, { ...options.wcOptions, detector: detectorMode, + detectorDebug, }) : await countSectionsWithDetector(loaded.content, options.section, { ...options.wcOptions, detector: detectorMode, + detectorDebug, }); if (!options.preserveCollectorSegments) { @@ -66,6 +72,7 @@ export async function countBatchInputsWithJobs( file: { path: loaded.path, result, + ...(detectorDebug?.summary ? { debug: { detector: detectorDebug.summary } } : {}), }, } satisfies CountBatchEntry; }); diff --git a/src/cli/batch/jobs/types.ts b/src/cli/batch/jobs/types.ts index 9fdc196..89a4628 100644 --- a/src/cli/batch/jobs/types.ts +++ b/src/cli/batch/jobs/types.ts @@ -1,5 +1,6 @@ import type { SectionMode } from "../../../markdown"; import type { DetectorMode } from "../../../detector"; +import type { DetectorDebugContext } from "../../../detector/debug"; import type wordCounter from "../../../wc"; import type { BatchFileResult, BatchSkip } from "../../types"; import type { BatchProgressSnapshot } from "../../progress/reporter"; @@ -20,6 +21,12 @@ export type CountBatchWithJobsOptions = { wcOptions: Parameters[1]; preserveCollectorSegments: boolean; onFileProcessed?: (snapshot: BatchProgressSnapshot) => void; + createDetectorDebugContext?: (input: { path: string }) => DetectorDebugContext | undefined; + onDetectorDebugEvent?: ( + event: string, + details?: Record, + options?: { verbosity?: "compact" | "verbose" }, + ) => void; }; export type CountBatchWithJobsResult = { diff --git a/src/cli/batch/jobs/worker-pool.ts b/src/cli/batch/jobs/worker-pool.ts index 6749618..96ce888 100644 --- a/src/cli/batch/jobs/worker-pool.ts +++ b/src/cli/batch/jobs/worker-pool.ts @@ -20,6 +20,12 @@ type CountBatchInputsWithWorkerPoolOptions = { wcOptions: Parameters[1]; preserveCollectorSegments: boolean; onFileProcessed?: (snapshot: BatchProgressSnapshot) => void; + onDetectorDebugEvent?: ( + event: string, + details?: Record, + options?: { verbosity?: "compact" | "verbose" 
}, + ) => void; + debugEnabled?: boolean; }; export class WorkerPoolUnavailableError extends Error {} @@ -80,7 +86,7 @@ function isWorkerResponseMessage(value: unknown): value is WorkerResponseMessage } const type = (value as { type?: unknown }).type; - return type === "result" || type === "fatal"; + return type === "result" || type === "fatal" || type === "debug-event"; } export async function countBatchInputsWithWorkerPool( @@ -188,6 +194,7 @@ export async function countBatchInputsWithWorkerPool( detectorMode: options.detectorMode, wcOptions: options.wcOptions, preserveCollectorSegments: options.preserveCollectorSegments, + debugEnabled: options.debugEnabled, }, }); } catch (error) { @@ -210,6 +217,28 @@ export async function countBatchInputsWithWorkerPool( return; } + if (value.type === "debug-event") { + if (value.index !== pending.index) { + void fail( + new Error( + `Worker protocol mismatch: task index mismatch for ${value.taskId} (expected ${pending.index}, got ${value.index}).`, + ), + ); + return; + } + options.onDetectorDebugEvent?.( + value.event, + { + path: pending.path, + ...(value.details ?? 
{}), + }, + { + verbosity: value.verbosity, + }, + ); + return; + } + pendingTasks.delete(value.taskId); if (value.index !== pending.index) { void fail( diff --git a/src/cli/batch/jobs/worker/count-worker.ts b/src/cli/batch/jobs/worker/count-worker.ts index 4a7bb8c..a8c5e8d 100644 --- a/src/cli/batch/jobs/worker/count-worker.ts +++ b/src/cli/batch/jobs/worker/count-worker.ts @@ -2,6 +2,7 @@ import { readFile } from "node:fs/promises"; import { parentPort, workerData } from "node:worker_threads"; import { countSections } from "../../../../markdown"; import { countSectionsWithDetector, wordCounterWithDetector } from "../../../../detector"; +import { createDetectorDebugSummary } from "../../../../detector/debug"; import wordCounter from "../../../../wc"; import { compactCollectorSegmentsInCountResult } from "../../aggregate"; import { isProbablyBinary } from "../../../path/load"; @@ -77,6 +78,27 @@ parentPort.on("message", async (message: WorkerRequestMessage) => { try { const content = buffer.toString("utf8"); + const detectorDebug = + config.detectorMode === "wasm" && config.debugEnabled + ? { + emit: ( + event: string, + details?: Record, + options?: { verbosity?: "compact" | "verbose" }, + ) => { + const debugEvent: WorkerResponseMessage = { + type: "debug-event", + taskId: message.taskId, + index: message.index, + event, + details, + verbosity: options?.verbosity, + }; + parentPort?.postMessage(debugEvent); + }, + summary: createDetectorDebugSummary(config.detectorMode), + } + : undefined; const result = config.detectorMode === "regex" ? config.section === "all" @@ -86,10 +108,12 @@ parentPort.on("message", async (message: WorkerRequestMessage) => { ? 
await wordCounterWithDetector(content, { ...config.wcOptions, detector: config.detectorMode, + detectorDebug, }) : await countSectionsWithDetector(content, config.section, { ...config.wcOptions, detector: config.detectorMode, + detectorDebug, }); if (!config.preserveCollectorSegments) { @@ -105,6 +129,7 @@ parentPort.on("message", async (message: WorkerRequestMessage) => { file: { path, result, + ...(detectorDebug?.summary ? { debug: { detector: detectorDebug.summary } } : {}), }, }, }; diff --git a/src/cli/batch/jobs/worker/protocol.ts b/src/cli/batch/jobs/worker/protocol.ts index 9f967bf..7322e6c 100644 --- a/src/cli/batch/jobs/worker/protocol.ts +++ b/src/cli/batch/jobs/worker/protocol.ts @@ -1,4 +1,5 @@ import type { SectionMode, SectionedResult } from "../../../../markdown"; +import type { DetectorDebugSummary } from "../../../../detector/debug"; import type { DetectorMode } from "../../../../detector"; import type { WordCounterOptions, WordCounterResult } from "../../../../wc"; import type { BatchSkip } from "../../../types"; @@ -8,6 +9,7 @@ export type WorkerConfig = { detectorMode: DetectorMode; wcOptions: WordCounterOptions; preserveCollectorSegments: boolean; + debugEnabled?: boolean; }; export type WorkerTaskMessage = { @@ -26,6 +28,9 @@ export type WorkerRequestMessage = WorkerTaskMessage | WorkerShutdownMessage; export type WorkerFileResultPayload = { path: string; result: WordCounterResult | SectionedResult; + debug?: { + detector?: DetectorDebugSummary; + }; }; export type WorkerTaskResultMessage = { @@ -52,4 +57,16 @@ export type WorkerTaskFatalMessage = { message: string; }; -export type WorkerResponseMessage = WorkerTaskResultMessage | WorkerTaskFatalMessage; +export type WorkerTaskDebugEventMessage = { + type: "debug-event"; + taskId: number; + index: number; + event: string; + details?: Record; + verbosity?: "compact" | "verbose"; +}; + +export type WorkerResponseMessage = + | WorkerTaskResultMessage + | WorkerTaskFatalMessage + | 
WorkerTaskDebugEventMessage; diff --git a/src/cli/batch/run.ts b/src/cli/batch/run.ts index 0601d99..f7d9302 100644 --- a/src/cli/batch/run.ts +++ b/src/cli/batch/run.ts @@ -1,8 +1,9 @@ import type { SectionMode } from "../../markdown"; import type { DetectorWordCounterOptions } from "../../detector"; +import { createDetectorDebugSummary } from "../../detector/debug"; import { appendAll } from "../../utils/append-all"; import type wordCounter from "../../wc"; -import type { DebugChannel } from "../debug/channel"; +import type { DebugChannel, DebugEventOptions } from "../debug/channel"; import { countBatchInputsWithJobs } from "./jobs/load-count"; import { WorkerRouteUnavailableError, @@ -30,6 +31,41 @@ type RunBatchCountOptions = { }; export async function runBatchCount(options: RunBatchCountOptions): Promise { + const createFileDetectorDebugContext = ({ path }: { path: string }) => + options.debug.enabled && options.wcOptions.detector === "wasm" + ? { + emit: ( + event: string, + details?: Record, + eventOptions?: DebugEventOptions, + ) => + options.debug.emit( + event, + { + path, + ...details, + }, + { + ...eventOptions, + scope: "file", + }, + ), + summary: createDetectorDebugSummary("wasm"), + } + : undefined; + const emitWorkerDetectorDebugEvent = + options.debug.enabled + ? 
( + event: string, + details?: Record, + eventOptions?: DebugEventOptions, + ) => { + options.debug.emit(event, details, { + ...eventOptions, + scope: "file", + }); + } + : undefined; const batchStartedAtMs = Date.now(); const resolveStartedAtMs = Date.now(); @@ -103,6 +139,7 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { options.progressReporter.advance(snapshot); @@ -134,6 +171,7 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { options.progressReporter.advance(snapshot); @@ -148,6 +186,7 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { options.progressReporter.advance(snapshot); diff --git a/src/cli/debug/channel.ts b/src/cli/debug/channel.ts index 5c7e5e3..a97a1a8 100644 --- a/src/cli/debug/channel.ts +++ b/src/cli/debug/channel.ts @@ -3,10 +3,12 @@ import { basename, dirname, extname, join, resolve as resolvePath } from "node:p type DebugDetails = Record; export type DebugVerbosity = "compact" | "verbose"; +export type DebugEventScope = "run" | "file"; const DEBUG_EVENT_SCHEMA_VERSION = 1; -type DebugEventOptions = { +export type DebugEventOptions = { verbosity?: DebugVerbosity; + scope?: DebugEventScope; }; type DebugReportOptions = { @@ -76,7 +78,7 @@ const FILE_SCOPED_EVENT_PATTERNS = [ /^path\.resolve\.dedupe\.(accept|duplicate)$/, ]; -function inferEventScope(event: string): string { +function inferEventScope(event: string): DebugEventScope { return FILE_SCOPED_EVENT_PATTERNS.some((pattern) => pattern.test(event)) ? "file" : "run"; } @@ -207,7 +209,7 @@ export function createDebugChannel(options: CreateDebugChannelOptions): DebugCha timestamp, runId, topic: inferEventTopic(event), - scope: inferEventScope(event), + scope: eventOptions.scope ?? 
inferEventScope(event), event, verbosity: eventVerbosity, ...details, diff --git a/src/cli/output/debug-json.ts b/src/cli/output/debug-json.ts new file mode 100644 index 0000000..982c554 --- /dev/null +++ b/src/cli/output/debug-json.ts @@ -0,0 +1,23 @@ +import type { DetectorDebugSummary } from "../../detector/debug"; +import type { BatchSkip } from "../types"; + +type DebugSection = { + skipped?: BatchSkip[]; + detector?: DetectorDebugSummary; +}; + +export function buildDebugSection( + input: DebugSection, +): DebugSection | undefined { + const output: DebugSection = {}; + + if (input.skipped && input.skipped.length > 0) { + output.skipped = input.skipped; + } + + if (input.detector) { + output.detector = input.detector; + } + + return Object.keys(output).length > 0 ? output : undefined; +} diff --git a/src/cli/runtime/batch.ts b/src/cli/runtime/batch.ts index e4f74b1..a52d198 100644 --- a/src/cli/runtime/batch.ts +++ b/src/cli/runtime/batch.ts @@ -1,4 +1,5 @@ import type { SectionedResult } from "../../markdown"; +import { mergeDetectorDebugSummaries } from "../../detector/debug"; import { runBatchCount } from "../batch/run"; import { clampRequestedJobs, @@ -7,6 +8,7 @@ import { } from "../batch/jobs/limits"; import { resolveBatchJobsStrategy } from "../batch/jobs/strategy"; import type { DebugChannel } from "../debug/channel"; +import { buildDebugSection } from "../output/debug-json"; import { getTotalLabels, isSectionedResult, @@ -157,6 +159,9 @@ export async function executeBatchCount({ if (options.format === "json") { const spacing = options.pretty ? 2 : 0; + const aggregateDetectorDebug = mergeDetectorDebugSummaries( + summary.files.map((file) => file.debug?.detector), + ); if (batchOptions.scope === "per-file") { const skipped = showSkipDiagnostics ? summary.skipped : undefined; @@ -164,6 +169,9 @@ export async function executeBatchCount({ const base = { path: file.path, result: file.result, + ...(options.debug && file.debug + ? 
{ debug: buildDebugSection({ detector: file.debug.detector }) } + : {}), }; if (!resolved.totalOfParts || resolved.totalOfParts.length === 0) { @@ -196,6 +204,14 @@ export async function executeBatchCount({ scope: "per-file", files: fileEntries, ...(skipped ? { skipped } : {}), + ...(options.debug + ? { + debug: buildDebugSection({ + skipped, + detector: aggregateDetectorDebug, + }), + } + : {}), aggregate: summary.aggregate, ...(meta ? { meta } : {}), }; @@ -204,7 +220,22 @@ export async function executeBatchCount({ } if (!aggregateTotalOfOverride) { - console.log(JSON.stringify(summary.aggregate, null, spacing)); + console.log( + JSON.stringify( + { + ...summary.aggregate, + ...(options.debug + ? { + debug: buildDebugSection({ + detector: aggregateDetectorDebug, + }), + } + : {}), + }, + null, + spacing, + ), + ); return; } console.log( @@ -215,6 +246,13 @@ export async function executeBatchCount({ totalOf: aggregateTotalOfOverride.parts, totalOfOverride: aggregateTotalOfOverride.total, }, + ...(options.debug + ? 
{ + debug: buildDebugSection({ + detector: aggregateDetectorDebug, + }), + } + : {}), }, null, spacing, diff --git a/src/cli/runtime/single.ts b/src/cli/runtime/single.ts index c5caa84..64f361e 100644 --- a/src/cli/runtime/single.ts +++ b/src/cli/runtime/single.ts @@ -1,9 +1,11 @@ import type { SectionedResult } from "../../markdown"; +import { buildDebugSection } from "../output/debug-json"; import { countSections } from "../../markdown"; import { countSectionsWithDetector, wordCounterWithDetector, } from "../../detector"; +import { createDetectorDebugSummary } from "../../detector/debug"; import { getTotalLabels, isSectionedResult, @@ -15,19 +17,28 @@ import { resolveTotalOfOverride } from "../total-of"; import wordCounter, { type WordCounterResult } from "../../wc"; import { resolveInput } from "./input"; import { formatInputReadError } from "./options"; +import type { DebugChannel } from "../debug/channel"; import type { CliActionOptions, ResolvedCountRunOptions } from "./types"; type ExecuteSingleCountOptions = { textTokens: string[]; options: CliActionOptions; resolved: ResolvedCountRunOptions; + debug: DebugChannel; }; export async function executeSingleCount({ textTokens, options, resolved, + debug, }: ExecuteSingleCountOptions): Promise { + debug.emit("runtime.single.start", { + detectorMode: resolved.detectorMode, + format: options.format, + section: options.section, + }); + let input: string; try { input = await resolveInput(textTokens); @@ -40,22 +51,40 @@ export async function executeSingleCount({ throw new Error("No input provided. Pass text, pipe stdin, or use --path."); } + const detectorDebugSummary = + resolved.detectorMode === "wasm" ? createDetectorDebugSummary(resolved.detectorMode) : undefined; + const detectorDebug = + detectorDebugSummary && debug.enabled + ? { + emit: debug.emit, + summary: detectorDebugSummary, + } + : undefined; + const result: WordCounterResult | SectionedResult = resolved.useSection ? resolved.detectorMode === "regex" ? 
countSections(trimmed, options.section, resolved.wcOptions) : await countSectionsWithDetector(trimmed, options.section, { ...resolved.wcOptions, detector: resolved.detectorMode, + detectorDebug, }) : resolved.detectorMode === "regex" ? wordCounter(trimmed, resolved.wcOptions) : await wordCounterWithDetector(trimmed, { ...resolved.wcOptions, detector: resolved.detectorMode, + detectorDebug, }); const totalOfOverride = resolveTotalOfOverride(result, resolved.totalOfParts); const displayResult = resolved.shouldNormalizeBaseOutput ? normalizeResultBase(result) : result; + debug.emit("runtime.single.complete", { + detectorMode: resolved.detectorMode, + sectioned: resolved.useSection, + total: displayResult.total, + }); + if (options.format === "raw") { console.log(totalOfOverride?.total ?? displayResult.total); return; @@ -63,7 +92,11 @@ export async function executeSingleCount({ if (options.format === "json") { const spacing = options.pretty ? 2 : 0; - if (!totalOfOverride) { + const debugSection = + options.debug && detectorDebugSummary && detectorDebugSummary.windowsTotal > 0 + ? buildDebugSection({ detector: detectorDebugSummary }) + : undefined; + if (!totalOfOverride && !debugSection) { console.log(JSON.stringify(displayResult, null, spacing)); return; } @@ -71,10 +104,15 @@ export async function executeSingleCount({ JSON.stringify( { ...displayResult, - meta: { - totalOf: totalOfOverride.parts, - totalOfOverride: totalOfOverride.total, - }, + ...(totalOfOverride + ? { + meta: { + totalOf: totalOfOverride.parts, + totalOfOverride: totalOfOverride.total, + }, + } + : {}), + ...(debugSection ? 
{ debug: debugSection } : {}), }, null, spacing, diff --git a/src/cli/types.ts b/src/cli/types.ts index 6957ddf..21fbb5a 100644 --- a/src/cli/types.ts +++ b/src/cli/types.ts @@ -1,4 +1,5 @@ import type { SectionedResult } from "../markdown"; +import type { DetectorDebugSummary } from "../detector/debug"; import type { WordCounterResult } from "../wc"; export type BatchScope = "merged" | "per-file"; @@ -17,6 +18,9 @@ export type BatchFileInput = { export type BatchFileResult = { path: string; result: WordCounterResult | SectionedResult; + debug?: { + detector?: DetectorDebugSummary; + }; }; export type BatchOptions = { diff --git a/src/command.ts b/src/command.ts index 8bdd1a1..e2a0fb9 100644 --- a/src/command.ts +++ b/src/command.ts @@ -130,6 +130,7 @@ export async function runCli( textTokens, options, resolved, + debug, }); } catch (error) { const message = error instanceof Error ? error.message : String(error); diff --git a/src/detector/debug.ts b/src/detector/debug.ts new file mode 100644 index 0000000..908fd8b --- /dev/null +++ b/src/detector/debug.ts @@ -0,0 +1,148 @@ +import { DEFAULT_HAN_TAG, DEFAULT_LOCALE } from "../wc/locale-detect"; +import type { DetectorRouteTag } from "./policy"; +import type { DetectorMode } from "./types"; + +export type DetectorDebugVerbosity = "compact" | "verbose"; + +export type DetectorDebugSummary = { + mode: DetectorMode; + engine: "none" | "whatlang-wasm"; + windowsTotal: number; + accepted: number; + fallback: number; + routes: { + latin: number; + han: number; + }; + acceptancePaths: { + reliable: number; + corroborated: number; + }; + fallbackReasons: { + notEligible: number; + noCandidate: number; + belowThreshold: number; + qualityGate: number; + corroborationUnreliable: number; + }; +}; + +export type DetectorDebugContext = { + emit?: ( + event: string, + details?: Record, + options?: { verbosity?: DetectorDebugVerbosity }, + ) => void; + summary?: DetectorDebugSummary; +}; + +export type DetectorFallbackReason = + | 
"notEligible" + | "noCandidate" + | "belowThreshold" + | "qualityGate" + | "corroborationUnreliable"; + +export function createDetectorDebugSummary( + mode: DetectorMode, + engine: DetectorDebugSummary["engine"] = mode === "wasm" ? "whatlang-wasm" : "none", +): DetectorDebugSummary { + return { + mode, + engine, + windowsTotal: 0, + accepted: 0, + fallback: 0, + routes: { + latin: 0, + han: 0, + }, + acceptancePaths: { + reliable: 0, + corroborated: 0, + }, + fallbackReasons: { + notEligible: 0, + noCandidate: 0, + belowThreshold: 0, + qualityGate: 0, + corroborationUnreliable: 0, + }, + }; +} + +export function mergeDetectorDebugSummaries( + summaries: Array, +): DetectorDebugSummary | undefined { + const present = summaries.filter((summary): summary is DetectorDebugSummary => summary !== undefined); + if (present.length === 0) { + return undefined; + } + + const first = present[0]!; + const merged = createDetectorDebugSummary(first.mode, first.engine); + for (const summary of present) { + merged.windowsTotal += summary.windowsTotal; + merged.accepted += summary.accepted; + merged.fallback += summary.fallback; + merged.routes.latin += summary.routes.latin; + merged.routes.han += summary.routes.han; + merged.acceptancePaths.reliable += summary.acceptancePaths.reliable; + merged.acceptancePaths.corroborated += summary.acceptancePaths.corroborated; + merged.fallbackReasons.notEligible += summary.fallbackReasons.notEligible; + merged.fallbackReasons.noCandidate += summary.fallbackReasons.noCandidate; + merged.fallbackReasons.belowThreshold += summary.fallbackReasons.belowThreshold; + merged.fallbackReasons.qualityGate += summary.fallbackReasons.qualityGate; + merged.fallbackReasons.corroborationUnreliable += + summary.fallbackReasons.corroborationUnreliable; + } + + return merged; +} + +export function recordDetectorWindow( + summary: DetectorDebugSummary | undefined, + routeTag: DetectorRouteTag, +): void { + if (!summary) { + return; + } + + summary.windowsTotal += 
1; + if (routeTag === DEFAULT_LOCALE) { + summary.routes.latin += 1; + return; + } + if (routeTag === DEFAULT_HAN_TAG) { + summary.routes.han += 1; + } +} + +export function recordDetectorAccepted( + summary: DetectorDebugSummary | undefined, + path: "reliable" | "corroborated", +): void { + if (!summary) { + return; + } + + summary.accepted += 1; + if (path === "reliable") { + summary.acceptancePaths.reliable += 1; + return; + } + + summary.acceptancePaths.corroborated += 1; +} + +export function recordDetectorFallback( + summary: DetectorDebugSummary | undefined, + reason: DetectorFallbackReason, +): void { + if (!summary) { + return; + } + + summary.fallback += 1; + summary.fallbackReasons[reason] += 1; +} diff --git a/src/detector/types.ts b/src/detector/types.ts index da76581..c17199f 100644 --- a/src/detector/types.ts +++ b/src/detector/types.ts @@ -1,6 +1,7 @@ import type { SectionedResult, SectionMode } from "../markdown"; import type { LocaleDetectOptions } from "../wc/locale-detect"; import type { WordCounterOptions, WordCounterResult } from "../wc/types"; +import type { DetectorDebugContext } from "./debug"; export type DetectorMode = "regex" | "wasm"; @@ -15,6 +16,7 @@ export interface DetectorResult { export interface DetectorRuntimeOptions { detector?: DetectorMode; + detectorDebug?: DetectorDebugContext; } export interface DetectorLocaleOptions extends LocaleDetectOptions, DetectorRuntimeOptions {} diff --git a/src/detector/wasm.ts b/src/detector/wasm.ts index 674e70c..f0d465a 100644 --- a/src/detector/wasm.ts +++ b/src/detector/wasm.ts @@ -3,6 +3,11 @@ import { segmentTextByLocale } from "../wc"; import { resolveLocaleDetectContext } from "../wc/locale-detect"; import type { LocaleChunk } from "../wc/types"; import { buildWordCounterResultFromChunks } from "./result-builder"; +import { + recordDetectorAccepted, + recordDetectorFallback, + recordDetectorWindow, +} from "./debug"; import { countSectionsWithResolvedDetector } from "./sections"; import 
{ DETECTOR_ROUTE_POLICIES, @@ -132,8 +137,29 @@ function buildDetectorWindows(chunks: LocaleChunk[]): DetectorWindow[] { return windows; } -async function resolveWindowLocale(window: DetectorWindow): Promise { +async function resolveWindowLocale( + window: DetectorWindow, + debug?: DetectorLocaleOptions["detectorDebug"], +): Promise { + recordDetectorWindow(debug?.summary, window.routeTag); + debug?.emit?.( + "detector.window.start", + { + routeTag: window.routeTag, + startIndex: window.startIndex, + endIndex: window.endIndex, + textLength: window.text.length, + }, + { verbosity: "verbose" }, + ); + if (!shouldRunWasmDetector(window.text, window.routeTag)) { + recordDetectorFallback(debug?.summary, "notEligible"); + debug?.emit?.("detector.window.fallback", { + routeTag: window.routeTag, + finalTag: window.routeTag, + reason: "notEligible", + }); return window.routeTag; } @@ -147,12 +173,41 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { normalizedSample.length > 0 && normalizedSample !== window.text ? await detectWithWhatlangWasm(normalizedSample, window.routeTag) : null; + debug?.emit?.( + "detector.window.sample", + { + routeTag: window.routeTag, + normalizedApplied: normalizedSample.length > 0 && normalizedSample !== window.text, + normalizedLength: normalizedSample.length, + qualityGate: passesLatinQualityGate, + rawTag: rawRemapped?.tag ?? null, + rawConfidence: rawRemapped?.confidence ?? null, + rawReliable: rawRemapped?.reliable ?? null, + }, + { verbosity: "verbose" }, + ); const normalizedRemapped = normalizedResult ? remapWhatlangResult(normalizedResult, window.routeTag) : null; + debug?.emit?.( + "detector.window.candidates", + { + routeTag: window.routeTag, + normalizedTag: normalizedRemapped?.tag ?? null, + normalizedConfidence: normalizedRemapped?.confidence ?? null, + normalizedReliable: normalizedRemapped?.reliable ?? 
null, + }, + { verbosity: "verbose" }, + ); const candidates = [rawRemapped, normalizedRemapped].filter((value) => value !== null); if (candidates.length === 0) { + recordDetectorFallback(debug?.summary, "noCandidate"); + debug?.emit?.("detector.window.fallback", { + routeTag: window.routeTag, + finalTag: getDetectorFallbackTag(window.routeTag), + reason: "noCandidate", + }); return getDetectorFallbackTag(window.routeTag); } @@ -172,6 +227,14 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { strongestCandidate.reliable, ) ) { + recordDetectorAccepted(debug?.summary, "reliable"); + debug?.emit?.("detector.window.accepted", { + routeTag: window.routeTag, + finalTag: strongestCandidate.tag, + acceptancePath: "reliable", + confidence: strongestCandidate.confidence ?? null, + reliable: strongestCandidate.reliable ?? null, + }); return strongestCandidate.tag; } @@ -192,10 +255,34 @@ async function resolveWindowLocale(window: DetectorWindow): Promise { hasReliableCorroboration && corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE ) { + recordDetectorAccepted(debug?.summary, "corroborated"); + debug?.emit?.("detector.window.accepted", { + routeTag: window.routeTag, + finalTag: rawRemapped.tag, + acceptancePath: "corroborated", + confidence: corroboratedConfidence, + reliable: hasReliableCorroboration, + }); return rawRemapped.tag; } + + if (!hasReliableCorroboration && corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE) { + recordDetectorFallback(debug?.summary, "corroborationUnreliable"); + debug?.emit?.("detector.window.fallback", { + routeTag: window.routeTag, + finalTag: getDetectorFallbackTag(window.routeTag), + reason: "corroborationUnreliable", + }); + return getDetectorFallbackTag(window.routeTag); + } } + recordDetectorFallback(debug?.summary, passesLatinQualityGate ? 
"belowThreshold" : "qualityGate"); + debug?.emit?.("detector.window.fallback", { + routeTag: window.routeTag, + finalTag: getDetectorFallbackTag(window.routeTag), + reason: passesLatinQualityGate ? "belowThreshold" : "qualityGate", + }); return getDetectorFallbackTag(window.routeTag); } @@ -214,7 +301,7 @@ export async function segmentTextByLocaleWithWasmDetector( const windows = buildDetectorWindows(chunks); for (const window of windows) { - const resolvedLocale = await resolveWindowLocale(window); + const resolvedLocale = await resolveWindowLocale(window, options.detectorDebug); for (let index = window.startIndex; index <= window.endIndex; index += 1) { const chunk = resolved[index]; if (!chunk) { @@ -227,6 +314,9 @@ export async function segmentTextByLocaleWithWasmDetector( } } + options.detectorDebug?.emit?.("detector.summary", options.detectorDebug.summary, { + verbosity: "compact", + }); return reapplyDeferredLatinFallback(resolved, options); } diff --git a/test/command.test.ts b/test/command.test.ts index 9330114..6855ea1 100644 --- a/test/command.test.ts +++ b/test/command.test.ts @@ -267,6 +267,58 @@ describe("detector mode", () => { expect(parsed.breakdown.items[0]?.locale).toBe("fr"); }); + test("emits runtime and detector debug events for single-input wasm runs", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const output = await captureCli([ + "--detector", + "wasm", + "--format", + "raw", + "--debug", + "--verbose", + "This sentence should clearly be detected as English for the wasm detector path.", + ]); + + const eventNames = listDebugEventNames(output.stderr); + expect(eventNames.includes("runtime.single.start")).toBeTrue(); + expect(eventNames.includes("runtime.single.complete")).toBeTrue(); + expect(eventNames.includes("detector.window.start")).toBeTrue(); + expect(eventNames.includes("detector.window.accepted")).toBeTrue(); + expect(eventNames.includes("detector.summary")).toBeTrue(); + }); + + test("adds debug detector 
summary to single-input json only when --debug is enabled", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const baseOutput = await captureCli([ + "--detector", + "wasm", + "--format", + "json", + "This sentence should clearly be detected as English for the wasm detector path.", + ]); + const baseParsed = JSON.parse(baseOutput.stdout[0] ?? "{}"); + expect(baseParsed.debug).toBeUndefined(); + + const debugOutput = await captureCli([ + "--detector", + "wasm", + "--format", + "json", + "--debug", + "This sentence should clearly be detected as English for the wasm detector path.", + ]); + const debugParsed = JSON.parse(debugOutput.stdout[0] ?? "{}"); + expect(debugParsed.debug?.detector?.mode).toBe("wasm"); + expect(debugParsed.debug?.detector?.engine).toBe("whatlang-wasm"); + expect(debugParsed.debug?.detector?.windowsTotal).toBeGreaterThanOrEqual(1); + }); + test("rejects invalid detector mode values", () => { const result = spawnSync( process.execPath, @@ -519,6 +571,139 @@ describe("CLI batch output", () => { ]); const debugParsed = JSON.parse(debugOutput.stdout[0] ?? 
"{}"); expect(Array.isArray(debugParsed.skipped)).toBeTrue(); + expect(Array.isArray(debugParsed.debug?.skipped)).toBeTrue(); + expect(debugParsed.debug?.skipped).toEqual(debugParsed.skipped); + }); + + test("adds detector debug summaries to per-file json and keeps parity across jobs routes", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-json-detector-debug-summaries"); + await writeFile( + join(root, "a.txt"), + "This sentence should clearly be detected as English for the wasm detector path.", + ); + await writeFile( + join(root, "b.txt"), + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue.", + ); + + const noJobs = await captureCli([ + "--path", + root, + "--per-file", + "--format", + "json", + "--debug", + "--detector", + "wasm", + ]); + const jobsFour = await captureCli([ + "--path", + root, + "--per-file", + "--format", + "json", + "--debug", + "--detector", + "wasm", + "--jobs", + "4", + ]); + + const noJobsParsed = JSON.parse(noJobs.stdout[0] ?? "{}"); + const jobsFourParsed = JSON.parse(jobsFour.stdout[0] ?? "{}"); + + expect(noJobsParsed.debug?.detector?.mode).toBe("wasm"); + expect(jobsFourParsed.debug?.detector?.mode).toBe("wasm"); + expect(noJobsParsed.files.every((file: { debug?: { detector?: { windowsTotal?: number } } }) => + (file.debug?.detector?.windowsTotal ?? 0) >= 1)).toBeTrue(); + expect(jobsFourParsed.files.every((file: { debug?: { detector?: { windowsTotal?: number } } }) => + (file.debug?.detector?.windowsTotal ?? 
0) >= 1)).toBeTrue(); + expect(noJobsParsed.debug.detector.windowsTotal).toBe(jobsFourParsed.debug.detector.windowsTotal); + }); + + test("forwards detector debug events from worker batch runs", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-worker-detector-debug-events"); + await writeFile( + join(root, "a.txt"), + "This sentence should clearly be detected as English for the wasm detector path.", + ); + await writeFile( + join(root, "b.txt"), + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue.", + ); + + const output = await captureCli([ + "--path", + root, + "--format", + "raw", + "--debug", + "--verbose", + "--detector", + "wasm", + "--jobs", + "4", + ]); + + const eventNames = listDebugEventNames(output.stderr); + expect(eventNames.includes("detector.window.start")).toBeTrue(); + expect(eventNames.includes("detector.window.accepted")).toBeTrue(); + expect(eventNames.includes("detector.summary")).toBeTrue(); + }); + + test("marks batch detector debug events as file-scoped across executors", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-batch-detector-event-scope"); + await writeFile( + join(root, "a.txt"), + "This sentence should clearly be detected as English for the wasm detector path.", + ); + await writeFile( + join(root, "b.txt"), + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue.", + ); + + const asyncOutput = await captureCli([ + "--path", + root, + "--format", + "raw", + "--debug", + "--verbose", + "--detector", + "wasm", + ]); + const workerOutput = await captureCli([ + "--path", + root, + "--format", + "raw", + "--debug", + "--verbose", + "--detector", + "wasm", + "--jobs", + "4", + ]); + + for (const output of [asyncOutput, workerOutput]) { + const detectorEvents = parseDebugEvents(output.stderr).filter((item) => 
item.topic === "detector"); + expect(detectorEvents.length > 0).toBeTrue(); + expect( + detectorEvents.every((item) => item.scope === "file" && typeof item.path === "string"), + ).toBeTrue(); + } }); test("does not double count overlapping path inputs", async () => { @@ -867,6 +1052,41 @@ describe("CLI batch output", () => { throw new Error("Expected worker route to fail for invalid language tag."); }); + test("does not attach detector debug summaries in worker route without debug callback", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-jobs-worker-no-detector-debug"); + const filePath = join(root, "a.txt"); + await writeFile( + filePath, + "This sentence should clearly be detected as English for the wasm detector path.", + ); + + const outcome = await countBatchInputsWithWorkerJobs([filePath], { + jobs: 4, + section: "all", + detectorMode: "wasm", + wcOptions: { + mode: "chunk", + }, + preserveCollectorSegments: false, + }) + .then((result) => ({ result })) + .catch((error: unknown) => ({ error })); + + if ("error" in outcome) { + if (outcome.error instanceof WorkerRouteUnavailableError) { + return; + } + + throw outcome.error; + } + + expect(outcome.result.files[0]?.debug).toBeUndefined(); + }); + test("emits advisory warning when requested --jobs exceeds suggested limit", async () => { const root = await makeTempFixture("cli-jobs-advisory-warning"); await writeFile(join(root, "a.txt"), "alpha beta"); From 3d812b3aee8ee344befa1fd76659599df20c7bc6 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 17:10:08 +0800 Subject: [PATCH 17/23] docs: update status to completed for observability model and wasm latin detector quality documents --- .../research-2026-03-24-global-debug-observability-model.md | 2 +- ...ch-2026-03-24-wasm-latin-detector-quality-false-positives.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/docs/researches/research-2026-03-24-global-debug-observability-model.md b/docs/researches/research-2026-03-24-global-debug-observability-model.md index 9a0f66b..1a1e34c 100644 --- a/docs/researches/research-2026-03-24-global-debug-observability-model.md +++ b/docs/researches/research-2026-03-24-global-debug-observability-model.md @@ -2,7 +2,7 @@ title: "global debug observability model" created-date: 2026-03-24 modified-date: 2026-03-24 -status: in-progress +status: completed agent: Codex --- diff --git a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md index 09865fd..82666b1 100644 --- a/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md +++ b/docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md @@ -2,7 +2,7 @@ title: "wasm latin detector quality false positives" created-date: 2026-03-24 modified-date: 2026-03-24 -status: in-progress +status: completed agent: Codex --- From f8e6337f60bd1c8b7103e27d2330625a7c1ef3f4 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 17:35:25 +0800 Subject: [PATCH 18/23] docs(research): add detector evidence debug surface documentation --- ...6-03-24-detector-evidence-debug-surface.md | 270 ++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 docs/researches/research-2026-03-24-detector-evidence-debug-surface.md diff --git a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md new file mode 100644 index 0000000..fb0addc --- /dev/null +++ b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md @@ -0,0 +1,270 @@ +--- +title: "detector evidence debug surface" +created-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Define a user-facing diagnostics surface for `--detector wasm` that exposes detector 
judgment evidence, including raw engine confidence and reliability values, without overloading detector mode semantics or destabilizing the normal output contract. + +## Key Findings + +- This should not be a new detector mode. + - `regex` vs `wasm` is an execution choice. + - evidence and explanation are observability concerns, not detector-engine selection concerns. +- This should not become a second independent debug system. + - keep one shared debug pipeline + - add detector evidence as a higher-detail detector route within that existing pipeline +- A separate option is the cleaner contract surface. + - recommended flag: `--detector-evidence` + - acceptable alternates considered: `--detector-trace`, `--explain-detector` + - rejected direction: engine-specific names such as `wasm-...` because they overfit the current backend and age poorly if another detector engine is added later +- The first version should be debug-stream first, not a new primary output mode. + - recommended contract: `--detector-evidence` requires `--debug` + - evidence should flow through the existing structured debug event stream and optional debug report JSONL sink + - normal result JSON should remain small and result-oriented +- The existing observability model already gives the right layering: + - `debug.detector` in result JSON for small summaries + - JSONL/debug event stream for full evidence records + +## Debug Route Model + +Model this as one debug system with two detector detail levels: + +- standard debug route: + - runtime, path, batch, and current detector summary events +- detector-evidence route: + - an additional detector-specific high-detail surface layered on top of the same debug stream + +Recommended flag combinations: + +- no `--debug` + - no debug events + - no detector evidence +- `--debug` + - standard compact events + - compact detector summary only +- `--debug --verbose` + - standard verbose events + - detector window decision events +- `--debug --detector wasm 
--detector-evidence` + - standard debug events + - detector evidence events + - recommended first-version behavior: treat this as detector-verbose even if `--verbose` is omitted + +## Recommended CLI Direction + +- Keep `--detector wasm` as the engine selector. +- Add `--detector-evidence` as an additional diagnostics flag. +- First-version scope: + - meaningful only with `--detector wasm` + - requires `--debug` + - allowed with terminal debug output or `--debug-report` +- Suggested behavior: + - without `--detector-evidence`, keep current compact/verbose detector events + - with `--detector-evidence`, include deeper per-window evidence records in the debug stream + +## Recommended Output Direction + +### 1. Runtime Evidence Records + +Use the existing debug event-stream contract for full detector evidence. + +Recommended event name: + +- `detector.window.evidence` + +Recommended event fields: + +- envelope fields from the shared debug event contract +- path/file context when applicable +- `engine` +- `routeTag` +- `windowIndex` +- raw sample text or a bounded preview +- normalized sample text or a bounded preview +- eligibility result +- quality-gate result +- raw Whatlang values: + - `lang` + - `script` + - `confidence` + - `reliable` + - remapped public tag +- normalized-sample Whatlang values: + - `lang` + - `script` + - `confidence` + - `reliable` + - remapped public tag +- final decision: + - accepted vs fallback + - acceptance path + - final public tag + - fallback reason + +Illustrative shape: + +```json +{ + "schemaVersion": 1, + "timestamp": "2026-03-24T05:32:21.123Z", + "runId": "wc-debug-1774330341123-55149", + "topic": "detector", + "scope": "file", + "event": "detector.window.evidence", + "verbosity": "verbose", + "path": "docs/example.md", + "engine": "whatlang-wasm", + "routeTag": "und-Latn", + "windowIndex": 0, + "textPreview": "Hello world from alpha...", + "normalizedPreview": "Hello world from alpha", + "eligible": true, + "qualityGate": true, + 
"raw": { + "lang": "eng", + "script": "Latin", + "confidence": 0.93, + "reliable": true, + "remappedTag": "en" + }, + "normalized": { + "lang": "eng", + "script": "Latin", + "confidence": 0.91, + "reliable": true, + "remappedTag": "en" + }, + "decision": { + "accepted": true, + "path": "reliable", + "finalTag": "en", + "fallbackReason": null + } +} +``` + +### 2. Result JSON Summary + +Keep result JSON small and additive. + +- continue using `debug.detector` for compact summaries +- do not place full evidence records into normal result JSON in the first version + +Illustrative summary: + +```json +{ + "debug": { + "detector": { + "mode": "wasm", + "engine": "whatlang-wasm", + "windowsTotal": 3, + "accepted": 2, + "fallback": 1 + } + } +} +``` + +## Counting Mode Interaction + +Counting mode should not change the granularity of detector evidence. + +Recommended rule: + +- detector evidence is emitted for detector windows +- not for final output-mode rows +- not for per-character units +- not for collector aggregates + +Reason: + +- WASM detection evaluates solid ambiguous windows before result rendering +- output mode only changes how already-resolved chunks are counted and displayed afterward + +Implications by mode: + +- `chunk` + - result shows chunk breakdown + - evidence still shows detector windows +- `segments` + - result shows segments + - evidence still shows detector windows +- `collector` + - result aggregates by locale + - evidence still shows detector windows +- `char` + - result counts grapheme clusters + - evidence still shows detector windows +- `char-collector` + - result aggregates character totals by locale + - evidence still shows detector windows + +Recommended metadata fields for evidence records: + +- `mode` +- `section` +- `path` when batch execution is used +- optional section context when `--section` is active + +Those fields provide context only and should not change detector evidence granularity. 
+ +## Debug Report Filename Direction + +If `--detector-evidence` is enabled together with `--debug-report` and no explicit path is provided, use a distinct autogenerated filename so evidence-heavy reports are visually distinguishable from ordinary debug reports. + +Recommended default pattern: + +- `wc-detector-evidence-YYYYMMDD-HHmmss-utc-.jsonl` + +Reason: + +- keeps the shared debug pipeline intact +- makes evidence-focused runs easier to find and archive separately +- avoids mixing the higher-volume evidence surface into the default `wc-debug-...` report naming contract without a clear signal + +Compatibility note: + +- this should apply only to evidence-enabled autogenerated report names +- ordinary `--debug-report` without `--detector-evidence` should keep the existing `wc-debug-YYYYMMDD-HHmmss-utc-.jsonl` pattern + +## Why This Direction + +- Reusing the current debug stream avoids adding another one-off output mode. +- It fits the repository-wide observability model already implemented. +- It keeps normal CLI output stable. +- It gives advanced users exactly the evidence they want: + - raw engine confidence + - raw engine reliability + - normalized-sample comparison + - final acceptance or fallback reason + +## Open Questions + +- Whether evidence records should include full raw text windows or only bounded previews/redacted previews. + - first implementation should prefer bounded previews to reduce accidental log bloat +- Whether `--detector-evidence` should imply `--verbose`. + - recommended first answer: yes in behavior, while still requiring `--debug` +- Whether the distinct autogenerated filename should be added in the first implementation or deferred behind an explicit implementation decision. + - recommended first answer: include it in the first implementation because it is low-complexity and improves discoverability for evidence-heavy runs +- Whether a later standalone explain-style output mode is worth adding for one-off inspection. 
+ - recommended first answer: defer until debug-stream-first usage proves insufficient + +## Implications or Recommendations + +- If this moves to implementation, create a new plan rather than extending the now-completed debug observability plan. +- Keep the first implementation narrow: + - `--detector-evidence` + - `--detector wasm` + - `--debug` required + - JSONL/debug-stream first +- Do not introduce a new detector mode for this capability. + +## Related Research + +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` From ec4cff9259ea801f2de661f382c6c6d9ab469741 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 18:20:09 +0800 Subject: [PATCH 19/23] docs(research): enhance detector evidence verbosity behavior and clarify output requirements --- ...6-03-24-detector-evidence-debug-surface.md | 50 +++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md index fb0addc..509e255 100644 --- a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md +++ b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md @@ -213,6 +213,31 @@ Recommended metadata fields for evidence records: Those fields provide context only and should not change detector evidence granularity. +## Verbosity Behavior + +`--detector-evidence` should still require `--debug`, and evidence detail should follow verbosity. 
+ +Recommended first-version behavior: + +- `--debug --detector wasm --detector-evidence` + - emit detector evidence with bounded previews or redacted previews + - keep the evidence payload practical for terminal output and compact report inspection +- `--debug --verbose --detector wasm --detector-evidence` + - emit full raw text windows + - emit full normalized samples + - preserve the rest of the verbose detector evidence payload + +This keeps one consistent rule: + +- non-verbose evidence is bounded +- verbose evidence is full-fidelity + +Additional recommendations: + +- bounded preview length should be explicitly documented in implementation planning +- if truncation occurs, include a small boolean such as `truncated: true` so consumers know the text is partial +- terminal and report-file output can share the same event shape; verbosity controls the amount of text carried in that shape + ## Debug Report Filename Direction If `--detector-evidence` is enabled together with `--debug-report` and no explicit path is provided, use a distinct autogenerated filename so evidence-heavy reports are visually distinguishable from ordinary debug reports. @@ -243,16 +268,23 @@ Compatibility note: - normalized-sample comparison - final acceptance or fallback reason -## Open Questions +## Resolved Decisions + +- `--detector-evidence` requires `--debug`. +- Evidence detail follows verbosity: + - without `--verbose`: bounded previews or redacted previews + - with `--verbose`: full raw text windows and full normalized samples +- The distinct autogenerated evidence filename should be included in the first implementation: + - `wc-detector-evidence-YYYYMMDD-HHmmss-utc-.jsonl` +- A standalone explain-style output mode is deferred until debug-stream-first usage proves insufficient. + +## Remaining Open Questions -- Whether evidence records should include full raw text windows or only bounded previews/redacted previews. 
- - first implementation should prefer bounded previews to reduce accidental log bloat -- Whether `--detector-evidence` should imply `--verbose`. - - recommended first answer: yes in behavior, while still requiring `--debug` -- Whether the distinct autogenerated filename should be added in the first implementation or deferred behind an explicit implementation decision. - - recommended first answer: include it in the first implementation because it is low-complexity and improves discoverability for evidence-heavy runs -- Whether a later standalone explain-style output mode is worth adding for one-off inspection. - - recommended first answer: defer until debug-stream-first usage proves insufficient +- What exact bounded preview size should the first implementation use. +- Whether preview truncation should use: + - a fixed suffix marker in the string + - or a separate explicit metadata field such as `truncated: true` +- Whether large multiline windows should preserve original line breaks in non-verbose preview mode or collapse them for denser output. 
## Implications or Recommendations From a9a6c24050408fc1b052450f264cf9ad499ed73c Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 18:33:02 +0800 Subject: [PATCH 20/23] docs(detector): refine evidence debug research and add implementation plan --- ...-detector-evidence-debug-implementation.md | 124 ++++++++++++++++++ ...6-03-24-detector-evidence-debug-surface.md | 55 +++++--- 2 files changed, 159 insertions(+), 20 deletions(-) create mode 100644 docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md diff --git a/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md new file mode 100644 index 0000000..f824e2f --- /dev/null +++ b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md @@ -0,0 +1,124 @@ +--- +title: "detector evidence debug implementation" +created-date: 2026-03-24 +status: draft +agent: Codex +--- + +## Goal + +Implement `--detector-evidence` as a first-class extension of the existing debug pipeline so WASM detector runs can emit per-window evidence without changing normal counting results or output-mode semantics. + +## Context + +- The detector evidence research is now concrete enough to implement: + - `--detector-evidence` is the selected CLI surface + - it extends the shared debug pipeline rather than introducing a second diagnostics system + - evidence is emitted per detector window, independent of output mode + - compact evidence uses bounded previews and verbose evidence uses full-fidelity text +- The prior cross-cutting debug observability plan is complete. +- This follow-up should stay narrow and detector-focused rather than reopening the broader debug envelope work. 
+ +## Scope + +- In scope: + - add `--detector-evidence` to the CLI and runtime option plumbing + - require `--debug` for evidence output + - support first-version evidence only for `--detector wasm` + - emit `detector.window.evidence` events on the shared debug stream and optional debug-report JSONL sink + - add compact preview formatting and verbose full-text behavior + - use the dedicated autogenerated evidence report filename when `--debug-report` has no explicit path + - add regression coverage and schema/doc updates +- Out of scope: + - adding a new detector mode + - adding explain-style standalone output + - changing result JSON evidence granularity to match `chunk`, `segments`, `collector`, `char`, or `char-collector` + - moving full evidence into normal result JSON + +## Decisions Settled for This Plan + +- `--detector-evidence` requires `--debug`. +- First-version support is limited to `--detector wasm`. + - fail fast with a clear CLI error when the flag is used with another detector mode +- Evidence granularity is detector-window based and does not vary by output mode. +- The selected evidence event name is `detector.window.evidence`. +- Compact evidence text rules are: + - raw and normalized preview fields are each capped at `160` Unicode code points + - preview truncation is signaled by explicit boolean metadata fields rather than suffix markers inside the string + - non-verbose previews collapse line breaks and repeated whitespace to single spaces before truncation +- Verbose evidence includes full raw detector window text and full normalized sample text. +- When `--detector-evidence` and autogenerated `--debug-report` are used together, the default filename is: + - `wc-detector-evidence-YYYYMMDD-HHmmss-utc-.jsonl` +- Normal JSON output remains summary-oriented and should continue to expose only compact detector diagnostics under `debug.detector`. 
+ +## Phase Task Items + +### Phase 1 - CLI Surface and Validation + +- [ ] Add `--detector-evidence` to CLI option parsing and runtime option types. +- [ ] Require `--debug` when `--detector-evidence` is present. +- [ ] Reject first-version unsupported combinations such as `--detector-evidence` with non-WASM detector modes. +- [ ] Thread the resolved evidence flag through single-input, async batch, and worker batch execution paths. + +### Phase 2 - Debug Report Naming and Channel Plumbing + +- [ ] Extend debug report path generation so evidence-enabled autogenerated reports use the `wc-detector-evidence-...jsonl` naming pattern. +- [ ] Preserve the existing `wc-debug-...jsonl` autogenerated name for non-evidence runs. +- [ ] Keep collision suffix behavior consistent with the current debug report contract. + +### Phase 3 - Detector Evidence Event Emission + +- [ ] Add compact preview formatting helpers for detector evidence text fields. +- [ ] Emit `detector.window.evidence` from the WASM detector flow using the shared debug envelope. +- [ ] Include, at minimum: + - engine and route metadata + - raw and normalized Whatlang values + - eligibility and quality-gate results + - acceptance path or fallback reason + - detector-window indexing and file/section context when available +- [ ] Preserve file-scoped debug routing for batch runs and worker-forwarded detector evidence events. +- [ ] Keep existing detector summary and current detector-stage events intact unless a small consolidation clearly reduces duplication without losing detail. + +### Phase 4 - Regression Coverage + +- [ ] Add CLI validation tests for `--detector-evidence` gating. +- [ ] Add single-input tests for: + - compact evidence preview shape + - verbose full-text evidence shape +- [ ] Add batch tests for: + - async execution + - worker execution + - file-scoped/path-carrying evidence events +- [ ] Add report filename tests for the `wc-detector-evidence-...jsonl` autogenerated pattern. 
+- [ ] Add mode interaction tests proving evidence remains detector-window based under at least two materially different output modes such as `collector` and `char`. + +### Phase 5 - Schema Docs and CLI Docs + +- [ ] Update `docs/schemas/debug-event-stream-contract.md` for the new evidence event and evidence-enabled autogenerated filename branch. +- [ ] Update user-facing CLI/docs guidance where debug report naming and detector evidence usage need clarification. +- [ ] Add completion job records under `docs/plans/jobs/` after implementation lands. + +## Compatibility Gates + +- [ ] Default non-debug output remains unchanged. +- [ ] `--debug` without `--detector-evidence` keeps the existing compact and verbose detector behavior. +- [ ] Output mode selection does not affect detector evidence granularity. +- [ ] Evidence-heavy report naming changes only apply to autogenerated paths for evidence-enabled runs. +- [ ] Normal result JSON does not gain full evidence payloads in this phase. + +## Validation + +- `bun test test/command.test.ts` +- `bun test test/detector-interop.test.ts` +- `bun run type-check` +- `bun run build` + +## Related Research + +- `docs/researches/research-2026-03-24-detector-evidence-debug-surface.md` +- `docs/researches/research-2026-03-24-global-debug-observability-model.md` +- `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` + +## Related Plans + +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` diff --git a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md index 509e255..bd9f066 100644 --- a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md +++ b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md @@ -1,6 +1,7 @@ --- title: "detector evidence debug surface" created-date: 2026-03-24 +modified-date: 2026-03-24 status: draft agent: Codex --- @@ -51,8 
+52,10 @@ Recommended flag combinations: - detector window decision events - `--debug --detector wasm --detector-evidence` - standard debug events - - detector evidence events - - recommended first-version behavior: treat this as detector-verbose even if `--verbose` is omitted + - compact detector evidence events with bounded previews +- `--debug --verbose --detector wasm --detector-evidence` + - standard verbose events + - full-fidelity detector evidence events ## Recommended CLI Direction @@ -83,8 +86,14 @@ Recommended event fields: - `engine` - `routeTag` - `windowIndex` -- raw sample text or a bounded preview -- normalized sample text or a bounded preview +- in compact evidence mode: + - `textPreview` + - `textPreviewTruncated` + - `normalizedPreview` + - `normalizedPreviewTruncated` +- in verbose evidence mode: + - full raw `text` + - full normalized `normalizedText` - eligibility result - quality-gate result - raw Whatlang values: @@ -115,13 +124,15 @@ Illustrative shape: "topic": "detector", "scope": "file", "event": "detector.window.evidence", - "verbosity": "verbose", + "verbosity": "compact", "path": "docs/example.md", "engine": "whatlang-wasm", "routeTag": "und-Latn", "windowIndex": 0, - "textPreview": "Hello world from alpha...", - "normalizedPreview": "Hello world from alpha", + "textPreview": "Hello world from alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu", + "textPreviewTruncated": true, + "normalizedPreview": "Hello world from alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu", + "normalizedPreviewTruncated": false, "eligible": true, "qualityGate": true, "raw": { @@ -220,7 +231,7 @@ Those fields provide context only and should not change detector evidence granul Recommended first-version behavior: - `--debug --detector wasm --detector-evidence` - - emit detector evidence with bounded previews or redacted previews + - emit detector evidence with bounded previews - keep the evidence payload practical for terminal 
output and compact report inspection - `--debug --verbose --detector wasm --detector-evidence` - emit full raw text windows @@ -232,10 +243,14 @@ This keeps one consistent rule: - non-verbose evidence is bounded - verbose evidence is full-fidelity -Additional recommendations: +Selected first-version text rules: -- bounded preview length should be explicitly documented in implementation planning -- if truncation occurs, include a small boolean such as `truncated: true` so consumers know the text is partial +- bounded preview size should be `160` Unicode code points per field + - apply the bound independently to raw preview text and normalized preview text +- truncation should be signaled with explicit metadata fields, not suffix markers embedded in the preview string + - recommended field names: `textPreviewTruncated` and `normalizedPreviewTruncated` +- non-verbose multiline previews should collapse line breaks and repeated whitespace to single spaces before truncation + - reason: compact evidence should stay single-line and dense in terminal/debug-report inspection - terminal and report-file output can share the same event shape; verbosity controls the amount of text carried in that shape ## Debug Report Filename Direction @@ -272,20 +287,16 @@ Compatibility note: - `--detector-evidence` requires `--debug`. 
- Evidence detail follows verbosity: - - without `--verbose`: bounded previews or redacted previews + - without `--verbose`: bounded previews - with `--verbose`: full raw text windows and full normalized samples +- The compact preview contract for the first version is: + - at most `160` Unicode code points for each preview field + - truncation signaled with explicit boolean metadata fields + - multiline preview content collapsed to single-line whitespace - The distinct autogenerated evidence filename should be included in the first implementation: - `wc-detector-evidence-YYYYMMDD-HHmmss-utc-.jsonl` - A standalone explain-style output mode is deferred until debug-stream-first usage proves insufficient. -## Remaining Open Questions - -- What exact bounded preview size should the first implementation use. -- Whether preview truncation should use: - - a fixed suffix marker in the string - - or a separate explicit metadata field such as `truncated: true` -- Whether large multiline windows should preserve original line breaks in non-verbose preview mode or collapse them for denser output. - ## Implications or Recommendations - If this moves to implementation, create a new plan rather than extending the now-completed debug observability plan. 
@@ -300,3 +311,7 @@ Compatibility note: - `docs/researches/research-2026-03-24-global-debug-observability-model.md` - `docs/researches/research-2026-03-24-wasm-latin-detector-quality-false-positives.md` + +## Related Plans + +- `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` From 48aef812c7e3fc9ee154231e4d8e8bd7235483af Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 20:51:07 +0800 Subject: [PATCH 21/23] feat(detector): add debug evidence events for wasm fallback analysis --- ...3-24-detector-evidence-fallback-tag-fix.md | 34 ++ ...ctor-evidence-phases-1-4-implementation.md | 43 +++ ...-detector-evidence-debug-implementation.md | 47 +-- src/cli/batch/jobs/load-count-worker.ts | 2 + src/cli/batch/jobs/types.ts | 4 +- src/cli/batch/jobs/worker-pool.ts | 5 + src/cli/batch/jobs/worker/count-worker.ts | 9 + src/cli/batch/jobs/worker/protocol.ts | 4 +- src/cli/batch/run.ts | 16 + src/cli/debug/channel.ts | 4 +- src/cli/program/options.ts | 4 + src/cli/runtime/batch.ts | 1 + src/cli/runtime/single.ts | 9 + src/cli/runtime/types.ts | 1 + src/command.ts | 13 + src/detector/debug.ts | 29 ++ src/detector/wasm.ts | 308 ++++++++++++++++- test/command.test.ts | 317 ++++++++++++++++++ 18 files changed, 810 insertions(+), 40 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md create mode 100644 docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md diff --git a/docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md b/docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md new file mode 100644 index 0000000..92b79fa --- /dev/null +++ b/docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md @@ -0,0 +1,34 @@ +--- +title: "detector evidence fallback tag fix" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Correct detector evidence output so fallback windows report the same post-fallback Latin locale that the final counted 
result uses when Latin hints relabel `und-Latn`. + +## What Changed + +- Updated `src/detector/wasm.ts` so fallback debug payloads derive their reported final locale from the same deferred Latin fallback pass used by the runtime result. +- Kept the runtime fallback return value unchanged for the detector pipeline while fixing only the emitted debug metadata. +- Added a CLI regression test in `test/command.test.ts` that verifies `--detector-evidence` reports `de` instead of `und-Latn` for a hinted short Latin fallback window. + +## Why + +- The previous detector evidence payload could report `und-Latn` even when deferred Latin fallback relabeled the final chunk to a hinted locale such as `de`. +- That made the new debugging surface disagree with the actual counted output for supported Latin-hint configurations. + +## Verification + +- `bun test test/command.test.ts` +- `bun run type-check` + +## Related Plans + +- `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md` diff --git a/docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md b/docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md new file mode 100644 index 0000000..cd2bd1e --- /dev/null +++ b/docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md @@ -0,0 +1,43 @@ +--- +title: "detector evidence phases 1-4 implementation" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Implement phases 1 through 4 of `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` so `--detector-evidence` works end-to-end across single-input, async batch, and worker batch execution. 
+ +## What Changed + +- Added the `--detector-evidence` CLI flag and first-version validation rules: + - requires `--debug` + - requires `--detector wasm` +- Added evidence-aware debug report naming for autogenerated report paths: + - `wc-detector-evidence-YYYYMMDD-HHmmss-utc-<pid>.jsonl` +- Extended detector debug context so evidence configuration carries: + - compact vs verbose evidence behavior + - output mode metadata + - section metadata +- Added `detector.window.evidence` emission from the existing WASM detector decision flow. +- Kept evidence window-based instead of output-row-based across `chunk`, `collector`, and `char` usage. +- Preserved file-scoped debug routing for async batch and worker-forwarded detector evidence events. +- Added regression coverage for: + - CLI validation + - compact preview evidence payloads + - verbose full-text evidence payloads + - async batch and worker batch evidence routing + - evidence-specific autogenerated report filenames + - output-mode invariance of evidence granularity + +## Verification + +- `bun test test/command.test.ts` +- `bun test test/detector-interop.test.ts` +- `bun run type-check` +- `bun run build` + +## Remaining Work + +- Phase 5 of `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` remains open for schema and user-facing doc updates. 
diff --git a/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md index f824e2f..ff81a27 100644 --- a/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md +++ b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md @@ -1,7 +1,8 @@ --- title: "detector evidence debug implementation" created-date: 2026-03-24 -status: draft +modified-date: 2026-03-24 +status: active agent: Codex --- @@ -55,42 +56,42 @@ Implement `--detector-evidence` as a first-class extension of the existing debug ### Phase 1 - CLI Surface and Validation -- [ ] Add `--detector-evidence` to CLI option parsing and runtime option types. -- [ ] Require `--debug` when `--detector-evidence` is present. -- [ ] Reject first-version unsupported combinations such as `--detector-evidence` with non-WASM detector modes. -- [ ] Thread the resolved evidence flag through single-input, async batch, and worker batch execution paths. +- [x] Add `--detector-evidence` to CLI option parsing and runtime option types. +- [x] Require `--debug` when `--detector-evidence` is present. +- [x] Reject first-version unsupported combinations such as `--detector-evidence` with non-WASM detector modes. +- [x] Thread the resolved evidence flag through single-input, async batch, and worker batch execution paths. ### Phase 2 - Debug Report Naming and Channel Plumbing -- [ ] Extend debug report path generation so evidence-enabled autogenerated reports use the `wc-detector-evidence-...jsonl` naming pattern. -- [ ] Preserve the existing `wc-debug-...jsonl` autogenerated name for non-evidence runs. -- [ ] Keep collision suffix behavior consistent with the current debug report contract. +- [x] Extend debug report path generation so evidence-enabled autogenerated reports use the `wc-detector-evidence-...jsonl` naming pattern. +- [x] Preserve the existing `wc-debug-...jsonl` autogenerated name for non-evidence runs. 
+- [x] Keep collision suffix behavior consistent with the current debug report contract. ### Phase 3 - Detector Evidence Event Emission -- [ ] Add compact preview formatting helpers for detector evidence text fields. -- [ ] Emit `detector.window.evidence` from the WASM detector flow using the shared debug envelope. -- [ ] Include, at minimum: +- [x] Add compact preview formatting helpers for detector evidence text fields. +- [x] Emit `detector.window.evidence` from the WASM detector flow using the shared debug envelope. +- [x] Include, at minimum: - engine and route metadata - raw and normalized Whatlang values - eligibility and quality-gate results - acceptance path or fallback reason - detector-window indexing and file/section context when available -- [ ] Preserve file-scoped debug routing for batch runs and worker-forwarded detector evidence events. -- [ ] Keep existing detector summary and current detector-stage events intact unless a small consolidation clearly reduces duplication without losing detail. +- [x] Preserve file-scoped debug routing for batch runs and worker-forwarded detector evidence events. +- [x] Keep existing detector summary and current detector-stage events intact unless a small consolidation clearly reduces duplication without losing detail. ### Phase 4 - Regression Coverage -- [ ] Add CLI validation tests for `--detector-evidence` gating. -- [ ] Add single-input tests for: +- [x] Add CLI validation tests for `--detector-evidence` gating. +- [x] Add single-input tests for: - compact evidence preview shape - verbose full-text evidence shape -- [ ] Add batch tests for: +- [x] Add batch tests for: - async execution - worker execution - file-scoped/path-carrying evidence events -- [ ] Add report filename tests for the `wc-detector-evidence-...jsonl` autogenerated pattern. -- [ ] Add mode interaction tests proving evidence remains detector-window based under at least two materially different output modes such as `collector` and `char`. 
+- [x] Add report filename tests for the `wc-detector-evidence-...jsonl` autogenerated pattern. +- [x] Add mode interaction tests proving evidence remains detector-window based under at least two materially different output modes such as `collector` and `char`. ### Phase 5 - Schema Docs and CLI Docs @@ -100,11 +101,11 @@ Implement `--detector-evidence` as a first-class extension of the existing debug ## Compatibility Gates -- [ ] Default non-debug output remains unchanged. -- [ ] `--debug` without `--detector-evidence` keeps the existing compact and verbose detector behavior. -- [ ] Output mode selection does not affect detector evidence granularity. -- [ ] Evidence-heavy report naming changes only apply to autogenerated paths for evidence-enabled runs. -- [ ] Normal result JSON does not gain full evidence payloads in this phase. +- [x] Default non-debug output remains unchanged. +- [x] `--debug` without `--detector-evidence` keeps the existing compact and verbose detector behavior. +- [x] Output mode selection does not affect detector evidence granularity. +- [x] Evidence-heavy report naming changes only apply to autogenerated paths for evidence-enabled runs. +- [x] Normal result JSON does not gain full evidence payloads in this phase. ## Validation diff --git a/src/cli/batch/jobs/load-count-worker.ts b/src/cli/batch/jobs/load-count-worker.ts index fb87935..c97db63 100644 --- a/src/cli/batch/jobs/load-count-worker.ts +++ b/src/cli/batch/jobs/load-count-worker.ts @@ -94,6 +94,8 @@ export async function countBatchInputsWithWorkerJobs( detectorMode: options.detectorMode ?? 
"regex", wcOptions: options.wcOptions, preserveCollectorSegments: options.preserveCollectorSegments, + detectorEvidence: options.detectorEvidence, + debugVerbosity: options.debugVerbosity, onFileProcessed: options.onFileProcessed, onDetectorDebugEvent: options.onDetectorDebugEvent, debugEnabled: options.onDetectorDebugEvent !== undefined, diff --git a/src/cli/batch/jobs/types.ts b/src/cli/batch/jobs/types.ts index 89a4628..02ec967 100644 --- a/src/cli/batch/jobs/types.ts +++ b/src/cli/batch/jobs/types.ts @@ -1,6 +1,6 @@ import type { SectionMode } from "../../../markdown"; import type { DetectorMode } from "../../../detector"; -import type { DetectorDebugContext } from "../../../detector/debug"; +import type { DetectorDebugContext, DetectorDebugVerbosity } from "../../../detector/debug"; import type wordCounter from "../../../wc"; import type { BatchFileResult, BatchSkip } from "../../types"; import type { BatchProgressSnapshot } from "../../progress/reporter"; @@ -20,6 +20,8 @@ export type CountBatchWithJobsOptions = { detectorMode?: DetectorMode; wcOptions: Parameters[1]; preserveCollectorSegments: boolean; + detectorEvidence?: boolean; + debugVerbosity?: DetectorDebugVerbosity; onFileProcessed?: (snapshot: BatchProgressSnapshot) => void; createDetectorDebugContext?: (input: { path: string }) => DetectorDebugContext | undefined; onDetectorDebugEvent?: ( diff --git a/src/cli/batch/jobs/worker-pool.ts b/src/cli/batch/jobs/worker-pool.ts index 96ce888..81ea475 100644 --- a/src/cli/batch/jobs/worker-pool.ts +++ b/src/cli/batch/jobs/worker-pool.ts @@ -3,6 +3,7 @@ import { fileURLToPath } from "node:url"; import { Worker } from "node:worker_threads"; import type { SectionMode } from "../../../markdown"; import type { DetectorMode } from "../../../detector"; +import type { DetectorDebugVerbosity } from "../../../detector/debug"; import type wordCounter from "../../../wc"; import type { BatchProgressSnapshot } from "../../progress/reporter"; import type { 
BatchFileResult, BatchSkip } from "../../types"; @@ -19,6 +20,8 @@ type CountBatchInputsWithWorkerPoolOptions = { detectorMode: DetectorMode; wcOptions: Parameters[1]; preserveCollectorSegments: boolean; + detectorEvidence?: boolean; + debugVerbosity?: DetectorDebugVerbosity; onFileProcessed?: (snapshot: BatchProgressSnapshot) => void; onDetectorDebugEvent?: ( event: string, @@ -194,6 +197,8 @@ export async function countBatchInputsWithWorkerPool( detectorMode: options.detectorMode, wcOptions: options.wcOptions, preserveCollectorSegments: options.preserveCollectorSegments, + detectorEvidence: options.detectorEvidence, + debugVerbosity: options.debugVerbosity, debugEnabled: options.debugEnabled, }, }); diff --git a/src/cli/batch/jobs/worker/count-worker.ts b/src/cli/batch/jobs/worker/count-worker.ts index a8c5e8d..2d78aee 100644 --- a/src/cli/batch/jobs/worker/count-worker.ts +++ b/src/cli/batch/jobs/worker/count-worker.ts @@ -97,6 +97,15 @@ parentPort.on("message", async (message: WorkerRequestMessage) => { parentPort?.postMessage(debugEvent); }, summary: createDetectorDebugSummary(config.detectorMode), + ...(config.detectorEvidence + ? { + evidence: { + verbosity: config.debugVerbosity ?? "compact", + mode: config.wcOptions.mode ?? 
"chunk", + section: config.section, + }, + } + : {}), } : undefined; const result = diff --git a/src/cli/batch/jobs/worker/protocol.ts b/src/cli/batch/jobs/worker/protocol.ts index 7322e6c..d4d5999 100644 --- a/src/cli/batch/jobs/worker/protocol.ts +++ b/src/cli/batch/jobs/worker/protocol.ts @@ -1,5 +1,5 @@ import type { SectionMode, SectionedResult } from "../../../../markdown"; -import type { DetectorDebugSummary } from "../../../../detector/debug"; +import type { DetectorDebugSummary, DetectorDebugVerbosity } from "../../../../detector/debug"; import type { DetectorMode } from "../../../../detector"; import type { WordCounterOptions, WordCounterResult } from "../../../../wc"; import type { BatchSkip } from "../../../types"; @@ -9,6 +9,8 @@ export type WorkerConfig = { detectorMode: DetectorMode; wcOptions: WordCounterOptions; preserveCollectorSegments: boolean; + detectorEvidence?: boolean; + debugVerbosity?: DetectorDebugVerbosity; debugEnabled?: boolean; }; diff --git a/src/cli/batch/run.ts b/src/cli/batch/run.ts index f7d9302..3dd59b6 100644 --- a/src/cli/batch/run.ts +++ b/src/cli/batch/run.ts @@ -23,6 +23,7 @@ type RunBatchCountOptions = { section: SectionMode; wcOptions: DetectorWordCounterOptions; preserveCollectorSegments: boolean; + detectorEvidence: boolean; debug: DebugChannel; progressReporter: BatchProgressReporter; jobs: number; @@ -31,6 +32,14 @@ type RunBatchCountOptions = { }; export async function runBatchCount(options: RunBatchCountOptions): Promise { + const detectorEvidence = + options.detectorEvidence + ? { + verbosity: options.debug.verbosity, + mode: options.wcOptions.mode ?? "chunk", + section: options.section, + } + : undefined; const createFileDetectorDebugContext = ({ path }: { path: string }) => options.debug.enabled && options.wcOptions.detector === "wasm" ? 
{ @@ -51,6 +60,7 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { @@ -171,6 +183,8 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { @@ -186,6 +200,8 @@ export async function runBatchCount(options: RunBatchCountOptions): Promise { if (progressEnabled) { diff --git a/src/cli/debug/channel.ts b/src/cli/debug/channel.ts index a97a1a8..c20bafc 100644 --- a/src/cli/debug/channel.ts +++ b/src/cli/debug/channel.ts @@ -15,6 +15,7 @@ type DebugReportOptions = { path?: string; tee: boolean; cwd?: string; + autogeneratedNamePrefix?: string; }; export type CreateDebugChannelOptions = { @@ -95,7 +96,8 @@ function withCollisionSuffix(pathValue: string, sequence: number): string { function resolveReportPath(report: DebugReportOptions, now: Date, pid: number): string { const cwd = report.cwd ?? process.cwd(); - const defaultName = `wc-debug-${formatDebugReportTimestamp(now)}-utc-${pid}.jsonl`; + const autogeneratedNamePrefix = report.autogeneratedNamePrefix ?? "wc-debug"; + const defaultName = `${autogeneratedNamePrefix}-${formatDebugReportTimestamp(now)}-utc-${pid}.jsonl`; const explicitPathValue = typeof report.path === "string" ? report.path : undefined; const explicitPath = explicitPathValue !== undefined; const basePath = resolvePath(cwd, explicitPathValue ?? 
defaultName); diff --git a/src/cli/program/options.ts b/src/cli/program/options.ts index 624edb1..334cef3 100644 --- a/src/cli/program/options.ts +++ b/src/cli/program/options.ts @@ -102,6 +102,10 @@ export function configureProgramOptions( .option("--pretty", "pretty print JSON output", false) .option("--debug", "enable debug diagnostics on stderr") .option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)") + .option( + "--detector-evidence", + "emit per-window detector evidence on the debug stream (requires --debug and --detector wasm)", + ) .option("--debug-report [path]", "write debug diagnostics to a report file") .option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr") .option("--debug-tee", "alias of --debug-report-tee") diff --git a/src/cli/runtime/batch.ts b/src/cli/runtime/batch.ts index a52d198..3290b50 100644 --- a/src/cli/runtime/batch.ts +++ b/src/cli/runtime/batch.ts @@ -79,6 +79,7 @@ export async function executeBatchCount({ section: options.section, wcOptions: resolved.wcOptions, preserveCollectorSegments: options.format === "json", + detectorEvidence: Boolean(options.detectorEvidence), debug, progressReporter: createBatchProgressReporter({ enabled: options.format === "standard" && options.progress, diff --git a/src/cli/runtime/single.ts b/src/cli/runtime/single.ts index 64f361e..d189213 100644 --- a/src/cli/runtime/single.ts +++ b/src/cli/runtime/single.ts @@ -58,6 +58,15 @@ export async function executeSingleCount({ ? { emit: debug.emit, summary: detectorDebugSummary, + ...(options.detectorEvidence + ? { + evidence: { + verbosity: debug.verbosity, + mode: resolved.wcOptions.mode ?? 
"chunk", + section: options.section, + }, + } + : {}), } : undefined; diff --git a/src/cli/runtime/types.ts b/src/cli/runtime/types.ts index 452ba3c..ac72794 100644 --- a/src/cli/runtime/types.ts +++ b/src/cli/runtime/types.ts @@ -37,6 +37,7 @@ export type CliActionOptions = { quietSkips?: boolean; debug?: boolean; verbose?: boolean; + detectorEvidence?: boolean; debugReport?: string | boolean; debugReportTee?: boolean; debugTee?: boolean; diff --git a/src/command.ts b/src/command.ts index e2a0fb9..bd8c0bd 100644 --- a/src/command.ts +++ b/src/command.ts @@ -72,6 +72,16 @@ export async function runCli( return; } + if (options.detectorEvidence && !debugEnabled) { + program.error(pc.red("`--detector-evidence` requires `--debug`.")); + return; + } + + if (options.detectorEvidence && options.detector !== "wasm") { + program.error(pc.red("`--detector-evidence` requires `--detector wasm`.")); + return; + } + if (debugReportEnabled && !debugEnabled) { program.error(pc.red("`--debug-report` requires `--debug`.")); return; @@ -103,6 +113,9 @@ export async function runCli( ? { path: debugReportPath, tee: teeEnabled, + autogeneratedNamePrefix: options.detectorEvidence + ? 
"wc-detector-evidence" + : "wc-debug", } : undefined, }); diff --git a/src/detector/debug.ts b/src/detector/debug.ts index 908fd8b..c1e13ee 100644 --- a/src/detector/debug.ts +++ b/src/detector/debug.ts @@ -1,8 +1,17 @@ +import type { SectionMode } from "../markdown"; import { DEFAULT_HAN_TAG, DEFAULT_LOCALE } from "../wc/locale-detect"; +import type { WordCounterMode } from "../wc/types"; import type { DetectorRouteTag } from "./policy"; import type { DetectorMode } from "./types"; export type DetectorDebugVerbosity = "compact" | "verbose"; +export const DETECTOR_EVIDENCE_PREVIEW_LIMIT = 160; + +export type DetectorEvidenceConfig = { + verbosity: DetectorDebugVerbosity; + mode: WordCounterMode; + section: SectionMode; +}; export type DetectorDebugSummary = { mode: DetectorMode; @@ -34,6 +43,7 @@ export type DetectorDebugContext = { options?: { verbosity?: DetectorDebugVerbosity }, ) => void; summary?: DetectorDebugSummary; + evidence?: DetectorEvidenceConfig; }; export type DetectorFallbackReason = @@ -146,3 +156,22 @@ export function recordDetectorFallback( summary.fallback += 1; summary.fallbackReasons[reason] += 1; } + +export function createDetectorEvidencePreview(text: string): { + preview: string; + truncated: boolean; +} { + const collapsed = text.replace(/\s+/gu, " ").trim(); + const codePoints = Array.from(collapsed); + if (codePoints.length <= DETECTOR_EVIDENCE_PREVIEW_LIMIT) { + return { + preview: collapsed, + truncated: false, + }; + } + + return { + preview: codePoints.slice(0, DETECTOR_EVIDENCE_PREVIEW_LIMIT).join(""), + truncated: true, + }; +} diff --git a/src/detector/wasm.ts b/src/detector/wasm.ts index f0d465a..48b9eb7 100644 --- a/src/detector/wasm.ts +++ b/src/detector/wasm.ts @@ -4,18 +4,20 @@ import { resolveLocaleDetectContext } from "../wc/locale-detect"; import type { LocaleChunk } from "../wc/types"; import { buildWordCounterResultFromChunks } from "./result-builder"; import { + createDetectorEvidencePreview, recordDetectorAccepted, 
recordDetectorFallback, recordDetectorWindow, + type DetectorFallbackReason, } from "./debug"; import { countSectionsWithResolvedDetector } from "./sections"; import { DETECTOR_ROUTE_POLICIES, LATIN_WASM_CORROBORATED_MIN_CONFIDENCE, + countScriptBearingCharsForRoute, isAmbiguousDetectorRoute, normalizeDetectorSampleForRoute, shouldAcceptLatinDetectorWindow, - shouldRunWasmDetector, type DetectorRouteTag, } from "./policy"; import { detectWithWhatlangWasm, WASM_DETECTOR_RUNTIME_UNAVAILABLE_MESSAGE } from "./whatlang-wasm"; @@ -106,6 +108,142 @@ type DetectorWindow = { text: string; }; +function resolveFallbackDebugOutcome( + window: DetectorWindow, + options: DetectorLocaleOptions, +): { + finalTag: string; + finalLocales?: string[]; +} { + const fallbackTag = getDetectorFallbackTag(window.routeTag); + if (window.routeTag !== DEFAULT_LOCALE) { + return { finalTag: fallbackTag }; + } + + const relabeled = reapplyDeferredLatinFallback( + [ + { + locale: fallbackTag, + text: window.text, + }, + ], + options, + ); + const finalLocales = relabeled.map((chunk) => chunk.locale); + if (finalLocales.length === 1) { + return { + finalTag: finalLocales[0]!, + }; + } + + return finalLocales.length > 1 + ? { + finalTag: fallbackTag, + finalLocales, + } + : { + finalTag: fallbackTag, + }; +} + +function buildEvidenceSample( + result: Awaited> | null, + remappedTag: string | null, +) { + return { + lang: result?.lang ?? null, + script: result?.script ?? null, + confidence: result?.confidence ?? null, + reliable: result?.reliable ?? 
null, + remappedTag, + }; +} + +function emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate, + rawResult, + rawRemappedTag, + normalizedResult, + normalizedRemappedTag, + decision, + debug, +}: { + window: DetectorWindow; + windowIndex: number; + normalizedSample: string; + eligible: boolean; + qualityGate: boolean; + rawResult: Awaited> | null; + rawRemappedTag: string | null; + normalizedResult: Awaited> | null; + normalizedRemappedTag: string | null; + decision: { + accepted: boolean; + path: "reliable" | "corroborated" | null; + finalTag: string; + finalLocales?: string[]; + fallbackReason: DetectorFallbackReason | null; + }; + debug?: DetectorLocaleOptions["detectorDebug"]; +}): void { + const evidence = debug?.evidence; + if (!evidence || !debug.emit) { + return; + } + + const routePolicy = DETECTOR_ROUTE_POLICIES[window.routeTag]; + const baseDetails = { + engine: "whatlang-wasm", + routeTag: window.routeTag, + windowIndex, + startIndex: window.startIndex, + endIndex: window.endIndex, + mode: evidence.mode, + section: evidence.section, + textLength: window.text.length, + normalizedLength: normalizedSample.length, + normalizedApplied: normalizedSample !== window.text, + scriptChars: countScriptBearingCharsForRoute(window.text, window.routeTag), + minScriptChars: routePolicy.minScriptChars, + eligible, + qualityGate, + raw: buildEvidenceSample(rawResult, rawRemappedTag), + normalized: buildEvidenceSample(normalizedResult, normalizedRemappedTag), + decision, + }; + + if (evidence.verbosity === "verbose") { + debug.emit( + "detector.window.evidence", + { + ...baseDetails, + text: window.text, + normalizedText: normalizedSample, + }, + { verbosity: "verbose" }, + ); + return; + } + + const textPreview = createDetectorEvidencePreview(window.text); + const normalizedPreview = createDetectorEvidencePreview(normalizedSample); + debug.emit( + "detector.window.evidence", + { + ...baseDetails, + textPreview: 
textPreview.preview, + textPreviewTruncated: textPreview.truncated, + normalizedPreview: normalizedPreview.preview, + normalizedPreviewTruncated: normalizedPreview.truncated, + }, + { verbosity: "compact" }, + ); +} + function buildDetectorWindows(chunks: LocaleChunk[]): DetectorWindow[] { const windows: DetectorWindow[] = []; @@ -139,6 +277,8 @@ function buildDetectorWindows(chunks: LocaleChunk[]): DetectorWindow[] { async function resolveWindowLocale( window: DetectorWindow, + windowIndex: number, + options: DetectorLocaleOptions, debug?: DetectorLocaleOptions["detectorDebug"], ): Promise { recordDetectorWindow(debug?.summary, window.routeTag); @@ -153,11 +293,43 @@ async function resolveWindowLocale( { verbosity: "verbose" }, ); - if (!shouldRunWasmDetector(window.text, window.routeTag)) { + const routePolicy = DETECTOR_ROUTE_POLICIES[window.routeTag]; + const scriptChars = countScriptBearingCharsForRoute(window.text, window.routeTag); + const eligible = scriptChars >= routePolicy.minScriptChars; + const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag); + const passesLatinQualityGate = + window.routeTag !== DEFAULT_LOCALE || shouldAcceptLatinDetectorWindow(window.text, normalizedSample); + + if (!eligible) { recordDetectorFallback(debug?.summary, "notEligible"); + const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult: null, + rawRemappedTag: null, + normalizedResult: null, + normalizedRemappedTag: null, + decision: { + accepted: false, + path: null, + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? 
{ finalLocales: fallbackDebugOutcome.finalLocales } + : {}), + fallbackReason: "notEligible", + }, + debug, + }); debug?.emit?.("detector.window.fallback", { routeTag: window.routeTag, - finalTag: window.routeTag, + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), reason: "notEligible", }); return window.routeTag; @@ -165,10 +337,6 @@ async function resolveWindowLocale( const rawResult = await detectWithWhatlangWasm(window.text, window.routeTag); const rawRemapped = rawResult ? remapWhatlangResult(rawResult, window.routeTag) : null; - - const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag); - const passesLatinQualityGate = - window.routeTag !== DEFAULT_LOCALE || shouldAcceptLatinDetectorWindow(window.text, normalizedSample); const normalizedResult = normalizedSample.length > 0 && normalizedSample !== window.text ? await detectWithWhatlangWasm(normalizedSample, window.routeTag) @@ -203,9 +371,34 @@ async function resolveWindowLocale( const candidates = [rawRemapped, normalizedRemapped].filter((value) => value !== null); if (candidates.length === 0) { recordDetectorFallback(debug?.summary, "noCandidate"); + const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult, + rawRemappedTag: rawRemapped?.tag ?? null, + normalizedResult, + normalizedRemappedTag: normalizedRemapped?.tag ?? null, + decision: { + accepted: false, + path: null, + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? 
{ finalLocales: fallbackDebugOutcome.finalLocales } + : {}), + fallbackReason: "noCandidate", + }, + debug, + }); debug?.emit?.("detector.window.fallback", { routeTag: window.routeTag, - finalTag: getDetectorFallbackTag(window.routeTag), + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), reason: "noCandidate", }); return getDetectorFallbackTag(window.routeTag); @@ -228,6 +421,24 @@ async function resolveWindowLocale( ) ) { recordDetectorAccepted(debug?.summary, "reliable"); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult, + rawRemappedTag: rawRemapped?.tag ?? null, + normalizedResult, + normalizedRemappedTag: normalizedRemapped?.tag ?? null, + decision: { + accepted: true, + path: "reliable", + finalTag: strongestCandidate.tag, + fallbackReason: null, + }, + debug, + }); debug?.emit?.("detector.window.accepted", { routeTag: window.routeTag, finalTag: strongestCandidate.tag, @@ -256,6 +467,24 @@ async function resolveWindowLocale( corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE ) { recordDetectorAccepted(debug?.summary, "corroborated"); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult, + rawRemappedTag: rawRemapped.tag, + normalizedResult, + normalizedRemappedTag: normalizedRemapped.tag, + decision: { + accepted: true, + path: "corroborated", + finalTag: rawRemapped.tag, + fallbackReason: null, + }, + debug, + }); debug?.emit?.("detector.window.accepted", { routeTag: window.routeTag, finalTag: rawRemapped.tag, @@ -268,20 +497,71 @@ async function resolveWindowLocale( if (!hasReliableCorroboration && corroboratedConfidence >= LATIN_WASM_CORROBORATED_MIN_CONFIDENCE) { recordDetectorFallback(debug?.summary, "corroborationUnreliable"); + const fallbackDebugOutcome = 
resolveFallbackDebugOutcome(window, options); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult, + rawRemappedTag: rawRemapped.tag, + normalizedResult, + normalizedRemappedTag: normalizedRemapped.tag, + decision: { + accepted: false, + path: null, + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), + fallbackReason: "corroborationUnreliable", + }, + debug, + }); debug?.emit?.("detector.window.fallback", { routeTag: window.routeTag, - finalTag: getDetectorFallbackTag(window.routeTag), + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), reason: "corroborationUnreliable", }); return getDetectorFallbackTag(window.routeTag); } } - recordDetectorFallback(debug?.summary, passesLatinQualityGate ? "belowThreshold" : "qualityGate"); + const fallbackReason = passesLatinQualityGate ? "belowThreshold" : "qualityGate"; + recordDetectorFallback(debug?.summary, fallbackReason); + const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options); + emitDetectorWindowEvidence({ + window, + windowIndex, + normalizedSample, + eligible, + qualityGate: passesLatinQualityGate, + rawResult, + rawRemappedTag: rawRemapped?.tag ?? null, + normalizedResult, + normalizedRemappedTag: normalizedRemapped?.tag ?? null, + decision: { + accepted: false, + path: null, + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), + fallbackReason, + }, + debug, + }); debug?.emit?.("detector.window.fallback", { routeTag: window.routeTag, - finalTag: getDetectorFallbackTag(window.routeTag), - reason: passesLatinQualityGate ? 
"belowThreshold" : "qualityGate", + finalTag: fallbackDebugOutcome.finalTag, + ...(fallbackDebugOutcome.finalLocales + ? { finalLocales: fallbackDebugOutcome.finalLocales } + : {}), + reason: fallbackReason, }); return getDetectorFallbackTag(window.routeTag); } @@ -300,8 +580,8 @@ export async function segmentTextByLocaleWithWasmDetector( const resolved = [...chunks]; const windows = buildDetectorWindows(chunks); - for (const window of windows) { - const resolvedLocale = await resolveWindowLocale(window, options.detectorDebug); + for (const [windowIndex, window] of windows.entries()) { + const resolvedLocale = await resolveWindowLocale(window, windowIndex, options, options.detectorDebug); for (let index = window.startIndex; index <= window.endIndex; index += 1) { const chunk = resolved[index]; if (!chunk) { diff --git a/test/command.test.ts b/test/command.test.ts index 6855ea1..b0c2771 100644 --- a/test/command.test.ts +++ b/test/command.test.ts @@ -126,6 +126,13 @@ function listDebugEventNames(stderr: string[]): string[] { .filter((event): event is string => typeof event === "string"); } +function findDebugEvents( + stderr: string[], + eventName: string, +): Array> { + return parseDebugEvents(stderr).filter((item) => item.event === eventName); +} + describe("batch path resolution", () => { test("expands directory recursively with deterministic ordering", async () => { const root = await makeTempFixture("batch-order"); @@ -290,6 +297,180 @@ describe("detector mode", () => { expect(eventNames.includes("detector.summary")).toBeTrue(); }); + test("rejects --detector-evidence without --debug", async () => { + const result = spawnSync( + process.execPath, + [ + "run", + "src/bin.ts", + "--detector", + "wasm", + "--detector-evidence", + "This sentence should clearly be detected as English for the wasm detector path.", + ], + { + cwd: process.cwd(), + encoding: "utf8", + }, + ); + + expect(result.status).toBe(1); + expect(result.stderr).toContain("`--detector-evidence` 
requires `--debug`."); + }); + + test("rejects --detector-evidence without --detector wasm", async () => { + const result = spawnSync( + process.execPath, + [ + "run", + "src/bin.ts", + "--debug", + "--detector-evidence", + "This sentence should clearly be detected as English for the wasm detector path.", + ], + { + cwd: process.cwd(), + encoding: "utf8", + }, + ); + + expect(result.status).toBe(1); + expect(result.stderr).toContain("`--detector-evidence` requires `--detector wasm`."); + }); + + test("emits compact detector evidence previews without verbose mode", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const sample = [ + "This sentence should clearly be detected as English for the wasm detector path.", + "", + "This second sentence adds enough length to force compact preview truncation while remaining strong prose for the detector quality gate.", + "This third sentence keeps the sample comfortably above the preview cap and introduces extra spacing.", + ].join("\n"); + + const output = await captureCli([ + "--detector", + "wasm", + "--format", + "raw", + "--debug", + "--detector-evidence", + sample, + ]); + + const evidenceEvents = findDebugEvents(output.stderr, "detector.window.evidence"); + expect(evidenceEvents.length).toBe(1); + const evidence = evidenceEvents[0]!; + expect(evidence.verbosity).toBe("compact"); + expect(evidence.mode).toBe("chunk"); + expect(evidence.section).toBe("all"); + expect(typeof evidence.textPreview).toBe("string"); + expect(typeof evidence.normalizedPreview).toBe("string"); + expect(evidence.textPreviewTruncated).toBeTrue(); + expect(evidence.text).toBeUndefined(); + expect(evidence.normalizedText).toBeUndefined(); + expect(String(evidence.textPreview)).not.toContain("\n"); + expect(String(evidence.textPreview)).not.toContain(" "); + }); + + test("reports hinted Latin fallback tags in detector evidence", async () => { + const output = await captureCli([ + "--detector", + "wasm", + "--format", + "raw", + 
"--debug", + "--detector-evidence", + "--latin-language", + "de", + "Über", + ]); + + const evidenceEvents = findDebugEvents(output.stderr, "detector.window.evidence"); + expect(evidenceEvents.length).toBe(1); + const evidenceDecision = evidenceEvents[0]?.decision as Record | undefined; + expect(evidenceDecision?.finalTag).toBe("de"); + + const fallbackEvents = findDebugEvents(output.stderr, "detector.window.fallback"); + expect(fallbackEvents.length).toBe(1); + expect(fallbackEvents[0]?.finalTag).toBe("de"); + }); + + test("emits full detector evidence text in verbose mode", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const sample = [ + "This sentence should clearly be detected as English for the wasm detector path.", + "", + "This second sentence adds enough length to make the verbose detector evidence payload interesting.", + ].join("\n"); + + const output = await captureCli([ + "--detector", + "wasm", + "--format", + "raw", + "--debug", + "--verbose", + "--detector-evidence", + sample, + ]); + + const evidenceEvents = findDebugEvents(output.stderr, "detector.window.evidence"); + expect(evidenceEvents.length).toBe(1); + const evidence = evidenceEvents[0]!; + expect(evidence.verbosity).toBe("verbose"); + expect(evidence.text).toBe(sample); + expect(typeof evidence.normalizedText).toBe("string"); + expect(String(evidence.text)).toContain("\n\n"); + expect(evidence.textPreview).toBeUndefined(); + expect(evidence.normalizedPreview).toBeUndefined(); + }); + + test("keeps detector evidence window counts stable across output modes", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const sample = + "This sentence should clearly be detected as English for the wasm detector path. 
This follow-up sentence keeps the window long enough for detector evidence regardless of output mode."; + + const collectorOutput = await captureCli([ + "--detector", + "wasm", + "--mode", + "collector", + "--format", + "raw", + "--debug", + "--detector-evidence", + sample, + ]); + const charOutput = await captureCli([ + "--detector", + "wasm", + "--mode", + "char", + "--format", + "raw", + "--debug", + "--detector-evidence", + sample, + ]); + + const collectorEvidence = findDebugEvents(collectorOutput.stderr, "detector.window.evidence"); + const charEvidence = findDebugEvents(charOutput.stderr, "detector.window.evidence"); + + expect(collectorEvidence.length).toBeGreaterThan(0); + expect(collectorEvidence.length).toBe(charEvidence.length); + expect(collectorEvidence[0]?.mode).toBe("collector"); + expect(charEvidence[0]?.mode).toBe("char"); + }); + test("adds debug detector summary to single-input json only when --debug is enabled", async () => { if (!hasWasmDetectorRuntime()) { return; @@ -706,6 +887,86 @@ describe("CLI batch output", () => { } }); + test("emits file-scoped compact detector evidence events in async batch runs", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-batch-detector-evidence-async"); + await writeFile( + join(root, "a.txt"), + "This sentence should clearly be detected as English for the wasm detector path. 
This second sentence keeps the detector window long enough for evidence output.", + ); + await writeFile( + join(root, "b.txt"), + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue et emette une preuve utile.", + ); + + const output = await captureCli([ + "--path", + root, + "--format", + "raw", + "--debug", + "--detector", + "wasm", + "--detector-evidence", + ]); + + const evidenceEvents = findDebugEvents(output.stderr, "detector.window.evidence"); + expect(evidenceEvents.length).toBeGreaterThanOrEqual(2); + expect( + evidenceEvents.every( + (item) => + item.scope === "file" && + item.verbosity === "compact" && + typeof item.path === "string" && + typeof item.textPreview === "string", + ), + ).toBeTrue(); + }); + + test("forwards file-scoped detector evidence events from worker batch runs", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-batch-detector-evidence-worker"); + await writeFile( + join(root, "a.txt"), + "This sentence should clearly be detected as English for the wasm detector path. 
This second sentence keeps the detector window long enough for evidence output.", + ); + await writeFile( + join(root, "b.txt"), + "Ceci est une phrase francaise suffisamment longue pour que le detecteur identifie correctement la langue et emette une preuve utile.", + ); + + const output = await captureCli([ + "--path", + root, + "--format", + "raw", + "--debug", + "--detector", + "wasm", + "--detector-evidence", + "--jobs", + "4", + ]); + + const evidenceEvents = findDebugEvents(output.stderr, "detector.window.evidence"); + expect(evidenceEvents.length).toBeGreaterThanOrEqual(2); + expect( + evidenceEvents.every( + (item) => + item.scope === "file" && + item.verbosity === "compact" && + typeof item.path === "string" && + typeof item.textPreview === "string", + ), + ).toBeTrue(); + }); + test("does not double count overlapping path inputs", async () => { const root = await makeTempFixture("cli-overlap"); const explicitPath = join(root, "a.txt"); @@ -1886,6 +2147,35 @@ describe("CLI debug diagnostics", () => { expect(reports.length).toBe(1); }); + test("creates deterministic detector evidence debug report name in cwd", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const root = await makeTempFixture("cli-detector-evidence-report-default-name"); + const previousCwd = process.cwd(); + + process.chdir(root); + try { + await captureCli([ + "--debug", + "--debug-report", + "--detector", + "wasm", + "--detector-evidence", + "This sentence should clearly be detected as English for the wasm detector path.", + ]); + } finally { + process.chdir(previousCwd); + } + + const entries = await readdir(root); + const reports = entries.filter((entry) => + /^wc-detector-evidence-\d{8}-\d{6}-utc-\d+(-\d+)?\.jsonl$/.test(entry), + ); + expect(reports.length).toBe(1); + }); + test("adds collision suffix for default debug report filenames", async () => { const root = await makeTempFixture("cli-debug-report-collision"); const fixedNow = new Date(Date.UTC(2026, 1, 16, 
12, 34, 56)); @@ -1908,6 +2198,33 @@ describe("CLI debug diagnostics", () => { const report = await readFile(expectedPath, "utf8"); expect(report.includes('"event":"batch.resolve.start"')).toBeTrue(); }); + + test("adds collision suffix for default detector evidence report filenames", async () => { + const root = await makeTempFixture("cli-detector-evidence-report-collision"); + const fixedNow = new Date(Date.UTC(2026, 1, 16, 12, 34, 56)); + const baseName = "wc-detector-evidence-20260216-123456-utc-4321.jsonl"; + await writeFile(join(root, baseName), "existing"); + + const debug = createDebugChannel({ + enabled: true, + verbosity: "compact", + report: { + tee: false, + cwd: root, + autogeneratedNamePrefix: "wc-detector-evidence", + }, + now: () => fixedNow, + pid: 4321, + }); + const expectedPath = join(root, "wc-detector-evidence-20260216-123456-utc-4321-1.jsonl"); + + expect(debug.reportPath).toBe(expectedPath); + debug.emit("detector.window.evidence", { windowIndex: 0 }); + await debug.close(); + + const report = await readFile(expectedPath, "utf8"); + expect(report.includes('"event":"detector.window.evidence"')).toBeTrue(); + }); }); describe("extension filters", () => { From dfd560ea06a1b2c89f760e000baf3e825c998cea Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 20:54:41 +0800 Subject: [PATCH 22/23] docs(detector): finalize evidence contract and close implementation plan --- README.md | 14 +++- ...tector-evidence-phase5-docs-and-closure.md | 43 ++++++++++++ ...-detector-evidence-debug-implementation.md | 14 ++-- ...6-03-24-detector-evidence-debug-surface.md | 2 +- docs/schemas/debug-event-stream-contract.md | 70 ++++++++++++++++++- 5 files changed, 136 insertions(+), 7 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-detector-evidence-phase5-docs-and-closure.md diff --git a/README.md b/README.md index 1e5cf19..fb7c77c 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,7 @@ word-counter --path ./examples/test-case-multi-files-support 
--debug --verbose Use `--debug-report [path]` to route debug diagnostics to a JSONL report file: - no path: writes to current working directory with pattern `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` +- no path with `--detector-evidence`: writes with pattern `wc-detector-evidence-YYYYMMDD-HHmmss-utc-<pid>.jsonl` - path provided: writes to the specified location - default-name collision handling: appends `-<n>` suffix to avoid overwriting existing files - explicit path validation: existing directories are rejected (explicit paths are treated as file targets) @@ -295,7 +296,15 @@ Use `--debug-report [path]` to route debug diagnostics to a JSONL report file: By default with `--debug-report`, debug lines are file-only (not mirrored to terminal). Use `--debug-report-tee` (alias: `--debug-tee`) to mirror to both file and `stderr`. -Flag dependencies: `--verbose` requires `--debug`; `--debug-report` requires `--debug`; `--debug-report-tee`/`--debug-tee` requires `--debug-report`. +Flag dependencies: `--verbose` requires `--debug`; `--detector-evidence` requires `--debug` and `--detector wasm`; `--debug-report` requires `--debug`; `--debug-report-tee`/`--debug-tee` requires `--debug-report`. 
+ +Use `--detector-evidence` to add per-window detector evidence onto the same debug stream: + +- only meaningful with `--detector wasm` +- compact mode emits bounded single-line previews plus detector decision metadata +- verbose mode emits full raw detector windows and full normalized samples +- evidence remains detector-window based even when output mode changes to `collector`, `char`, or another counting mode +- fallback evidence reports the post-fallback final tag used by downstream counting output; in rare split-relabel cases it may also include `finalLocales` Examples: @@ -304,6 +313,9 @@ word-counter --path ./examples/test-case-multi-files-support --debug --debug-rep word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-report-tee word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-tee +word-counter --detector wasm --debug --detector-evidence "This sentence should clearly be detected as English for the wasm detector path." +word-counter --detector wasm --debug --verbose --detector-evidence "This sentence should clearly be detected as English for the wasm detector path." +word-counter --detector wasm --debug --detector-evidence --debug-report ``` Skip details stay debug-gated and can be suppressed with `--quiet-skips`. 
diff --git a/docs/plans/jobs/2026-03-24-detector-evidence-phase5-docs-and-closure.md b/docs/plans/jobs/2026-03-24-detector-evidence-phase5-docs-and-closure.md new file mode 100644 index 0000000..cfa1990 --- /dev/null +++ b/docs/plans/jobs/2026-03-24-detector-evidence-phase5-docs-and-closure.md @@ -0,0 +1,43 @@ +--- +title: "detector evidence phase 5 docs and closure" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Finish phase 5 of `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` by documenting the detector evidence event contract, the evidence-specific debug-report naming branch, and the fallback-tag alignment behavior. + +## What Changed + +- Updated `docs/schemas/debug-event-stream-contract.md` to document: + - `detector.window.evidence` + - evidence-enabled autogenerated debug-report filenames + - compact vs verbose evidence payload rules + - optional `decision.finalLocales` and fallback-event `finalLocales` for split relabeling cases + - post-fallback `finalTag` behavior for hinted Latin fallback windows +- Updated `README.md` to document: + - `--detector-evidence` flag dependencies and examples + - evidence-specific autogenerated report filenames + - compact and verbose detector evidence behavior + - the fallback-tag alignment note and optional `finalLocales` disclosure +- Marked the detector-evidence implementation plan completed. + +## Why + +- The implementation was already complete through code and tests, but the published schema and user-facing docs still lagged the behavior. +- The fallback-tag fix introduced a small but important contract detail: debug evidence should report the same post-fallback tag that users see in final counting output, and split relabeling may surface as optional `finalLocales`. + +## Verification + +- Documentation-only pass; no code or tests were required for this phase. 
+ +## Related Plans + +- `docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md` +- `docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md` diff --git a/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md index ff81a27..0c29d2f 100644 --- a/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md +++ b/docs/plans/plan-2026-03-24-detector-evidence-debug-implementation.md @@ -2,7 +2,7 @@ title: "detector evidence debug implementation" created-date: 2026-03-24 modified-date: 2026-03-24 -status: active +status: completed agent: Codex --- @@ -95,9 +95,9 @@ Implement `--detector-evidence` as a first-class extension of the existing debug ### Phase 5 - Schema Docs and CLI Docs -- [ ] Update `docs/schemas/debug-event-stream-contract.md` for the new evidence event and evidence-enabled autogenerated filename branch. -- [ ] Update user-facing CLI/docs guidance where debug report naming and detector evidence usage need clarification. -- [ ] Add completion job records under `docs/plans/jobs/` after implementation lands. +- [x] Update `docs/schemas/debug-event-stream-contract.md` for the new evidence event and evidence-enabled autogenerated filename branch. +- [x] Update user-facing CLI/docs guidance where debug report naming and detector evidence usage need clarification. +- [x] Add completion job records under `docs/plans/jobs/` after implementation lands. 
## Compatibility Gates @@ -123,3 +123,9 @@ Implement `--detector-evidence` as a first-class extension of the existing debug ## Related Plans - `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` + +## Related Jobs + +- `docs/plans/jobs/2026-03-24-detector-evidence-phases-1-4-implementation.md` +- `docs/plans/jobs/2026-03-24-detector-evidence-fallback-tag-fix.md` +- `docs/plans/jobs/2026-03-24-detector-evidence-phase5-docs-and-closure.md` diff --git a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md index bd9f066..5216d59 100644 --- a/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md +++ b/docs/researches/research-2026-03-24-detector-evidence-debug-surface.md @@ -2,7 +2,7 @@ title: "detector evidence debug surface" created-date: 2026-03-24 modified-date: 2026-03-24 -status: draft +status: completed agent: Codex --- diff --git a/docs/schemas/debug-event-stream-contract.md b/docs/schemas/debug-event-stream-contract.md index edf53ad..89bd7ce 100644 --- a/docs/schemas/debug-event-stream-contract.md +++ b/docs/schemas/debug-event-stream-contract.md @@ -1,6 +1,7 @@ --- title: "Debug Event Stream Contract" created-date: 2026-03-24 +modified-date: 2026-03-24 status: completed agent: Codex --- @@ -96,12 +97,76 @@ Example: When `--debug-report` is used without an explicit path, the autogenerated filename is: -`wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` +- `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl` for ordinary debug runs +- `wc-detector-evidence-YYYYMMDD-HHmmss-utc-<pid>.jsonl` when `--detector-evidence` is enabled Collision handling: - append `-<n>` before `.jsonl` if the autogenerated path already exists +## Detector Evidence Event + +When `--debug --detector wasm --detector-evidence` is enabled, the debug stream may include `detector.window.evidence` events. 
+ +Compact evidence example: + +```json +{ + "schemaVersion": 1, + "timestamp": "2026-03-24T05:32:21.123Z", + "runId": "wc-debug-1774330341123-55149", + "topic": "detector", + "scope": "file", + "event": "detector.window.evidence", + "verbosity": "compact", + "path": "docs/example.md", + "engine": "whatlang-wasm", + "routeTag": "und-Latn", + "windowIndex": 0, + "mode": "chunk", + "section": "all", + "textPreview": "Hello world from alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu", + "textPreviewTruncated": true, + "normalizedPreview": "Hello world from alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu", + "normalizedPreviewTruncated": false, + "eligible": true, + "qualityGate": true, + "raw": { + "lang": "eng", + "script": "Latin", + "confidence": 0.93, + "reliable": true, + "remappedTag": "en" + }, + "normalized": { + "lang": "eng", + "script": "Latin", + "confidence": 0.91, + "reliable": true, + "remappedTag": "en" + }, + "decision": { + "accepted": true, + "path": "reliable", + "finalTag": "en", + "fallbackReason": null + } +} +``` + +Field notes: + +- compact evidence uses single-line whitespace-collapsed previews capped at `160` Unicode code points per preview field +- verbose evidence replaces preview fields with full `text` and `normalizedText` +- `decision.finalTag` reports the post-fallback tag seen by downstream counting output +- `decision.finalLocales` is optional and only appears when deferred Latin fallback splits a fallback window into multiple relabeled locales instead of one final tag +- `raw.remappedTag` and `normalized.remappedTag` may be `null` when the detector produced no supported remap + +Related fallback event notes: + +- `detector.window.fallback` may also include `finalLocales` in the same split-relabel case +- `detector.window.fallback.finalTag` follows the same post-fallback rule as `detector.window.evidence.decision.finalTag` + ## Compatibility Notes - Version `1` is additive for event payload 
structure because event names and event-specific top-level fields remain intact. @@ -113,6 +178,9 @@ Collision handling: - added shared debug event envelope version `1` - added `runId`, `topic`, `scope`, `timestamp`, and `verbosity` - changed autogenerated debug-report filenames to the UTC `...-utc-...jsonl` pattern + - added `wc-detector-evidence-...jsonl` autogenerated filenames for evidence-enabled debug reports + - added `detector.window.evidence` as the high-detail detector evidence event + - compatibility note: detector fallback evidence now reports post-fallback hinted Latin tags and may include optional `finalLocales` - compatibility note: filename pattern changed for automation consumers - `v0.1.5-canary.2` and earlier: - debug event lines were flat JSON without a shared envelope From 9902c86c0d106c04c6ad1a14da9455c4fe1d5a21 Mon Sep 17 00:00:00 2001 From: nakolus Date: Tue, 24 Mar 2026 21:20:06 +0800 Subject: [PATCH 23/23] fix(detector): preserve latin hints and wrapped prose in wasm mode --- ...findings-wasm-detector-regression-fixes.md | 35 ++++++++++ src/detector/policy.ts | 65 ++++++++++++------- src/detector/wasm.ts | 44 ++++++++++++- test/word-counter.test.ts | 51 +++++++++++++++ 4 files changed, 172 insertions(+), 23 deletions(-) create mode 100644 docs/plans/jobs/2026-03-24-review-findings-wasm-detector-regression-fixes.md diff --git a/docs/plans/jobs/2026-03-24-review-findings-wasm-detector-regression-fixes.md b/docs/plans/jobs/2026-03-24-review-findings-wasm-detector-regression-fixes.md new file mode 100644 index 0000000..c4c91bd --- /dev/null +++ b/docs/plans/jobs/2026-03-24-review-findings-wasm-detector-regression-fixes.md @@ -0,0 +1,35 @@ +--- +title: "Review findings wasm detector regression fixes" +created-date: 2026-03-24 +status: completed +agent: Codex +--- + +## Goal + +Address the follow-up review findings in the WASM detector path without regressing existing CLI or library behavior. 
+ +## What Changed + +- Updated `src/detector/wasm.ts` so accepted WASM Latin windows reapply rule-based Latin hint segmentation before fallback relabeling. +- Preserved detector-selected locales for the surrounding accepted Latin text while keeping built-in and custom hint-derived subspans such as Spanish or Polish chunks. +- Updated `src/detector/policy.ts` so the Latin quality gate evaluates contiguous non-technical prose blocks instead of requiring each physical line to meet the prose threshold on its own. +- Added regression coverage in `test/word-counter.test.ts` for: + - hard-wrapped English prose that should still be accepted by the WASM detector + - built-in Latin hint preservation inside accepted WASM windows + - custom Latin hint preservation inside accepted WASM windows + +## Why + +- Deferring all Latin hinting before detector routing caused accepted WASM windows to swallow more specific Latin hint spans. +- The line-by-line quality gate introduced false negatives for normal hard-wrapped markdown prose. + +## Verification + +- Ran `bun test test/word-counter.test.ts` +- Ran `bun test test/command.test.ts` + +## Related Plans + +- `docs/plans/plan-2026-03-24-wasm-mode-latin-hint-ordering.md` +- `docs/plans/plan-2026-03-24-debug-observability-and-wasm-latin-quality.md` diff --git a/src/detector/policy.ts b/src/detector/policy.ts index f6920db..0bdb577 100644 --- a/src/detector/policy.ts +++ b/src/detector/policy.ts @@ -81,22 +81,6 @@ function countLatinWords(text: string): number { return text.match(LATIN_WORD_REGEX)?.length ?? 
0; } -function isSentenceLikeLatinLine( - line: string, - latinWords: number, - technicalLike: boolean, -): boolean { - if (latinWords < 4) { - return false; - } - - if (/[.!?]/u.test(line)) { - return true; - } - - return !technicalLike && latinWords >= 5; -} - function isTechnicalLikeLatinLine(line: string, latinWords: number): boolean { const trimmed = line.trim(); if (!trimmed) { @@ -126,6 +110,22 @@ function isTechnicalLikeLatinLine(line: string, latinWords: number): boolean { return false; } +function shouldTreatLatinProseBlockAsSentenceLike( + latinWords: number, + lineCount: number, + hasSentencePunctuation: boolean, +): boolean { + if (latinWords < 4) { + return false; + } + + if (hasSentencePunctuation) { + return true; + } + + return lineCount <= 1 ? latinWords >= 5 : latinWords >= 8; +} + export function shouldAcceptLatinDetectorWindow( text: string, normalizedSample: string, @@ -137,10 +137,30 @@ export function shouldAcceptLatinDetectorWindow( let proseWords = 0; let technicalWords = 0; + let proseBlockWords = 0; + let proseBlockLines = 0; + let proseBlockHasSentencePunctuation = false; + + const flushProseBlock = () => { + if ( + shouldTreatLatinProseBlockAsSentenceLike( + proseBlockWords, + proseBlockLines, + proseBlockHasSentencePunctuation, + ) + ) { + proseWords += proseBlockWords; + } + + proseBlockWords = 0; + proseBlockLines = 0; + proseBlockHasSentencePunctuation = false; + }; for (const rawLine of text.split(/\r?\n/u)) { const line = rawLine.trim(); if (!line || line === "---" || line === "```") { + flushProseBlock(); continue; } @@ -150,16 +170,17 @@ export function shouldAcceptLatinDetectorWindow( } const technicalLike = isTechnicalLikeLatinLine(line, latinWords); - const sentenceLike = isSentenceLikeLatinLine(line, latinWords, technicalLike); - - if (sentenceLike) { - proseWords += latinWords; - } - if (technicalLike) { + flushProseBlock(); technicalWords += latinWords; + continue; } + + proseBlockWords += latinWords; + proseBlockLines += 
1; + proseBlockHasSentencePunctuation ||= /[.!?]/u.test(line); } + flushProseBlock(); return proseWords >= 4 && proseWords >= technicalWords; } diff --git a/src/detector/wasm.ts b/src/detector/wasm.ts index 48b9eb7..0bfd001 100644 --- a/src/detector/wasm.ts +++ b/src/detector/wasm.ts @@ -41,6 +41,17 @@ function createDeferredLatinPreSegmentOptions( }; } +function createRuleOnlyLatinOptions( + options: DetectorLocaleOptions, +): DetectorLocaleOptions { + return { + ...options, + latinLanguageHint: undefined, + latinTagHint: undefined, + latinLocaleHint: undefined, + }; +} + function mergeAdjacentChunks(chunks: LocaleChunk[]): LocaleChunk[] { if (chunks.length === 0) { return chunks; @@ -84,6 +95,36 @@ function reapplyDeferredLatinFallback( return mergeAdjacentChunks(relabeled); } +function reapplyResolvedLatinHintRules( + resolvedChunks: LocaleChunk[], + originalChunks: LocaleChunk[], + options: DetectorLocaleOptions, +): LocaleChunk[] { + const relabeled: LocaleChunk[] = []; + const ruleOnlyOptions = createRuleOnlyLatinOptions(options); + + for (let index = 0; index < resolvedChunks.length; index += 1) { + const chunk = resolvedChunks[index]; + const originalChunk = originalChunks[index]; + if (!chunk || !originalChunk) { + continue; + } + + if (originalChunk.locale !== DEFAULT_LOCALE || chunk.locale === DEFAULT_LOCALE) { + relabeled.push(chunk); + continue; + } + + const hintedChunks = segmentTextByLocale(chunk.text, ruleOnlyOptions).map((hintedChunk) => ({ + locale: hintedChunk.locale === DEFAULT_LOCALE ? 
chunk.locale : hintedChunk.locale, + text: hintedChunk.text, + })); + relabeled.push(...hintedChunks); + } + + return mergeAdjacentChunks(relabeled); +} + function shouldAcceptDetectorTag( routeTag: DetectorRouteTag, confidence: number | undefined, @@ -597,7 +638,8 @@ export async function segmentTextByLocaleWithWasmDetector( options.detectorDebug?.emit?.("detector.summary", options.detectorDebug.summary, { verbosity: "compact", }); - return reapplyDeferredLatinFallback(resolved, options); + const hintRelabeled = reapplyResolvedLatinHintRules(resolved, chunks, options); + return reapplyDeferredLatinFallback(hintRelabeled, options); } export async function wordCounterWithWasmDetector( diff --git a/test/word-counter.test.ts b/test/word-counter.test.ts index 7db5f59..1dea233 100644 --- a/test/word-counter.test.ts +++ b/test/word-counter.test.ts @@ -41,6 +41,16 @@ const WASM_LATIN_QUALITY_FIXTURES = [ ].join("\n"), expectedLocale: "en", }, + { + id: "latin-prose-en-hard-wrapped-short-lines", + text: [ + "This guide explains", + "expected behavior clearly", + "for detector quality", + "checks in docs", + ].join("\n"), + expectedLocale: "en", + }, { id: "latin-tech-cli-help", text: [ @@ -307,6 +317,24 @@ describe("detector entrypoint", () => { expect(chunks.map((chunk) => chunk.text)).toEqual(["el ", "niño"]); }); + test("preserves built-in Latin hint rules inside accepted wasm detector windows", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const chunks = await segmentTextByLocaleWithDetector( + [ + "This guide explains the feature clearly for readers and keeps the paragraph long enough for reliable English detection.", + "It also includes a borrowed word niño inside the same detector window to check hint preservation.", + ].join(" "), + { detector: "wasm" }, + ); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["en", "es"]); + expect(chunks[0]?.text).toContain("borrowed word "); + expect(chunks[1]?.text).toBe("niño inside the same detector 
window to check hint preservation."); + }); + test("reapplies custom Latin hint rules after unresolved wasm detector evaluation", async () => { const chunks = await segmentTextByLocaleWithDetector("Zażółć gęślą jaźń", { detector: "wasm", @@ -317,6 +345,29 @@ describe("detector entrypoint", () => { expect(chunks.map((chunk) => chunk.locale)).toEqual(["pl"]); }); + test("preserves custom Latin hint rules inside accepted wasm detector windows", async () => { + if (!hasWasmDetectorRuntime()) { + return; + } + + const chunks = await segmentTextByLocaleWithDetector( + [ + "This guide explains the feature clearly for readers and keeps the paragraph long enough for reliable English detection.", + "A custom hinted term Zażółć should remain Polish inside the accepted detector window.", + ].join(" "), + { + detector: "wasm", + latinHintRules: [{ tag: "pl", pattern: "[ąćęłńóśźżĄĆĘŁŃÓŚŹŻ]" }], + useDefaultLatinHints: false, + }, + ); + + expect(chunks.map((chunk) => chunk.locale)).toEqual(["en", "pl"]); + expect(chunks[1]?.text).toBe( + "Zażółć should remain Polish inside the accepted detector window.", + ); + }); + test("segments text through detector entrypoint", async () => { const chunks = await segmentTextByLocaleWithDetector("Hello 世界", { detector: "regex" });