From d5d4256d4bba50856d8a63f95963d1eebe81ed15 Mon Sep 17 00:00:00 2001
From: Arber Xhindoli <14798762+arberx@users.noreply.github.com>
Date: Wed, 3 Jun 2026 16:05:23 -0400
Subject: [PATCH 1/4] feat: surface critical per-page defects, make audit
output agent-native (2.0.0)
Resolves #42. Binary structural defects (an H1 count other than one, a
missing <title>, a missing meta description) were detected per page but
lost in sitemap aggregation: prioritizedFixes ranked by prevalence only,
the factor score averaged them away to a passing grade, and
crossCuttingIssues was keyed by factor. A homepage split across four H1s
appeared nowhere in the summary.
- Detect critical defects straight from the DOM, independent of scoring,
so no existing score, grade, or exit code changes. New criticalDefects
rollup on sitemap/static reports plus a Critical Defects section in the
text and markdown output, grouped by defect with every affected page
named (homepage and high sitemap-priority pages first). Shown even with
--top-issues.
- Make the output agent-native: prioritizedFixes is now a structured
PrioritizedFix[] (stable id, kind, severity, full affectedPages,
affectsHomepage, prevalencePct, summary) instead of prose strings, and
every report carries a schemaVersion so parsers detect shape drift.
- No truncation in the end-of-report sections: every issue and every
affected page is listed.
BREAKING CHANGE: SitemapAuditReport.prioritizedFixes is now PrioritizedFix[]
(read .summary for the previous one-liner); reports gain a required
schemaVersion field. Bumped to 2.0.0.
Co-Authored-By: Claude Opus 4.8
---
CHANGELOG.md | 12 +
docs/api.md | 19 +-
docs/cli.md | 8 +-
package.json | 2 +-
skills/aeo/SKILL.md | 12 +-
src/cli.ts | 3 +-
src/critical-defects.ts | 148 ++++++++++++
src/formatters/markdown.ts | 34 ++-
src/formatters/text.ts | 34 ++-
src/index.ts | 14 ++
src/schema.ts | 9 +
src/sitemap.ts | 78 ++++++-
src/static-audit.ts | 9 +-
src/types.ts | 103 +++++++-
test/critical-defects.test.ts | 363 +++++++++++++++++++++++++++++
test/sitemap-cross-cutting.test.ts | 2 +
test/static-audit.test.ts | 47 ++++
17 files changed, 874 insertions(+), 23 deletions(-)
create mode 100644 src/critical-defects.ts
create mode 100644 src/schema.ts
create mode 100644 test/critical-defects.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5008e8a..475c555 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
# Changelog
+## 2.0.0 (2026-06-03)
+
+### Breaking
+- **`SitemapAuditReport.prioritizedFixes` is now a structured `PrioritizedFix[]`, not `string[]`.** Each entry is a typed object — `{ kind, id, title, recommendation, severity?, affectedPages, affectsHomepage, prevalencePct, avgGrade?, summary }` — so an AI agent can act on the ranked to-do list without regex-parsing prose. The human-readable one-liner is preserved on `.summary`; migrate by reading `prioritizedFixes.map(f => f.summary)`. The text/markdown reports are unchanged in spirit (they render the structured fixes, now spelling out every affected page).
+- **New `schemaVersion` field on `AuditReport` and `SitemapAuditReport`** (exported `SCHEMA_VERSION`, currently `"1.0"`). It versions the report's JSON shape independently of the npm package version so agent parsers can detect breaking drift instead of failing silently. Treat the absence of the field as "pre-2.0 / legacy shape."
+
+### Added
+- **Critical per-page defects surfaced by impact, not prevalence (#42).** Sitemap and static-directory reports now include a `criticalDefects` rollup and a **Critical Defects** section (text + markdown) that lists binary, one-line-fix structural defects — an `<h1>` count other than one, a missing `<title>`, a missing meta description — **regardless of how few pages exhibit them**. Previously these were detected per page but lost in aggregation: `prioritizedFixes` ranked only by prevalence (so a defect on a single page was structurally excluded), the factor score averaged the defect away to a passing grade, and `crossCuttingIssues` was keyed by factor, never the specific defect. An unambiguous, high-impact defect on the most important page (e.g. a homepage split across four `<h1>`s, or a `/contact-us` page with none) appeared nowhere in the top-level summary. Now each defect names **every** offending page (homepage and high sitemap-`priority` pages first), and critical-severity defects are promoted to the **top** of `prioritizedFixes`. Shown even with `--top-issues`.
+ - The end-of-report summaries no longer truncate: the Critical Defects block and each prioritized fix list **every** affected page (no "+N more"), and `prioritizedFixes` reports every cross-cutting issue ordered by prevalence rather than a top-5 slice — a fix the audit computed always reaches the report.
+ - New `detectCriticalDefects()`, `buildCriticalDefects()`, and `SCHEMA_VERSION` exports plus `CriticalDefect`, `CriticalDefectGroup`, `CriticalDefectAffectedPage`, `CriticalDefectId`, `CriticalDefectSeverity`, and `PrioritizedFix` types. `AuditReport` gains `criticalDefects` and `schemaVersion`; `SitemapAuditReport` gains `criticalDefects` and `schemaVersion`; `SitemapPageResult` gains the page's sitemap `priority`.
+ - Detection is independent of the weighted factor scores, so **no existing audit scores or grades change** (and exit codes are unaffected).
+
## 1.13.0 (2026-05-31)
### Added
diff --git a/docs/api.md b/docs/api.md
index 8f77b11..1a20e0b 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -33,14 +33,26 @@ const report = await runSitemapAudit('https://example.com', {
factors: ['schema-validity', 'structured-data'], // Optional subset
})
-console.log(report.aggregateGrade) // 'B+'
-console.log(report.pagesAudited) // 22
+console.log(report.schemaVersion) // '1.0', JSON shape version (see "Machine-readable output")
+console.log(report.aggregateGrade) // 'B+'
+console.log(report.pagesAudited) // 22
+console.log(report.criticalDefects) // Binary per-page defects (multiple/missing H1, missing title/meta), grouped by defect
console.log(report.crossCuttingIssues) // Per-factor rollup with affectedUrls for every recommendation
-console.log(report.prioritizedFixes) // Top 5 fixes ranked by site-wide impact
+console.log(report.prioritizedFixes) // Ranked PrioritizedFix[]: critical defects first, then cross-cutting by impact
```
Each entry in `crossCuttingIssues[].topIssues` carries a `recommendation` plus the exact `affectedUrls` so you can attribute each problem to specific pages, e.g. "FAQPage duplicate" pointing at every blog post that has it.
+`criticalDefects` surfaces **binary structural defects by impact, not prevalence**. The cross-cutting rollup ranks by how many pages a factor affects, so an unambiguous one-line-fix defect on a single important page (a homepage split across four `<h1>`s, or a `/contact-us` page with none) would otherwise be averaged into a passing factor grade and excluded from `prioritizedFixes`. Each group names the offending pages (homepage and high sitemap-`priority` pages first), and the critical-severity ones lead `prioritizedFixes`.
+
+### Machine-readable output (for AI agents)
+
+`--format json` and these return values are the contract for programmatic use. The report is built to be acted on, not just rendered:
+
+- **`schemaVersion`** (on `AuditReport` and `SitemapAuditReport`, exported as `SCHEMA_VERSION`) versions the JSON shape independently of the npm version. Pin to it and treat a major bump as breaking; treat its absence as a pre-2.0 report.
+- **`prioritizedFixes: PrioritizedFix[]`** is the ranked, pre-computed to-do list, so an agent need not average factor scores and re-rank. Each fix carries a stable `id` (a defect id like `"multiple-h1"` or a factor id like `"technical-seo"`), `kind`, an optional `severity`, the complete `affectedPages` array (never truncated), `affectsHomepage`, `prevalencePct`, and a human `summary`.
+- **Stable identifiers** on the decision surface (`criticalDefects[].id`, `prioritizedFixes[].id` / `kind`) let integrations key on codes, not on matching message strings.
+
## Static output (offline, from disk)
```ts
@@ -55,6 +67,7 @@ if (result.kind === 'single') {
console.log(result.report.overallGrade) // single .html file → AuditReport
} else {
console.log(result.report.aggregateGrade) // directory → SitemapAuditReport shape
+ console.log(result.report.criticalDefects)
console.log(result.report.crossCuttingIssues)
}
```
diff --git a/docs/cli.md b/docs/cli.md
index d700aff..7786472 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -21,6 +21,8 @@ npx @ainyc/aeo-audit https://example.com --format json
npx @ainyc/aeo-audit https://example.com --format markdown
```
+`--format json` is the contract for programmatic and agent consumers: every report carries a `schemaVersion` (so a parser can detect breaking shape drift) and sitemap reports expose a `criticalDefects` rollup plus a ranked `prioritizedFixes` array of structured objects. See [api.md](api.md#machine-readable-output-for-ai-agents) for the field shapes.
+
## Running a subset of factors
```bash
@@ -76,7 +78,7 @@ npx @ainyc/aeo-audit https://example.com --sitemap https://example.com/sitemap.x
# Cap the number of pages (default 200, sorted by sitemap priority)
npx @ainyc/aeo-audit https://example.com --sitemap --limit 50
-# Skip per-page output and show only cross-cutting issues
+# Skip per-page output and show only the cross-cutting issues and critical defects
npx @ainyc/aeo-audit https://example.com --sitemap --top-issues
# Rewrite each <loc>'s origin to the target you named (audit staging with prod's sitemap)
@@ -92,6 +94,8 @@ Auto-discovery checks `/sitemap.xml` → `/sitemap-index.xml` → `Sitemap:` dir
When the sitemap has more URLs than `--limit`, the run audits the highest-priority pages and prints a notice to stderr listing how many were skipped and how to audit them all.
+A **Critical Defects** section lists binary, one-line-fix structural defects (an `<h1>` count other than one, a missing `<title>`, a missing meta description) surfaced **regardless of how few pages they affect**, with the offending pages named (homepage and high sitemap-`priority` pages first). These would otherwise be averaged into a passing factor grade and excluded from the prevalence-ranked fixes; the critical-severity ones also lead the prioritized fix list. The section is shown even with `--top-issues`. See the machine-readable shapes in [api.md](api.md#machine-readable-output-for-ai-agents).
+
The optional in-process factors are honored per page: pass `--include-geo` and/or `--include-agent-skills` to add them to every audited page. `--lighthouse` is the exception: it cannot be combined with `--sitemap` because each PageSpeed Insights call takes 15-30s.
## Static-output mode
@@ -191,7 +195,7 @@ When fetching `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml` t
| `--lighthouse` | Include the optional Lighthouse factor (Performance + Accessibility + Best Practices, mobile strategy) via Google PageSpeed Insights. Single-URL only; cannot combine with `--sitemap` or `--detect-platform`. Adds ~15-30s. Set `PAGESPEED_API_KEY` env var to lift anonymous rate limits. |
| `--sitemap [url]` | Audit all pages from the sitemap. Auto-discovery tries `/sitemap.xml`, then `/sitemap-index.xml`, then `Sitemap:` directives in `/robots.txt`. Pass an explicit URL to override. |
| `--limit ` | Max pages to audit in sitemap mode (default 200, sorted by sitemap priority) |
-| `--top-issues` | In sitemap mode, skip per-page output and show only cross-cutting issues |
+| `--top-issues` | In sitemap mode, skip per-page output and show only the cross-cutting issues and critical defects |
| `--detect-platform` | Identify the platform/CMS/framework powering the site instead of running an audit |
| `--urls ` | In `--detect-platform` mode, run on multiple URLs. `` is a file path (one URL per line), a comma-separated list, or `-` for stdin |
| `--concurrency ` | In `--detect-platform` batch mode, max in-flight fetches (default 5) |
diff --git a/package.json b/package.json
index 454d56a..768ff50 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@ainyc/aeo-audit",
- "version": "1.13.0",
+ "version": "2.0.0",
"description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. Scores websites across 16 ranking factors that determine AI citation.",
"type": "module",
"main": "./dist/index.js",
diff --git a/skills/aeo/SKILL.md b/skills/aeo/SKILL.md
index 295f749..1d5a96f 100644
--- a/skills/aeo/SKILL.md
+++ b/skills/aeo/SKILL.md
@@ -108,7 +108,7 @@ npx @ainyc/aeo-audit@1 "" --sitemap --top-issues --format json
Flags:
- `--sitemap [url]` — auto-discover the sitemap (tries `/sitemap.xml`, then `/sitemap-index.xml`, then `Sitemap:` directives in `/robots.txt`) or provide an explicit URL
- `--limit ` — cap pages audited (default 200, sorted by sitemap priority)
-- `--top-issues` — skip per-page output, show only cross-cutting patterns
+- `--top-issues` — skip per-page output, show only cross-cutting patterns and critical defects
- `--rewrite-sitemap-origin` — rewrite every `<loc>`'s origin to the target URL's origin (preserving path/query) before crawling. Use when the sitemap hardcodes the prod/canonical domain but you want to audit a staging host or local dev server.
- `--require-meta` — force exit `1` if any audited page is missing ``, regardless of overall score (useful as a CI gate)
- `--include-geo` / `--include-agent-skills` — honored per page in sitemap mode (adds the optional geographic-signals / agent-skill-exposure factors). `--lighthouse` is not available with `--sitemap`.
@@ -117,9 +117,17 @@ Pages are audited with bounded concurrency (5 in flight) to avoid hammering the
Returns:
- Per-page scores and grades
+- **Critical defects** — binary, one-line-fix structural defects (an `<h1>` count other than one, a missing `<title>`, a missing meta description) surfaced **regardless of how few pages they affect**, with the offending pages named (homepage and high sitemap-`priority` pages first). These would otherwise be averaged into a passing factor grade; the JSON field is `criticalDefects` and critical-severity ones are also promoted to the top of `prioritizedFixes`. Shown even with `--top-issues`.
- Cross-cutting issues (factors failing across multiple pages)
- Aggregate score and grade
-- Prioritized fixes ranked by site-wide impact
+- Prioritized fixes (critical defects first, then ranked by site-wide impact)
+
+#### Machine-readable output (for agents)
+
+Use `--format json`; it is the contract for programmatic use. Key fields for acting on the result without parsing prose:
+- `schemaVersion` (on every audit report) versions the JSON shape independently of the package version — pin to it and treat a major bump as breaking; absence means a pre-2.0 report.
+- `prioritizedFixes` is a ranked array of objects, each with a stable `id`, `kind`, optional `severity`, the complete `affectedPages` list (never truncated), `affectsHomepage`, `prevalencePct`, and a human `summary`. It's the pre-computed to-do list — no need to re-rank factor scores yourself.
+- Stable identifiers (`criticalDefects[].id`, `prioritizedFixes[].id`) let integrations key on codes rather than message strings.
#### Auxiliary File Diagnostics
diff --git a/src/cli.ts b/src/cli.ts
index e0d4c8c..80d11c7 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -264,7 +264,8 @@ Options:
explicit URL to override. Pages are fetched with bounded concurrency (5).
--limit Max pages to audit in sitemap mode (default 200, sorted by sitemap priority).
When the sitemap exceeds the limit, a notice is printed to stderr.
- --top-issues In sitemap mode, skip per-page output and show only cross-cutting issues
+ --top-issues In sitemap mode, skip per-page output and show only the cross-cutting
+ issues and critical defects
--detect-platform Detect what platform/CMS/framework the site is built on (WordPress,
Webflow, Shopify, Next.js, etc.) instead of running a full audit.
--urls In --detect-platform mode, run on multiple URLs. can be a path
diff --git a/src/critical-defects.ts b/src/critical-defects.ts
new file mode 100644
index 0000000..73af59f
--- /dev/null
+++ b/src/critical-defects.ts
@@ -0,0 +1,148 @@
+import type {
+ AuditContext,
+ AuditReport,
+ CriticalDefect,
+ CriticalDefectGroup,
+ CriticalDefectId,
+ CriticalDefectSeverity,
+} from './types.js'
+
+/** Human-readable labels for each defect, used in rollups and formatters. */
+const DEFECT_TITLES: Record<CriticalDefectId, string> = {
+  'missing-h1': 'Missing H1',
+  'multiple-h1': 'Multiple H1 tags',
+  'missing-title': 'Missing <title>',
+  'missing-meta-description': 'Missing meta description',
+}
+
+const SEVERITY_RANK: Record<CriticalDefectSeverity, number> = {
+  critical: 0, // lower rank sorts first when groups are ordered by severity
+  warning: 1,
+}
+
+/**
+ * Detect binary structural defects on a single page straight from the DOM.
+ *
+ * These are deliberately independent of the weighted factor scores. The technical
+ * factors already fold an H1-count or meta-description check into a bundled score
+ * that can read "healthy" (issue #42) even when one sub-check fails; here each
+ * defect is an unambiguous, one-line-fixable yes/no, so it can be surfaced on its
+ * own merits regardless of how the surrounding factor happened to average out.
+ */
+export function detectCriticalDefects(context: AuditContext): CriticalDefect[] {
+  const defects: CriticalDefect[] = []
+
+  const h1Count = context.$('h1').length
+  if (h1Count === 0) {
+    defects.push({
+      id: 'missing-h1',
+      severity: 'critical',
+      detail: 'No H1 tag — AI models use the H1 as the primary page-topic signal.',
+      recommendation: 'Add exactly one H1 that clearly states the page topic.',
+    })
+  } else if (h1Count > 1) {
+    defects.push({
+      id: 'multiple-h1',
+      severity: 'critical',
+      detail: `${h1Count} H1 tags found (expected exactly one).`,
+      recommendation: `Consolidate to a single H1; ${h1Count} are present.`,
+    })
+  }
+
+  if (!context.pageTitle) {
+    defects.push({
+      id: 'missing-title',
+      severity: 'critical',
+      detail: 'No <title> element — search and AI snippets have no canonical page name to use.',
+      recommendation: 'Add a concise <title> that names the page.',
+    })
+  }
+
+  const metaDesc = context.$('meta[name="description"]').attr('content')?.trim() ?? ''
+  if (!metaDesc) {
+    defects.push({
+      id: 'missing-meta-description',
+      severity: 'warning',
+      detail: 'No meta description.',
+      recommendation: 'Add a meta description (150–160 characters) summarising the page.',
+    })
+  }
+
+  return defects
+}
+
+/** A URL is the homepage when its path is the site root and it carries no query; a URL that fails to parse is not. */
+export function isHomepageUrl(url: string): boolean {
+ try {
+ const parsed = new URL(url)
+ return (parsed.pathname === '/' || parsed.pathname === '') && parsed.search === ''
+ } catch {
+ return false
+ }
+}
+
+// Sitemaps without an explicit <priority> default to 0.5 per the protocol, so we
+// treat an absent priority the same way when ranking.
+const effectivePriority = (priority: number | undefined): number => priority ?? 0.5
+
+/**
+ * Roll per-page critical defects up across a sitemap/static run, grouped by
+ * defect. Pages within a group are ordered by importance (homepage first, then
+ * sitemap priority); groups are ordered by severity, then by whether they hit an
+ * important page — so the homepage's broken H1 leads even at 1-of-25 prevalence,
+ * which is exactly the case the prevalence-based ranking buries.
+ *
+ * `priorityByUrl` maps a page's final URL to its sitemap `<priority>`. It is
+ * optional: static-output mode has no sitemap priorities, and homepage detection
+ * (from the URL path) still works without it.
+ */
+export function buildCriticalDefects(
+  successPages: AuditReport[],
+  priorityByUrl: Map<string, number | undefined> = new Map(),
+): CriticalDefectGroup[] {
+  const groups = new Map<CriticalDefectId, CriticalDefectGroup>()
+
+  for (const page of successPages) {
+    for (const defect of page.criticalDefects ?? []) {
+      let group = groups.get(defect.id)
+      if (!group) {
+        group = {
+          id: defect.id,
+          severity: defect.severity,
+          title: DEFECT_TITLES[defect.id],
+          recommendation: defect.recommendation,
+          pages: [],
+        }
+        groups.set(defect.id, group)
+      }
+      group.pages.push({
+        url: page.finalUrl,
+        detail: defect.detail,
+        isHomepage: isHomepageUrl(page.finalUrl),
+        priority: priorityByUrl.get(page.finalUrl),
+      })
+    }
+  }
+
+  for (const group of groups.values()) {
+    group.pages.sort(
+      (a, b) =>
+        Number(b.isHomepage) - Number(a.isHomepage) ||
+        effectivePriority(b.priority) - effectivePriority(a.priority) ||
+        a.url.localeCompare(b.url),
+    )
+  }
+
+  const hasHomepage = (g: CriticalDefectGroup): number => (g.pages.some((p) => p.isHomepage) ? 1 : 0)
+  const maxPriority = (g: CriticalDefectGroup): number =>
+    g.pages.reduce((max, p) => Math.max(max, effectivePriority(p.priority)), 0)
+
+  return [...groups.values()].sort(
+    (a, b) =>
+      SEVERITY_RANK[a.severity] - SEVERITY_RANK[b.severity] ||
+      hasHomepage(b) - hasHomepage(a) ||
+      maxPriority(b) - maxPriority(a) ||
+      b.pages.length - a.pages.length ||
+      a.title.localeCompare(b.title),
+  )
+}
diff --git a/src/formatters/markdown.ts b/src/formatters/markdown.ts
index 303c974..6e80f2e 100644
--- a/src/formatters/markdown.ts
+++ b/src/formatters/markdown.ts
@@ -1,3 +1,4 @@
+import { isHomepageUrl } from '../critical-defects.js'
import type {
AuditReport,
BatchDetectionEntry,
@@ -97,6 +98,27 @@ export function formatSitemapMarkdown(report: SitemapAuditReport, topIssuesOnly
lines.push(``)
}
+ if (report.criticalDefects.length > 0) {
+ lines.push(`## Critical Defects`)
+ lines.push(``)
+ lines.push(`High-impact, binary structural defects — surfaced regardless of how few pages they affect.`)
+ lines.push(``)
+
+ for (const group of report.criticalDefects) {
+ const count = group.pages.length
+ lines.push(`### ${group.title} _(${group.severity}, ${count} page${count === 1 ? '' : 's'})_`)
+ lines.push(``)
+ lines.push(group.recommendation)
+ lines.push(``)
+ // List every affected page — a report must surface all issues, not a sample.
+ for (const page of group.pages) {
+ const home = page.isHomepage ? ' **(homepage)**' : ''
+ lines.push(`- \`${page.url}\`${home} — ${page.detail}`)
+ }
+ lines.push(``)
+ }
+ }
+
if (report.crossCuttingIssues.length > 0) {
lines.push(`## Cross-Cutting Issues`)
lines.push(``)
@@ -130,10 +152,18 @@ export function formatSitemapMarkdown(report: SitemapAuditReport, topIssuesOnly
}
if (report.prioritizedFixes.length > 0) {
- lines.push(`## Prioritized Fixes (by site-wide impact)`)
+ lines.push(`## Prioritized Fixes (critical defects first, then site-wide impact)`)
lines.push(``)
for (let i = 0; i < report.prioritizedFixes.length; i++) {
- lines.push(`${i + 1}. ${report.prioritizedFixes[i]}`)
+ const fix = report.prioritizedFixes[i]
+ const tag = fix.severity ? `**[${fix.severity}]** ` : ''
+ const grade = fix.avgGrade ? ` (avg ${fix.avgGrade})` : ''
+ lines.push(`${i + 1}. ${tag}**${fix.title}**${grade} _(${fix.prevalencePct}% of pages)_ — ${fix.recommendation}`)
+ // Spell out every affected page — agents and humans both need the full set.
+ for (const url of fix.affectedPages) {
+ const home = isHomepageUrl(url) ? ' **(homepage)**' : ''
+ lines.push(` - \`${url}\`${home}`)
+ }
}
lines.push(``)
}
diff --git a/src/formatters/text.ts b/src/formatters/text.ts
index 06e37d0..603576a 100644
--- a/src/formatters/text.ts
+++ b/src/formatters/text.ts
@@ -6,6 +6,7 @@ const YELLOW = '\x1b[33m'
const RED = '\x1b[31m'
const CYAN = '\x1b[36m'
+import { isHomepageUrl } from '../critical-defects.js'
import type {
AuditReport,
BatchDetectionEntry,
@@ -118,6 +119,26 @@ export function formatSitemapText(report: SitemapAuditReport, topIssuesOnly = fa
lines.push(``)
}
+ if (report.criticalDefects.length > 0) {
+ lines.push(`${BOLD}Critical Defects${RESET} ${DIM}(high-impact, shown regardless of prevalence)${RESET}`)
+ lines.push(`${'─'.repeat(70)}`)
+
+ for (const group of report.criticalDefects) {
+ const tag = group.severity === 'critical' ? `${RED}critical${RESET}` : `${YELLOW}warning${RESET}`
+ const count = group.pages.length
+ lines.push(` [${tag}] ${BOLD}${group.title}${RESET} ${DIM}(${count} page${count === 1 ? '' : 's'})${RESET}`)
+ lines.push(` ${DIM}→ ${group.recommendation}${RESET}`)
+ // List every affected page — a report must surface all issues, not a sample.
+ for (const page of group.pages) {
+ const home = page.isHomepage ? ` ${CYAN}(homepage)${RESET}` : ''
+ lines.push(` ${DIM}- ${page.url}${home}: ${page.detail}${RESET}`)
+ }
+ }
+
+ lines.push(`${'─'.repeat(70)}`)
+ lines.push(``)
+ }
+
if (report.crossCuttingIssues.length > 0) {
lines.push(`${BOLD}Cross-Cutting Issues${RESET}`)
lines.push(`${'─'.repeat(70)}`)
@@ -140,9 +161,18 @@ export function formatSitemapText(report: SitemapAuditReport, topIssuesOnly = fa
}
if (report.prioritizedFixes.length > 0) {
- lines.push(`${BOLD}Prioritized Fixes (by site-wide impact)${RESET}`)
+ lines.push(`${BOLD}Prioritized Fixes (critical defects first, then site-wide impact)${RESET}`)
for (let i = 0; i < report.prioritizedFixes.length; i++) {
- lines.push(` ${CYAN}${i + 1}.${RESET} ${report.prioritizedFixes[i]}`)
+ const fix = report.prioritizedFixes[i]
+ const tag = fix.severity ? `[${fix.severity === 'critical' ? RED : YELLOW}${fix.severity}${RESET}] ` : ''
+ const grade = fix.avgGrade ? `${DIM} avg ${fix.avgGrade}${RESET}` : ''
+ lines.push(` ${CYAN}${i + 1}.${RESET} ${tag}${BOLD}${fix.title}${RESET}${grade} ${DIM}(${fix.prevalencePct}% of pages)${RESET}`)
+ lines.push(` ${DIM}→ ${fix.recommendation}${RESET}`)
+ // Spell out every affected page — agents and humans both need the full set.
+ for (const url of fix.affectedPages) {
+ const home = isHomepageUrl(url) ? ` ${CYAN}(homepage)${RESET}` : ''
+ lines.push(` ${DIM}- ${url}${home}${RESET}`)
+ }
}
lines.push(``)
}
diff --git a/src/index.ts b/src/index.ts
index 161a655..5878549 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -21,6 +21,8 @@ import { analyzeSnippetEligibility } from './analyzers/snippet-eligibility.js'
import { analyzeAgentSkillExposure } from './analyzers/agent-skill-exposure.js'
import { analyzeLighthouse } from './analyzers/lighthouse.js'
import { getVisibleText, parseJsonLdScripts, countWords } from './analyzers/helpers.js'
+import { detectCriticalDefects } from './critical-defects.js'
+import { SCHEMA_VERSION } from './schema.js'
import { FACTOR_DEFINITIONS, OPTIONAL_FACTOR_DEFINITIONS, scoreFactors } from './scoring.js'
import type {
Analyzer,
@@ -34,10 +36,20 @@ import type {
export { runSitemapAudit } from './sitemap.js'
export { runStaticAudit } from './static-audit.js'
+export { detectCriticalDefects, buildCriticalDefects } from './critical-defects.js'
+export { SCHEMA_VERSION } from './schema.js'
export { detectPlatform, detectPlatformBatch } from './detect-platform.js'
export { SPEC_RULES, FACTOR_SPEC_RULES, SPEC_SITE, specCitation } from './spec-references.js'
export type { SpecRule, SpecRuleId, SpecStatus } from './spec-references.js'
export type { SitemapAuditReport, SitemapAuditOptions } from './types.js'
+export type {
+ CriticalDefect,
+ CriticalDefectAffectedPage,
+ CriticalDefectGroup,
+ CriticalDefectId,
+ CriticalDefectSeverity,
+ PrioritizedFix,
+} from './types.js'
export type { StaticAuditOptions, StaticAuditResult } from './static-audit.js'
export type {
BatchDetectionEntry,
@@ -175,6 +187,7 @@ export async function auditHtmlPage(page: AuditHtmlPageInput, options: RunAeoAud
const { overallScore, overallGrade, factors } = scoreFactors(rawFactorResults)
return {
+ schemaVersion: SCHEMA_VERSION,
url: page.inputUrl,
finalUrl: page.finalUrl,
auditedAt: new Date().toISOString(),
@@ -182,6 +195,7 @@ export async function auditHtmlPage(page: AuditHtmlPageInput, options: RunAeoAud
overallGrade,
summary: buildSummary(factors, overallGrade),
factors,
+ criticalDefects: detectCriticalDefects(context),
metadata: {
fetchTimeMs: page.fetchTimeMs,
pageTitle: context.pageTitle,
diff --git a/src/schema.ts b/src/schema.ts
new file mode 100644
index 0000000..bbe741f
--- /dev/null
+++ b/src/schema.ts
@@ -0,0 +1,9 @@
+/**
+ * Version of the report JSON shape (`AuditReport` / `SitemapAuditReport`),
+ * independent of the npm package version so agents can pin to a shape rather than
+ * a release. Bump the minor for additive fields, the major for breaking changes.
+ *
+ * Lives in its own module (not `index.ts`) so report builders can read it without
+ * importing the audit entry points — which test suites routinely mock.
+ */
+export const SCHEMA_VERSION = '1.0'
diff --git a/src/sitemap.ts b/src/sitemap.ts
index bac4c58..c5986eb 100644
--- a/src/sitemap.ts
+++ b/src/sitemap.ts
@@ -1,10 +1,14 @@
import { AeoAuditError } from './errors.js'
+import { buildCriticalDefects, isHomepageUrl } from './critical-defects.js'
import { normalizeTargetUrl } from './fetch-page.js'
import { runAeoAudit } from './index.js'
+import { SCHEMA_VERSION } from './schema.js'
import { scoreToGrade } from './scoring.js'
import type {
AuditReport,
+ CriticalDefectGroup,
CrossCuttingIssue,
+ PrioritizedFix,
RunAeoAuditOptions,
SitemapAuditOptions,
SitemapAuditReport,
@@ -322,14 +326,60 @@ function buildCrossCuttingIssues(successPages: AuditReport[]): CrossCuttingIssue
return issues
}
-function buildPrioritizedFixes(issues: CrossCuttingIssue[], totalPages: number): string[] {
- return issues
- .slice(0, 5)
- .map((issue) => {
- const pct = Math.round((issue.affectedPages / totalPages) * 100)
- const rec = issue.topRecommendations[0] || 'Review and improve this factor.'
- return `${issue.factorName} (avg ${issue.avgGrade}, affects ${pct}% of pages): ${rec}`
+function buildPrioritizedFixes(
+ issues: CrossCuttingIssue[],
+ totalPages: number,
+ criticalDefects: CriticalDefectGroup[] = [],
+): PrioritizedFix[] {
+ const pct = (n: number): number => (totalPages > 0 ? Math.round((n / totalPages) * 100) : 0)
+
+ // Lead with high-impact binary defects (issue #42). These are excluded from the
+ // prevalence ranking below because they typically hit only one or two pages, but
+ // they're unambiguous and one-line-fixable, so they belong at the top. Only
+ // critical-severity defects are promoted; warnings (e.g. a missing meta
+ // description) already flow into the prevalence ranking via factor recommendations.
+ const criticalFixes: PrioritizedFix[] = criticalDefects
+ .filter((group) => group.severity === 'critical')
+ .map((group): PrioritizedFix => {
+ const affectedPages = group.pages.map((p) => p.url)
+ const affectsHomepage = group.pages.some((p) => p.isHomepage)
+ const count = affectedPages.length
+ return {
+ kind: 'critical-defect',
+ id: group.id,
+ title: group.title,
+ recommendation: group.recommendation,
+ severity: group.severity,
+ affectedPages,
+ affectsHomepage,
+ prevalencePct: pct(count),
+ summary: `${group.title} (${group.severity}) — ${count} page${count === 1 ? '' : 's'}${affectsHomepage ? ', incl. homepage' : ''}: ${group.recommendation}`,
+ }
})
+
+ // Report every cross-cutting issue, ordered by prevalence — not a top-N slice.
+ // A fix the report computed must reach the report; truncating the tail silently
+ // drops real issues a consumer reading only this section would never see.
+ const crossCuttingFixes: PrioritizedFix[] = issues.map((issue): PrioritizedFix => {
+ const top = issue.topIssues[0]
+ const recommendation = issue.topRecommendations[0] ?? top?.recommendation ?? 'Review and improve this factor.'
+ const affectedPages = top?.affectedUrls ?? []
+ const affectsHomepage = affectedPages.some(isHomepageUrl)
+ const count = affectedPages.length
+ return {
+ kind: 'cross-cutting',
+ id: issue.factorId,
+ title: issue.factorName,
+ recommendation,
+ affectedPages,
+ affectsHomepage,
+ prevalencePct: pct(count),
+ avgGrade: issue.avgGrade,
+ summary: `${issue.factorName} (avg ${issue.avgGrade}) — ${count} page${count === 1 ? '' : 's'}: ${recommendation}`,
+ }
+ })
+
+ return [...criticalFixes, ...crossCuttingFixes]
}
export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptions = {}): Promise {
@@ -430,6 +480,7 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
status: 'success',
factors: report.factors,
metadata: report.metadata,
+ priority: entry.priority,
},
report,
}
@@ -442,6 +493,7 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
overallGrade: 'F',
status: 'error',
error: message,
+ priority: entry.priority,
},
report: null,
}
@@ -460,10 +512,19 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
? Math.round(successScores.reduce((a, b) => a + b, 0) / successScores.length)
: 0
+ // Map each successful page's final URL to its sitemap priority so the critical
+ // defect rollup can rank affected pages by importance (issue #42).
+ const priorityByUrl = new Map()
+ for (const page of pageResults) {
+ if (page.status === 'success') priorityByUrl.set(page.url, page.priority)
+ }
+
+ const criticalDefects = buildCriticalDefects(successReports, priorityByUrl)
const crossCuttingIssues = buildCrossCuttingIssues(successReports)
- const prioritizedFixes = buildPrioritizedFixes(crossCuttingIssues, successReports.length)
+ const prioritizedFixes = buildPrioritizedFixes(crossCuttingIssues, successReports.length, criticalDefects)
return {
+ schemaVersion: SCHEMA_VERSION,
sitemapUrl,
auditedAt: new Date().toISOString(),
pagesDiscovered: discovered,
@@ -475,6 +536,7 @@ export async function runSitemapAudit(rawUrl: string, options: SitemapAuditOptio
aggregateScore,
aggregateGrade: scoreToGrade(aggregateScore),
pages: pageResults,
+ criticalDefects,
crossCuttingIssues,
prioritizedFixes,
}
diff --git a/src/static-audit.ts b/src/static-audit.ts
index dce2377..13ad777 100644
--- a/src/static-audit.ts
+++ b/src/static-audit.ts
@@ -3,6 +3,8 @@ import path from 'node:path'
import { AeoAuditError } from './errors.js'
import { normalizeTargetUrl } from './fetch-page.js'
import { auditHtmlPage } from './index.js'
+import { buildCriticalDefects } from './critical-defects.js'
+import { SCHEMA_VERSION } from './schema.js'
import { buildCrossCuttingIssues, buildPrioritizedFixes, mapWithConcurrency } from './sitemap.js'
import { scoreToGrade } from './scoring.js'
import type {
@@ -272,10 +274,14 @@ export async function runStaticAudit(targetPath: string, options: StaticAuditOpt
? Math.round(successScores.reduce((a, b) => a + b, 0) / successScores.length)
: 0
+ // Static output has no sitemap <priority>, so the rollup ranks by homepage
+ // (derived from the file path → URL) only — no priority map is passed.
+ const criticalDefects = buildCriticalDefects(successReports)
const crossCuttingIssues = buildCrossCuttingIssues(successReports)
- const prioritizedFixes = buildPrioritizedFixes(crossCuttingIssues, successReports.length)
+ const prioritizedFixes = buildPrioritizedFixes(crossCuttingIssues, successReports.length, criticalDefects)
const report: SitemapAuditReport = {
+ schemaVersion: SCHEMA_VERSION,
sitemapUrl: resolved,
auditedAt: new Date().toISOString(),
pagesDiscovered: discovered,
@@ -287,6 +293,7 @@ export async function runStaticAudit(targetPath: string, options: StaticAuditOpt
aggregateScore,
aggregateGrade: scoreToGrade(aggregateScore),
pages: pageResults,
+ criticalDefects,
crossCuttingIssues,
prioritizedFixes,
}
diff --git a/src/types.ts b/src/types.ts
index ab2449c..c22d0fd 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -123,7 +123,36 @@ export interface AuditMetadata {
redirectChain: RedirectHop[]
}
+export type CriticalDefectId =
+ | 'missing-h1'
+ | 'multiple-h1'
+ | 'missing-title'
+ | 'missing-meta-description'
+
+export type CriticalDefectSeverity = 'critical' | 'warning'
+
+/**
+ * A binary, page-level structural defect (issue #42). Unlike the weighted factor
+ * scores — which bundle many sub-checks and can average a single bad signal away —
+ * these are detected directly from the DOM and are simply present or not. They are
+ * surfaced separately so a high-impact defect on one important page (e.g. a
+ * homepage with four `<h1>`s) is never hidden by low prevalence or a passing grade.
+ */
+export interface CriticalDefect {
+ id: CriticalDefectId
+ severity: CriticalDefectSeverity
+ /** Page-specific description, e.g. `"4 H1 tags found (expected exactly one)."` */
+ detail: string
+ recommendation: string
+}
+
export interface AuditReport {
+ /**
+ * Version of the report's JSON shape, independent of the package version, so an
+ * agent parser can detect breaking shape drift. Bumps minor for additive fields,
+ * major for breaking changes. See `SCHEMA_VERSION`.
+ */
+ schemaVersion: string
url: string
finalUrl: string
auditedAt: string
@@ -131,6 +160,8 @@ export interface AuditReport {
overallGrade: string
summary: string
factors: ScoredFactor[]
+ /** Binary structural defects on this page, detected independently of scoring. */
+ criticalDefects: CriticalDefect[]
metadata: AuditMetadata
}
@@ -172,6 +203,63 @@ export interface SitemapPageResult {
error?: string
factors?: ScoredFactor[]
metadata?: AuditMetadata
+ /** Sitemap `<priority>` for this URL, when the sitemap declared one. Absent in static-output mode. */
+ priority?: number
+}
+
+export interface CriticalDefectAffectedPage {
+ url: string
+ /** Page-specific defect description carried up from the per-page audit. */
+ detail: string
+ /** True when this URL is the site root (`/`). Such pages are ranked first. */
+ isHomepage: boolean
+ /** Sitemap `<priority>` for this URL, when declared. */
+ priority?: number
+}
+
+/**
+ * A single binary defect (issue #42) rolled up across every page that exhibits
+ * it. Keyed by defect rather than by factor, so the specific actionable — and the
+ * exact pages it lives on — survives into the top-level report instead of being
+ * collapsed into a factor average.
+ */
+export interface CriticalDefectGroup {
+ id: CriticalDefectId
+ severity: CriticalDefectSeverity
+ /** Short human label, e.g. `"Multiple H1 tags"`. */
+ title: string
+ recommendation: string
+ /** Affected pages, most important first (homepage, then sitemap priority). */
+ pages: CriticalDefectAffectedPage[]
+}
+
+/**
+ * A single ranked, machine-readable fix — the unit of the prioritized to-do list.
+ * Carries stable identifiers and the complete affected-page set so an agent can
+ * act on it without parsing prose (issue #42). The ranking puts critical per-page
+ * defects first, then cross-cutting factor issues by prevalence.
+ */
+export interface PrioritizedFix {
+ /** Source of this fix: a binary per-page defect, or a cross-cutting factor issue. */
+ kind: 'critical-defect' | 'cross-cutting'
+ /** Stable machine code: a `CriticalDefectId` (e.g. `"multiple-h1"`) or a factor id (e.g. `"technical-seo"`). */
+ id: string
+ /** Short human label, e.g. `"Multiple H1 tags"` or `"Technical SEO"`. */
+ title: string
+ /** The single highest-priority recommendation to apply for this entry. */
+ recommendation: string
+ /** Severity, for critical-defect fixes. Cross-cutting entries are ranked by prevalence instead. */
+ severity?: CriticalDefectSeverity
+ /** Every page this fix applies to — the complete list, never truncated. */
+ affectedPages: string[]
+ /** Whether any affected page is the site homepage. */
+ affectsHomepage: boolean
+ /** Share of audited pages this fix applies to (0–100). */
+ prevalencePct: number
+ /** Average grade across audited pages for the factor (cross-cutting only). */
+ avgGrade?: string
+ /** Ready-to-display one-line headline (does not inline the page list). */
+ summary: string
}
export interface CrossCuttingIssueDetail {
@@ -191,6 +279,8 @@ export interface CrossCuttingIssue {
}
export interface SitemapAuditReport {
+ /** Version of the report's JSON shape; see `AuditReport.schemaVersion` and `SCHEMA_VERSION`. */
+ schemaVersion: string
sitemapUrl: string
auditedAt: string
pagesDiscovered: number
@@ -202,8 +292,19 @@ export interface SitemapAuditReport {
aggregateScore: number
aggregateGrade: string
pages: SitemapPageResult[]
+ /**
+ * High-impact binary defects surfaced regardless of prevalence (issue #42).
+ * These do not depend on the prevalence ranking that drives `prioritizedFixes`,
+ * so a defect on a single important page still appears here.
+ */
+ criticalDefects: CriticalDefectGroup[]
crossCuttingIssues: CrossCuttingIssue[]
- prioritizedFixes: string[]
+ /**
+ * The ranked, machine-readable to-do list: critical per-page defects first, then
+ * cross-cutting factor issues by prevalence. Each entry carries stable ids and the
+ * full affected-page set so an agent can act without parsing prose.
+ */
+ prioritizedFixes: PrioritizedFix[]
}
export interface SitemapAuditPlan {
diff --git a/test/critical-defects.test.ts b/test/critical-defects.test.ts
new file mode 100644
index 0000000..2c9cb0d
--- /dev/null
+++ b/test/critical-defects.test.ts
@@ -0,0 +1,363 @@
+import { describe, it, expect } from 'vitest'
+import { load } from 'cheerio'
+
+import { buildCriticalDefects, detectCriticalDefects, isHomepageUrl } from '../src/critical-defects.js'
+import { buildPrioritizedFixes } from '../src/sitemap.js'
+import { formatSitemapMarkdown } from '../src/formatters/markdown.js'
+import { formatSitemapText } from '../src/formatters/text.js'
+import { getVisibleText, parseJsonLdScripts } from '../src/analyzers/helpers.js'
+import type {
+ AuditContext,
+ AuditReport,
+ AuxiliaryResources,
+ CriticalDefect,
+ CriticalDefectGroup,
+ CrossCuttingIssue,
+ PrioritizedFix,
+ SitemapAuditReport,
+} from '../src/types.js'
+
+function aux(): AuxiliaryResources {
+ return {
+ llmsTxt: { state: 'missing', body: '' },
+ llmsFullTxt: { state: 'missing', body: '' },
+ robotsTxt: { state: 'missing', body: '' },
+ sitemapXml: { state: 'missing', body: '' },
+ }
+}
+
+function buildContext(html: string): AuditContext {
+ const $ = load(html)
+ return {
+ $,
+ html,
+ url: 'https://example.com/',
+ headers: {},
+ auxiliary: aux(),
+ structuredData: parseJsonLdScripts($),
+ textContent: getVisibleText($, html),
+ pageTitle: $('title').first().text().trim(),
+ }
+}
+
+const HEAD = 'Page'
+
+function report(url: string, criticalDefects: CriticalDefect[]): AuditReport {
+ return {
+ schemaVersion: '1.0',
+ url,
+ finalUrl: url,
+ auditedAt: '2026-04-18T00:00:00.000Z',
+ overallScore: 75,
+ overallGrade: 'C',
+ summary: '',
+ factors: [],
+ criticalDefects,
+ metadata: {
+ fetchTimeMs: 0,
+ pageTitle: '',
+ wordCount: 0,
+ auxiliary: { llmsTxt: 'missing', llmsFullTxt: 'missing', robotsTxt: 'missing', sitemapXml: 'missing' },
+ redirectChain: [],
+ },
+ }
+}
+
+const MULTIPLE_H1: CriticalDefect = {
+ id: 'multiple-h1',
+ severity: 'critical',
+ detail: '4 H1 tags found (expected exactly one).',
+ recommendation: 'Consolidate to a single H1; 4 are present.',
+}
+const MISSING_H1: CriticalDefect = {
+ id: 'missing-h1',
+ severity: 'critical',
+ detail: 'No H1 tag.',
+ recommendation: 'Add exactly one H1.',
+}
+const MISSING_META: CriticalDefect = {
+ id: 'missing-meta-description',
+ severity: 'warning',
+ detail: 'No meta description.',
+ recommendation: 'Add a meta description.',
+}
+
+describe('detectCriticalDefects', () => {
+ it('returns no defects for a structurally healthy page', () => {
+ const html = `${HEAD}
Topic
`
+ expect(detectCriticalDefects(buildContext(html))).toEqual([])
+ })
+
+ it('flags a missing H1 as critical', () => {
+ const html = `${HEAD}
`
+ const defects = detectCriticalDefects(buildContext(html))
+ const title = defects.find((d) => d.id === 'missing-title')
+ expect(title).toBeDefined()
+ expect(title?.severity).toBe('critical')
+ })
+
+ it('flags a missing meta description as a warning', () => {
+ const html = `Page
Topic
`
+ const defects = detectCriticalDefects(buildContext(html))
+ const meta = defects.find((d) => d.id === 'missing-meta-description')
+ expect(meta).toBeDefined()
+ expect(meta?.severity).toBe('warning')
+ })
+
+ it('detects several defects on one page', () => {
+ const html = `
nothing
`
+ const ids = detectCriticalDefects(buildContext(html)).map((d) => d.id).sort()
+ expect(ids).toEqual(['missing-h1', 'missing-meta-description', 'missing-title'])
+ })
+})
+
+describe('isHomepageUrl', () => {
+ it('treats the bare origin as the homepage', () => {
+ expect(isHomepageUrl('https://example.com/')).toBe(true)
+ expect(isHomepageUrl('https://example.com')).toBe(true)
+ })
+
+ it('rejects sub-paths and query strings', () => {
+ expect(isHomepageUrl('https://example.com/contact-us')).toBe(false)
+ expect(isHomepageUrl('https://example.com/?utm=1')).toBe(false)
+ })
+
+ it('returns false for unparseable input', () => {
+ expect(isHomepageUrl('not a url')).toBe(false)
+ })
+})
+
+describe('buildCriticalDefects', () => {
+ it('returns no groups when no page has a defect', () => {
+ expect(buildCriticalDefects([report('https://example.com/', [])])).toEqual([])
+ })
+
+ it('groups the same defect across pages and names each page', () => {
+ const pages = [
+ report('https://example.com/a', [MISSING_H1]),
+ report('https://example.com/b', [MISSING_H1]),
+ ]
+ const groups = buildCriticalDefects(pages)
+ expect(groups).toHaveLength(1)
+ expect(groups[0].id).toBe('missing-h1')
+ expect(groups[0].pages.map((p) => p.url)).toEqual(['https://example.com/a', 'https://example.com/b'])
+ })
+
+ it('surfaces a single-page defect on the homepage (issue #42 scenario)', () => {
+ // Homepage has 4 H1s; one deep page is missing its H1. Both are 1-of-N
+ // prevalence yet must both appear, with the homepage defect ranked first.
+ const pages = [
+ report('https://example.com/contact-us', [MISSING_H1]),
+ report('https://example.com/', [MULTIPLE_H1]),
+ ...Array.from({ length: 23 }, (_, i) => report(`https://example.com/p${i}`, [])),
+ ]
+ const groups = buildCriticalDefects(pages)
+ expect(groups.map((g) => g.id)).toEqual(['multiple-h1', 'missing-h1'])
+ expect(groups[0].pages[0].url).toBe('https://example.com/')
+ expect(groups[0].pages[0].isHomepage).toBe(true)
+ })
+
+ it('ranks the homepage first within a group regardless of input order', () => {
+ const pages = [
+ report('https://example.com/deep', [MISSING_H1]),
+ report('https://example.com/', [MISSING_H1]),
+ ]
+ const groups = buildCriticalDefects(pages)
+ expect(groups[0].pages[0].url).toBe('https://example.com/')
+ expect(groups[0].pages[0].isHomepage).toBe(true)
+ })
+
+ it('orders pages within a group by sitemap priority when no homepage is involved', () => {
+ const priorityByUrl = new Map([
+ ['https://example.com/low', 0.2],
+ ['https://example.com/high', 0.9],
+ ])
+ const pages = [
+ report('https://example.com/low', [MISSING_H1]),
+ report('https://example.com/high', [MISSING_H1]),
+ ]
+ const groups = buildCriticalDefects(pages, priorityByUrl)
+ expect(groups[0].pages.map((p) => p.url)).toEqual(['https://example.com/high', 'https://example.com/low'])
+ expect(groups[0].pages[0].priority).toBe(0.9)
+ })
+
+ it('orders critical-severity groups ahead of warnings', () => {
+ const pages = [
+ report('https://example.com/a', [MISSING_META]),
+ report('https://example.com/b', [MISSING_META, MISSING_H1]),
+ ]
+ const groups = buildCriticalDefects(pages)
+ expect(groups.map((g) => g.severity)).toEqual(['critical', 'warning'])
+ expect(groups[0].id).toBe('missing-h1')
+ })
+})
+
+describe('buildPrioritizedFixes with critical defects', () => {
+ function crossCutting(factorName = 'FAQ Content', affectedPages = 20): CrossCuttingIssue {
+ const rec = `Improve ${factorName}.`
+ return {
+ factorId: factorName.toLowerCase().replace(/\s+/g, '-'),
+ factorName,
+ avgScore: 40,
+ avgGrade: 'F',
+ affectedPages,
+ totalPages: 25,
+ topRecommendations: [rec],
+ topIssues: [{ recommendation: rec, affectedUrls: [] }],
+ }
+ }
+
+ it('reports every cross-cutting issue, not just the top five', () => {
+ const issues = Array.from({ length: 8 }, (_, i) => crossCutting(`Factor ${i}`, 20 - i))
+ const fixes = buildPrioritizedFixes(issues, 25, [])
+ expect(fixes).toHaveLength(8)
+ for (let i = 0; i < 8; i++) {
+ expect(fixes.some((f) => f.title === `Factor ${i}`)).toBe(true)
+ }
+ })
+
+ it('returns structured fixes with stable ids and a kind', () => {
+ const fixes = buildPrioritizedFixes([crossCutting('Technical SEO')], 25, [])
+ expect(fixes[0]).toMatchObject({
+ kind: 'cross-cutting',
+ id: 'technical-seo',
+ title: 'Technical SEO',
+ avgGrade: 'F',
+ })
+ expect(typeof fixes[0].summary).toBe('string')
+ expect(typeof fixes[0].prevalencePct).toBe('number')
+ })
+
+ it('prepends critical-severity defects above the prevalence-ranked fixes', () => {
+ const defects = buildCriticalDefects([
+ report('https://example.com/', [MULTIPLE_H1]),
+ report('https://example.com/contact-us', [MISSING_H1]),
+ ])
+ const fixes = buildPrioritizedFixes([crossCutting()], 25, defects)
+
+ expect(fixes[0]).toMatchObject({ kind: 'critical-defect', id: 'multiple-h1', severity: 'critical', affectsHomepage: true })
+ expect(fixes[0].affectedPages).toContain('https://example.com/')
+ expect(fixes[1]).toMatchObject({ id: 'missing-h1', affectsHomepage: false })
+ // The prevalence-ranked fix still follows the promoted defects.
+ expect(fixes[fixes.length - 1]).toMatchObject({ kind: 'cross-cutting', title: 'FAQ Content' })
+ })
+
+ it('does not promote warning-severity defects into prioritized fixes', () => {
+ const defects = buildCriticalDefects([report('https://example.com/a', [MISSING_META])])
+ const fixes = buildPrioritizedFixes([crossCutting()], 25, defects)
+ expect(fixes.every((f) => f.id !== 'missing-meta-description')).toBe(true)
+ })
+
+ it('spells out every affected page rather than truncating with a count', () => {
+ const defects = buildCriticalDefects([
+ report('https://example.com/', [MULTIPLE_H1]),
+ report('https://example.com/x', [MULTIPLE_H1]),
+ report('https://example.com/y', [MULTIPLE_H1]),
+ ])
+ const fixes = buildPrioritizedFixes([], 25, defects)
+ expect(fixes[0].affectedPages).toEqual([
+ 'https://example.com/',
+ 'https://example.com/x',
+ 'https://example.com/y',
+ ])
+ expect(fixes[0].summary).not.toContain('more page')
+ })
+})
+
+describe('formatters list every affected page (no truncation)', () => {
+ function sitemapReport(
+ criticalDefects: CriticalDefectGroup[],
+ prioritizedFixes: PrioritizedFix[] = [],
+ ): SitemapAuditReport {
+ return {
+ schemaVersion: '1.0',
+ sitemapUrl: 'https://example.com/sitemap.xml',
+ auditedAt: '2026-04-18T00:00:00.000Z',
+ pagesDiscovered: 0,
+ pagesAudited: 0,
+ pagesSkipped: 0,
+ pagesFiltered: 0,
+ pagesTruncated: 0,
+ effectiveLimit: 200,
+ aggregateScore: 50,
+ aggregateGrade: 'F',
+ pages: [],
+ criticalDefects,
+ crossCuttingIssues: [],
+ prioritizedFixes,
+ }
+ }
+
+ // More pages than the old display cap (10) to prove the cap is gone.
+ const manyPages = Array.from({ length: 14 }, (_, i) => ({
+ url: `https://example.com/page-${i}`,
+ detail: 'No H1 tag.',
+ isHomepage: false,
+ }))
+ const group: CriticalDefectGroup = {
+ id: 'missing-h1',
+ severity: 'critical',
+ title: 'Missing H1',
+ recommendation: 'Add exactly one H1.',
+ pages: manyPages,
+ }
+
+ it('renders all affected pages in text output without a "more pages" elision', () => {
+ const text = formatSitemapText(sitemapReport([group]))
+ for (const page of manyPages) expect(text).toContain(page.url)
+ expect(text).not.toMatch(/more page/i)
+ })
+
+ it('renders all affected pages in markdown output without a "more pages" elision', () => {
+ const md = formatSitemapMarkdown(sitemapReport([group]))
+ for (const page of manyPages) expect(md).toContain(page.url)
+ expect(md).not.toMatch(/more page/i)
+ })
+
+ const bigFix: PrioritizedFix = {
+ kind: 'cross-cutting',
+ id: 'technical-seo',
+ title: 'Technical SEO',
+ recommendation: 'Add a meta description.',
+ affectedPages: manyPages.map((p) => p.url),
+ affectsHomepage: false,
+ prevalencePct: 100,
+ avgGrade: 'F',
+ summary: 'Technical SEO (avg F) — 14 pages: Add a meta description.',
+ }
+
+ it('spells out every page of each prioritized fix in text output', () => {
+ const text = formatSitemapText(sitemapReport([], [bigFix]))
+ for (const page of manyPages) expect(text).toContain(page.url)
+ expect(text).not.toMatch(/more page/i)
+ })
+
+ it('spells out every page of each prioritized fix in markdown output', () => {
+ const md = formatSitemapMarkdown(sitemapReport([], [bigFix]))
+ for (const page of manyPages) expect(md).toContain(page.url)
+ expect(md).not.toMatch(/more page/i)
+ })
+})
diff --git a/test/sitemap-cross-cutting.test.ts b/test/sitemap-cross-cutting.test.ts
index 40cc4ce..597ed37 100644
--- a/test/sitemap-cross-cutting.test.ts
+++ b/test/sitemap-cross-cutting.test.ts
@@ -18,6 +18,7 @@ function factor(overrides: Partial & { id: string; name: string })
function report(url: string, factors: ScoredFactor[]): AuditReport {
return {
+ schemaVersion: '1.0',
url,
finalUrl: url,
auditedAt: '2026-04-18T00:00:00.000Z',
@@ -25,6 +26,7 @@ function report(url: string, factors: ScoredFactor[]): AuditReport {
overallGrade: 'C',
summary: '',
factors,
+ criticalDefects: [],
metadata: {
fetchTimeMs: 0,
pageTitle: '',
diff --git a/test/static-audit.test.ts b/test/static-audit.test.ts
index e5278f6..a6a024a 100644
--- a/test/static-audit.test.ts
+++ b/test/static-audit.test.ts
@@ -90,3 +90,50 @@ describe('runStaticAudit', () => {
})
})
})
+
+describe('runStaticAudit critical defects (issue #42)', () => {
+ let dir: string
+
+ beforeAll(async () => {
+ dir = await mkdtemp(path.join(os.tmpdir(), 'aeo-defects-'))
+ // Homepage with two H1s (a split headline) — a single-page defect that the
+ // prevalence ranking would otherwise bury.
+ await writeFile(
+ path.join(dir, 'index.html'),
+ 'Home'
+ + ''
+ + '
Build
faster
Some content for the analyzers.
',
+ )
+ // A clean page so the defect really is low-prevalence.
+ await writeFile(
+ path.join(dir, 'about.html'),
+ 'About'
+ + ''
+ + '
About
Some content for the analyzers.
',
+ )
+ })
+
+ afterAll(async () => {
+ await rm(dir, { recursive: true, force: true })
+ })
+
+ it('surfaces the homepage H1 defect in criticalDefects and at the top of prioritizedFixes', async () => {
+ const result = await runStaticAudit(dir, { baseUrl: 'https://example.com' })
+ if (result.kind !== 'multi') throw new Error('expected multi')
+
+ const multipleH1 = result.report.criticalDefects.find((g) => g.id === 'multiple-h1')
+ expect(multipleH1).toBeDefined()
+ expect(multipleH1?.pages[0].url).toBe('https://example.com/')
+ expect(multipleH1?.pages[0].isHomepage).toBe(true)
+
+ // The defect leads the prioritized fixes despite affecting only 1 of 2 pages.
+ const topFix = result.report.prioritizedFixes[0]
+ expect(topFix.kind).toBe('critical-defect')
+ expect(topFix.id).toBe('multiple-h1')
+ expect(topFix.affectsHomepage).toBe(true)
+ expect(topFix.affectedPages).toContain('https://example.com/')
+
+ // The report carries a schema version so agent parsers can detect shape drift.
+ expect(result.report.schemaVersion).toBe('1.0')
+ })
+})
From 45bacd539b1cd68910bec43d479b6daf0d02cf17 Mon Sep 17 00:00:00 2001
From: Arber Xhindoli <14798762+arberx@users.noreply.github.com>
Date: Wed, 3 Jun 2026 16:18:46 -0400
Subject: [PATCH 2/4] feat: add --format agent, a slim agent-native decision
output
Returns { schemaVersion, tool, mode, url, score, grade, pass,
criticalDefectCount, issues } as JSON, where issues is the ranked
PrioritizedFix[] and the per-factor/per-page detail is omitted, so an
agent can act on the decision without averaging and re-ranking scores.
Single-page reuses the sitemap critical-defect and cross-cutting
aggregation over a one-page site; --detect-platform falls back to JSON.
Co-Authored-By: Claude Opus 4.8
---
CHANGELOG.md | 1 +
docs/cli.md | 7 +-
skills/aeo/SKILL.md | 3 +-
src/agent-summary.ts | 48 ++++++++++
src/cli.ts | 17 +++-
src/formatters/agent.ts | 15 ++++
src/index.ts | 2 +
src/types.ts | 25 ++++++
test/agent-summary.test.ts | 177 +++++++++++++++++++++++++++++++++++++
9 files changed, 291 insertions(+), 4 deletions(-)
create mode 100644 src/agent-summary.ts
create mode 100644 src/formatters/agent.ts
create mode 100644 test/agent-summary.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 475c555..9c90051 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
- **New `schemaVersion` field on `AuditReport` and `SitemapAuditReport`** (exported `SCHEMA_VERSION`, currently `"1.0"`). It versions the report's JSON shape independently of the npm package version so agent parsers can detect breaking drift instead of failing silently. Treat the absence of the field as "pre-2.0 / legacy shape."
### Added
+- **`--format agent` — a slim, agent-native decision output.** Returns `{ schemaVersion, tool, mode, url, score, grade, pass, criticalDefectCount, issues }` as JSON, where `issues` is the ranked `PrioritizedFix[]`, omitting the per-factor and per-page detail an agent would otherwise have to average and re-rank. Works for single-URL, sitemap, and static-output audits (single-page reuses the same critical-defect and cross-cutting aggregation over a one-page "site"); `--detect-platform` falls back to structured JSON. New `agentSummaryFromAudit()` / `agentSummaryFromSitemap()` exports, `AgentSummary` type, and `formatAgent` / `formatSitemapAgent` formatters.
- **Critical per-page defects surfaced by impact, not prevalence (#42).** Sitemap and static-directory reports now include a `criticalDefects` rollup and a **Critical Defects** section (text + markdown) that lists binary, one-line-fix structural defects — an `<h1>` count other than one, a missing `<title>`, a missing meta description — **regardless of how few pages exhibit them**. Previously these were detected per page but lost in aggregation: `prioritizedFixes` ranked only by prevalence (so a defect on a single page was structurally excluded), the factor score averaged the defect away to a passing grade, and `crossCuttingIssues` was keyed by factor, never the specific defect. An unambiguous, high-impact defect on the most important page (e.g. a homepage split across four `<h1>`s, or a `/contact-us` page with none) appeared nowhere in the top-level summary. Now each defect names **every** offending page (homepage and high sitemap-`priority` pages first), and critical-severity defects are promoted to the **top** of `prioritizedFixes`. Shown even with `--top-issues`.
- The end-of-report summaries no longer truncate: the Critical Defects block and each prioritized fix list **every** affected page (no "+N more"), and `prioritizedFixes` reports every cross-cutting issue ordered by prevalence rather than a top-5 slice — a fix the audit computed always reaches the report.
- New `detectCriticalDefects()`, `buildCriticalDefects()`, and `SCHEMA_VERSION` exports plus `CriticalDefect`, `CriticalDefectGroup`, `CriticalDefectAffectedPage`, `CriticalDefectId`, `CriticalDefectSeverity`, and `PrioritizedFix` types. `AuditReport` gains `criticalDefects` and `schemaVersion`; `SitemapAuditReport` gains `criticalDefects` and `schemaVersion`; `SitemapPageResult` gains the page's sitemap `priority`.
diff --git a/docs/cli.md b/docs/cli.md
index 7786472..d99594e 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -19,10 +19,15 @@ npx @ainyc/aeo-audit https://example.com --format json
# Markdown report
npx @ainyc/aeo-audit https://example.com --format markdown
+
+# Agent summary: the slim JSON decision, not the full report
+npx @ainyc/aeo-audit https://example.com --sitemap --format agent
```
`--format json` is the contract for programmatic and agent consumers: every report carries a `schemaVersion` (so a parser can detect breaking shape drift) and sitemap reports expose a `criticalDefects` rollup plus a ranked `prioritizedFixes` array of structured objects. See [api.md](api.md#machine-readable-output-for-ai-agents) for the field shapes.
+`--format agent` returns just the decision, not the report: `{ schemaVersion, tool, mode, url, score, grade, pass, criticalDefectCount, issues }`, where `issues` is the ranked `PrioritizedFix[]` (critical defects first, then cross-cutting by prevalence). It omits the per-factor and per-page detail so an agent can act without averaging and re-ranking scores itself. Works for single-URL, sitemap, and static-output audits; in `--detect-platform` mode it falls back to the structured JSON.
+
## Running a subset of factors
```bash
@@ -188,7 +193,7 @@ When fetching `/llms.txt`, `/llms-full.txt`, `/robots.txt`, and `/sitemap.xml` t
| Flag | Description |
|------|-------------|
-| `--format ` | Output format: `text` (default), `json`, `markdown` |
+| `--format ` | Output format: `text` (default), `json`, `markdown`, `agent`. `agent` emits the slim JSON decision (score, pass gate, `criticalDefectCount`, ranked `issues`) for AI agents. |
| `--factors ` | Comma-separated factor IDs to run (runs all if omitted) |
| `--include-geo` | Include the optional geographic signals factor |
| `--include-agent-skills` | Include the optional agent skill exposure factor |
diff --git a/skills/aeo/SKILL.md b/skills/aeo/SKILL.md
index 1d5a96f..2f39ee8 100644
--- a/skills/aeo/SKILL.md
+++ b/skills/aeo/SKILL.md
@@ -52,6 +52,7 @@ If no mode is provided, default to `audit`.
- `audit https://example.com --sitemap`
- `audit https://example.com --sitemap --limit 10`
- `audit https://example.com --sitemap --top-issues`
+- `audit https://example.com --sitemap --format agent` (slim decision for agents)
- `audit https://example.com --lighthouse`
- `audit https://example.com --require-meta`
- `audit https://example.com --sitemap --require-meta`
@@ -124,7 +125,7 @@ Returns:
#### Machine-readable output (for agents)
-Use `--format json`; it is the contract for programmatic use. Key fields for acting on the result without parsing prose:
+Use `--format json` for the full report, or **`--format agent`** for just the decision: `{ schemaVersion, tool, mode, url, score, grade, pass, criticalDefectCount, issues }`, where `issues` is the ranked `prioritizedFixes` and the per-factor/per-page detail is omitted. Prefer `--format agent` when you only need to decide and act. Key fields for acting on the result without parsing prose:
- `schemaVersion` (on every audit report) versions the JSON shape independently of the package version — pin to it and treat a major bump as breaking; absence means a pre-2.0 report.
- `prioritizedFixes` is a ranked array of objects, each with a stable `id`, `kind`, optional `severity`, the complete `affectedPages` list (never truncated), `affectsHomepage`, `prevalencePct`, and a human `summary`. It's the pre-computed to-do list — no need to re-rank factor scores yourself.
- Stable identifiers (`criticalDefects[].id`, `prioritizedFixes[].id`) let integrations key on codes rather than message strings.
diff --git a/src/agent-summary.ts b/src/agent-summary.ts
new file mode 100644
index 0000000..0b70708
--- /dev/null
+++ b/src/agent-summary.ts
@@ -0,0 +1,48 @@
+import { buildCriticalDefects } from './critical-defects.js'
+import { buildCrossCuttingIssues, buildPrioritizedFixes } from './sitemap.js'
+import type { AgentSummary, AuditReport, SitemapAuditReport } from './types.js'
+
+const TOOL = '@ainyc/aeo-audit'
+
+// The score >= 70 gate, mirrored from the CLI's exit-code rule. NOTE(review): this constant
+// is not exported, so nothing enforces the match — update the CLI rule in the same change.
+const PASS_THRESHOLD = 70
+
+/**
+ * Reduce a single-page `AuditReport` to the decision an agent acts on. The ranked
+ * `issues` list is computed by running the same critical-defect and cross-cutting
+ * aggregation used for sitemaps over a one-page "site", so single-URL and sitemap
+ * runs return the identical `PrioritizedFix` shape.
+ */
+export function agentSummaryFromAudit(report: AuditReport): AgentSummary {
+ const criticalDefects = buildCriticalDefects([report])
+ const crossCutting = buildCrossCuttingIssues([report])
+ const issues = buildPrioritizedFixes(crossCutting, 1, criticalDefects)
+
+ return {
+ schemaVersion: report.schemaVersion,
+ tool: TOOL,
+ mode: 'single',
+ url: report.finalUrl,
+ score: report.overallScore,
+ grade: report.overallGrade,
+ pass: report.overallScore >= PASS_THRESHOLD,
+ criticalDefectCount: criticalDefects.filter((g) => g.severity === 'critical').length,
+ issues,
+ }
+}
+
+/** Reduce a multi-page `SitemapAuditReport` to the same decision shape. */
+export function agentSummaryFromSitemap(report: SitemapAuditReport): AgentSummary {
+ return {
+ schemaVersion: report.schemaVersion,
+ tool: TOOL,
+ mode: 'sitemap',
+ url: report.sitemapUrl,
+ score: report.aggregateScore,
+ grade: report.aggregateGrade,
+ pass: report.aggregateScore >= PASS_THRESHOLD,
+ criticalDefectCount: report.criticalDefects.filter((g) => g.severity === 'critical').length,
+ issues: report.prioritizedFixes,
+ }
+}
diff --git a/src/cli.ts b/src/cli.ts
index 80d11c7..8858115 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -23,6 +23,7 @@ import {
formatSitemapText,
formatText,
} from './formatters/text.js'
+import { formatAgent, formatSitemapAgent } from './formatters/agent.js'
import type {
BatchPlatformDetectionReport,
PlatformConfidence,
@@ -33,28 +34,35 @@ import type {
SitemapPageResult,
} from './types.js'
+// `agent` is the slim machine-readable decision (score, pass gate, ranked fixes)
+// for audits. Platform-detection output has no decision list, so there `agent`
+// falls back to the already-structured JSON.
const FORMATTERS = {
json: formatJson,
markdown: formatMarkdown,
text: formatText,
+ agent: formatAgent,
}
const SITEMAP_FORMATTERS = {
json: (report: SitemapAuditReport, _topIssuesOnly: boolean) => formatSitemapJson(report),
markdown: (report: SitemapAuditReport, topIssuesOnly: boolean) => formatSitemapMarkdown(report, topIssuesOnly),
text: (report: SitemapAuditReport, topIssuesOnly: boolean) => formatSitemapText(report, topIssuesOnly),
+ agent: (report: SitemapAuditReport, _topIssuesOnly: boolean) => formatSitemapAgent(report),
}
const PLATFORM_FORMATTERS = {
json: (report: PlatformDetectionReport) => formatPlatformJson(report),
markdown: (report: PlatformDetectionReport) => formatPlatformMarkdown(report),
text: (report: PlatformDetectionReport) => formatPlatformText(report),
+ agent: (report: PlatformDetectionReport) => formatPlatformJson(report),
}
const BATCH_PLATFORM_FORMATTERS = {
json: (report: BatchPlatformDetectionReport) => formatBatchPlatformJson(report),
markdown: (report: BatchPlatformDetectionReport) => formatBatchPlatformMarkdown(report),
text: (report: BatchPlatformDetectionReport) => formatBatchPlatformText(report),
+ agent: (report: BatchPlatformDetectionReport) => formatBatchPlatformJson(report),
}
type FormatterName = keyof typeof FORMATTERS
@@ -251,7 +259,10 @@ Pass a URL to audit a live site, or a filesystem path (a .html file or a
directory of built HTML, e.g. ./out) to audit static output offline.
Options:
- --format Output format: text (default), json, markdown
+ --format Output format: text (default), json, markdown, agent.
+ 'agent' emits a slim JSON decision (score, pass gate,
+ criticalDefectCount, ranked issues[]) for AI agents —
+ none of the per-factor/per-page detail.
--factors Comma-separated factor IDs to run (runs all if omitted)
--include-geo Include optional geographic signals factor
--include-agent-skills Include optional agent skill exposure factor (Schema.org Action, MCP, form affordances)
@@ -293,6 +304,8 @@ Options:
Examples:
aeo-audit https://example.com
aeo-audit https://example.com --format json
+ aeo-audit https://example.com --format agent
+ aeo-audit https://example.com --sitemap --format agent
aeo-audit https://example.com --factors structured-data,faq-content
aeo-audit https://example.com --factors schema-validity
aeo-audit https://example.com --include-geo
@@ -335,7 +348,7 @@ export async function main(argv: string[] = process.argv): Promise {
}
if (!isFormatterName(args.format)) {
- console.error(`Error: Unknown format "${args.format}". Use: text, json, markdown`)
+ console.error(`Error: Unknown format "${args.format}". Use: text, json, markdown, agent`)
return 1
}
diff --git a/src/formatters/agent.ts b/src/formatters/agent.ts
new file mode 100644
index 0000000..4a704cc
--- /dev/null
+++ b/src/formatters/agent.ts
@@ -0,0 +1,15 @@
+import { agentSummaryFromAudit, agentSummaryFromSitemap } from '../agent-summary.js'
+import type { AuditReport, SitemapAuditReport } from '../types.js'
+
+/**
+ * `--format agent` for a single-page audit: serialise the `AgentSummary` built by
+ * `agentSummaryFromAudit` as 2-space-indented JSON. Only the summary object is
+ * emitted; the report's full per-factor detail is left out.
+ */
+export function formatAgent(report: AuditReport): string {
+  const summary = agentSummaryFromAudit(report)
+  return JSON.stringify(summary, null, 2)
+}
+
+/** `--format agent` for a sitemap run: the `agentSummaryFromSitemap` result as 2-space-indented JSON. */
+export function formatSitemapAgent(report: SitemapAuditReport): string {
+  const summary = agentSummaryFromSitemap(report)
+  return JSON.stringify(summary, null, 2)
+}
diff --git a/src/index.ts b/src/index.ts
index 5878549..32652be 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -37,12 +37,14 @@ import type {
export { runSitemapAudit } from './sitemap.js'
export { runStaticAudit } from './static-audit.js'
export { detectCriticalDefects, buildCriticalDefects } from './critical-defects.js'
+export { agentSummaryFromAudit, agentSummaryFromSitemap } from './agent-summary.js'
export { SCHEMA_VERSION } from './schema.js'
export { detectPlatform, detectPlatformBatch } from './detect-platform.js'
export { SPEC_RULES, FACTOR_SPEC_RULES, SPEC_SITE, specCitation } from './spec-references.js'
export type { SpecRule, SpecRuleId, SpecStatus } from './spec-references.js'
export type { SitemapAuditReport, SitemapAuditOptions } from './types.js'
export type {
+ AgentSummary,
CriticalDefect,
CriticalDefectAffectedPage,
CriticalDefectGroup,
diff --git a/src/types.ts b/src/types.ts
index c22d0fd..2e7fd54 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -262,6 +262,31 @@ export interface PrioritizedFix {
summary: string
}
+/**
+ * The slim, pre-computed decision an agent consumes via `--format agent`: the
+ * score, the pass/fail gate, and the ranked fix list, with none of the per-factor
+ * or per-page detail. Same underlying data as the full report, shaped as a
+ * decision an agent can act on directly instead of re-ranking factor scores.
+ */
+export interface AgentSummary {
+ /** Report schema version (see `AuditReport.schemaVersion`). */
+ schemaVersion: string
+ /** Package identity, for consumers aggregating output from multiple tools. */
+ tool: string
+ /** `single` for a one-URL/one-file audit, `sitemap` for a multi-page run. */
+ mode: 'single' | 'sitemap'
+ /** The audited page URL (single) or the sitemap/root URL (multi). */
+ url: string
+ /** The report's `overallScore` (single) or `aggregateScore` (sitemap). */
+ score: number
+ /** The report's `overallGrade` (single) or `aggregateGrade` (sitemap). */
+ grade: string
+ /** True when the score meets the >= 70 gate (the default exit-0 threshold). */
+ pass: boolean
+ /** Number of critical-severity binary defects (e.g. a missing or duplicated H1). */
+ criticalDefectCount: number
+ /** The ranked to-do list: critical defects first, then cross-cutting by prevalence. */
+ issues: PrioritizedFix[]
+}
+
export interface CrossCuttingIssueDetail {
recommendation: string
affectedUrls: string[]
diff --git a/test/agent-summary.test.ts b/test/agent-summary.test.ts
new file mode 100644
index 0000000..ba7a373
--- /dev/null
+++ b/test/agent-summary.test.ts
@@ -0,0 +1,177 @@
+import { describe, it, expect } from 'vitest'
+
+import { agentSummaryFromAudit, agentSummaryFromSitemap } from '../src/agent-summary.js'
+import { formatAgent, formatSitemapAgent } from '../src/formatters/agent.js'
+import type {
+ AuditReport,
+ CriticalDefect,
+ CriticalDefectGroup,
+ PrioritizedFix,
+ ScoredFactor,
+ SitemapAuditReport,
+} from '../src/types.js'
+
+/**
+ * Test fixture: build a `ScoredFactor` from the required `id`/`name` plus optional
+ * overrides. Unspecified fields default to a failing factor (score 40, grade 'F',
+ * status 'fail', no findings or recommendations); `weight` is always 8.
+ */
+function factor(overrides: Partial<ScoredFactor> & { id: string; name: string }): ScoredFactor {
+  return {
+    id: overrides.id,
+    name: overrides.name,
+    weight: 8,
+    score: overrides.score ?? 40,
+    grade: overrides.grade ?? 'F',
+    status: overrides.status ?? 'fail',
+    findings: overrides.findings ?? [],
+    recommendations: overrides.recommendations ?? [],
+  }
+}
+
+/**
+ * Test fixture: a minimal single-page `AuditReport` for https://example.com/ with
+ * score 60 / grade 'D-', no factors and no critical defects. Any field can be
+ * replaced through `overrides`, which is spread last.
+ */
+function auditReport(overrides: Partial<AuditReport> = {}): AuditReport {
+  return {
+    schemaVersion: '1.0',
+    url: 'https://example.com/',
+    finalUrl: 'https://example.com/',
+    auditedAt: '2026-04-18T00:00:00.000Z',
+    overallScore: 60,
+    overallGrade: 'D-',
+    summary: '',
+    factors: [],
+    criticalDefects: [],
+    metadata: {
+      fetchTimeMs: 0,
+      pageTitle: '',
+      wordCount: 0,
+      auxiliary: { llmsTxt: 'missing', llmsFullTxt: 'missing', robotsTxt: 'missing', sitemapXml: 'missing' },
+      redirectChain: [],
+    },
+    ...overrides,
+  }
+}
+
+// Fixture: a critical per-page defect (two H1s) used to seed `AuditReport.criticalDefects`.
+const MULTIPLE_H1: CriticalDefect = {
+ id: 'multiple-h1',
+ severity: 'critical',
+ detail: '2 H1 tags found (expected exactly one).',
+ recommendation: 'Consolidate to a single H1; 2 are present.',
+}
+
+// Single-page path: the summary is derived by `agentSummaryFromAudit` from one `AuditReport`.
+describe('agentSummaryFromAudit', () => {
+ it('reduces a single-page report to a decision with a ranked issue list', () => {
+ // One critical defect plus one low-scoring factor that carries a recommendation.
+ const report = auditReport({
+ criticalDefects: [MULTIPLE_H1],
+ factors: [factor({ id: 'faq-content', name: 'FAQ Content', score: 40, recommendations: ['Add FAQPage schema.'] })],
+ })
+ const summary = agentSummaryFromAudit(report)
+
+ expect(summary.mode).toBe('single')
+ expect(summary.url).toBe('https://example.com/')
+ expect(summary.score).toBe(60)
+ // The fixture's default score of 60 is below the 70 pass gate.
+ expect(summary.pass).toBe(false)
+ expect(summary.criticalDefectCount).toBe(1)
+ // Critical defect leads, then the cross-cutting factor fix.
+ expect(summary.issues[0]).toMatchObject({ kind: 'critical-defect', id: 'multiple-h1' })
+ expect(summary.issues.some((i) => i.id === 'faq-content')).toBe(true)
+ })
+
+ it('reports pass=true and no issues for a clean, passing page', () => {
+ // Score 92 clears the gate; the only factor has no recommendations and there are no defects.
+ const report = auditReport({
+ overallScore: 92,
+ overallGrade: 'A',
+ factors: [factor({ id: 'structured-data', name: 'Structured Data', score: 95, status: 'pass', grade: 'A', recommendations: [] })],
+ })
+ const summary = agentSummaryFromAudit(report)
+
+ expect(summary.pass).toBe(true)
+ expect(summary.criticalDefectCount).toBe(0)
+ expect(summary.issues).toEqual([])
+ })
+})
+
+// Sitemap path: `agentSummaryFromSitemap` maps fields of an existing `SitemapAuditReport`.
+describe('agentSummaryFromSitemap', () => {
+ // Fixture: a 25-page sitemap report (aggregate 64 / 'D') with caller-supplied
+ // critical-defect groups and prioritized fixes; `pages` is left empty.
+ function sitemapReport(
+ criticalDefects: CriticalDefectGroup[],
+ prioritizedFixes: PrioritizedFix[],
+ ): SitemapAuditReport {
+ return {
+ schemaVersion: '1.0',
+ sitemapUrl: 'https://example.com/sitemap.xml',
+ auditedAt: '2026-04-18T00:00:00.000Z',
+ pagesDiscovered: 25,
+ pagesAudited: 25,
+ pagesSkipped: 0,
+ pagesFiltered: 0,
+ pagesTruncated: 0,
+ effectiveLimit: 200,
+ aggregateScore: 64,
+ aggregateGrade: 'D',
+ pages: [],
+ criticalDefects,
+ crossCuttingIssues: [],
+ prioritizedFixes,
+ }
+ }
+
+ it('maps aggregate fields and forwards prioritizedFixes as issues', () => {
+ // One critical group and its matching pre-ranked fix (1 of 25 pages = 4%).
+ const group: CriticalDefectGroup = {
+ id: 'missing-h1',
+ severity: 'critical',
+ title: 'Missing H1',
+ recommendation: 'Add exactly one H1.',
+ pages: [{ url: 'https://example.com/contact', detail: 'No H1 tag.', isHomepage: false }],
+ }
+ const fix: PrioritizedFix = {
+ kind: 'critical-defect',
+ id: 'missing-h1',
+ title: 'Missing H1',
+ recommendation: 'Add exactly one H1.',
+ severity: 'critical',
+ affectedPages: ['https://example.com/contact'],
+ affectsHomepage: false,
+ prevalencePct: 4,
+ summary: 'Missing H1 (critical) — 1 page: Add exactly one H1.',
+ }
+ const summary = agentSummaryFromSitemap(sitemapReport([group], [fix]))
+
+ expect(summary.mode).toBe('sitemap')
+ expect(summary.url).toBe('https://example.com/sitemap.xml')
+ expect(summary.score).toBe(64)
+ // Aggregate score 64 is below the 70 pass gate.
+ expect(summary.pass).toBe(false)
+ expect(summary.criticalDefectCount).toBe(1)
+ // The fix list is expected to come back unchanged as `issues`.
+ expect(summary.issues).toEqual([fix])
+ })
+})
+
+// Formatter level: both `--format agent` formatters must produce parseable JSON.
+describe('formatAgent / formatSitemapAgent', () => {
+ it('emits valid JSON with the decision keys and none of the heavy detail', () => {
+ const parsed = JSON.parse(formatAgent(auditReport({ factors: [factor({ id: 'x', name: 'X' })] })))
+ // Both sides are sorted, so this pins the exact key set but not the key order.
+ expect(Object.keys(parsed).sort()).toEqual(
+ ['criticalDefectCount', 'grade', 'issues', 'mode', 'pass', 'schemaVersion', 'score', 'tool', 'url'].sort(),
+ )
+ // The point of agent mode: no 27 pages of factor/page detail.
+ expect(parsed.factors).toBeUndefined()
+ expect(parsed.pages).toBeUndefined()
+ expect(parsed.tool).toBe('@ainyc/aeo-audit')
+ })
+
+ it('formatSitemapAgent emits valid JSON', () => {
+ // Minimal one-page sitemap report; aggregate score 80 clears the 70 gate.
+ const report: SitemapAuditReport = {
+ schemaVersion: '1.0',
+ sitemapUrl: 'https://example.com/sitemap.xml',
+ auditedAt: '2026-04-18T00:00:00.000Z',
+ pagesDiscovered: 1,
+ pagesAudited: 1,
+ pagesSkipped: 0,
+ pagesFiltered: 0,
+ pagesTruncated: 0,
+ effectiveLimit: 200,
+ aggregateScore: 80,
+ aggregateGrade: 'B-',
+ pages: [],
+ criticalDefects: [],
+ crossCuttingIssues: [],
+ prioritizedFixes: [],
+ }
+ const parsed = JSON.parse(formatSitemapAgent(report))
+ expect(parsed.mode).toBe('sitemap')
+ expect(parsed.pass).toBe(true)
+ expect(parsed.pages).toBeUndefined()
+ })
+})
From 0b42835c929f9115dec82c5420459d45637e96bb Mon Sep 17 00:00:00 2001
From: Arber X <14798762+arberx@users.noreply.github.com>
Date: Wed, 3 Jun 2026 16:53:04 -0400
Subject: [PATCH 3/4] feat: stable codes on every finding (2.1.0) (#45)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* feat: add stable codes to every finding (2.1.0)
Every AuditFinding now carries a `code` namespaced as
<factor-id>.<check>[.<outcome>] (e.g. technical-seo.h1.multiple), so
agents and integrations key on a stable machine identifier instead of
regex-matching the human message (which can change between releases).
212 codes across all 19 analyzers; the registry is in
docs/finding-codes.md. `code` is required on AuditFinding, so the
compiler guarantees coverage, and a test enforces the convention and
global uniqueness.
hasMissingMetaDescription (the --require-meta gate) now keys on the
technical-seo.meta-description.missing code rather than a message prefix
— the first consumer migrated to codes. schemaVersion bumped to 1.1
(additive: findings gained the code field).
Co-Authored-By: Claude Opus 4.8
* docs: correct schemaVersion example value to 1.1
The api.md runSitemapAudit example printed '1.0', but SCHEMA_VERSION
was bumped to '1.1' in 2.1.0. Align the doc with the actual value.
Co-Authored-By: Claude Opus 4.8
---------
Co-authored-by: Claude Opus 4.8
---
CHANGELOG.md | 9 +
README.md | 3 +-
docs/api.md | 4 +-
docs/finding-codes.md | 278 ++++++++++++++++++++++++
package.json | 2 +-
skills/aeo/SKILL.md | 2 +-
src/analyzers/agent-skill-exposure.ts | 30 +--
src/analyzers/ai-crawler-access.ts | 12 +-
src/analyzers/ai-readable-content.ts | 39 ++--
src/analyzers/citations.ts | 22 +-
src/analyzers/content-depth.ts | 28 +--
src/analyzers/content-extractability.ts | 28 +--
src/analyzers/content-freshness.ts | 32 +--
src/analyzers/definition-blocks.ts | 18 +-
src/analyzers/eeat-signals.ts | 26 +--
src/analyzers/entity-consistency.ts | 22 +-
src/analyzers/faq-content.ts | 22 +-
src/analyzers/geographic-signals.ts | 24 +-
src/analyzers/lighthouse.ts | 7 +-
src/analyzers/named-entities.ts | 18 +-
src/analyzers/schema-completeness.ts | 28 +--
src/analyzers/schema-validity.ts | 5 +
src/analyzers/snippet-eligibility.ts | 8 +
src/analyzers/structured-data.ts | 14 +-
src/analyzers/technical-seo.ts | 23 +-
src/cli.ts | 6 +-
src/schema.ts | 2 +-
src/types.ts | 7 +
test/agent-summary.test.ts | 6 +-
test/cli-require-meta.test.ts | 11 +-
test/critical-defects.test.ts | 4 +-
test/finding-codes.test.ts | 86 ++++++++
test/sitemap-cross-cutting.test.ts | 2 +-
test/static-audit.test.ts | 2 +-
34 files changed, 616 insertions(+), 214 deletions(-)
create mode 100644 docs/finding-codes.md
create mode 100644 test/finding-codes.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9c90051..a26b22e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog
+## 2.1.0 (2026-06-03)
+
+### Added
+- **Stable finding codes.** Every `AuditFinding` now carries a `code` namespaced as `<factor-id>.<check>[.<outcome>]` (e.g. `technical-seo.h1.multiple`, `schema-validity.singleton.duplicate`), so agents and integrations key on a stable machine identifier instead of regex-matching the human `message` (which can change between releases). 212 codes across all 19 analyzers; the full registry is in [docs/finding-codes.md](docs/finding-codes.md). Codes follow a documented convention and are unique across the tool (enforced by a test). `AuditFinding.code` is required, so the compiler guarantees no finding ships without one.
+- `hasMissingMetaDescription` (the `--require-meta` gate) now keys on `technical-seo.meta-description.missing` rather than a message prefix — the first consumer migrated to codes.
+
+### Changed
+- **`schemaVersion` bumped to `1.1`** (additive: findings gained the `code` field). Report shapes are otherwise unchanged.
+
## 2.0.0 (2026-06-03)
### Breaking
diff --git a/README.md b/README.md
index c622757..e2ccc43 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@
- Audit **built HTML offline** in CI: a `next export` / `dist` / `out` directory, no network. [Static output](docs/cli.md#static-output-mode)
- Detect the **platform / CMS / framework**: WordPress, Webflow, Shopify, Next.js, Vercel. [Platform detection](docs/cli.md#platform-detection)
- Opt in to **Lighthouse, geographic, and agent-skill** factors. [Optional factors](docs/scoring.md#optional-factors)
-- `text`, `json`, and `markdown` output with **CI-friendly exit codes**. [CLI reference](docs/cli.md)
+- `text`, `json`, `markdown`, and `agent` output with **CI-friendly exit codes**. [CLI reference](docs/cli.md)
+- **Agent-native output**: a versioned `schemaVersion`, a slim `--format agent` decision, ranked structured fixes, and stable [finding codes](docs/finding-codes.md) so integrations key on codes, not prose. [API](docs/api.md#machine-readable-output-for-ai-agents)
- Use as a **library** ([API](docs/api.md)) or from Claude Code via the **`/aeo` skill** ([skill](docs/skill.md)).
Website: [canonry.ai](https://canonry.ai)
diff --git a/docs/api.md b/docs/api.md
index 1a20e0b..54e5979 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -33,7 +33,7 @@ const report = await runSitemapAudit('https://example.com', {
factors: ['schema-validity', 'structured-data'], // Optional subset
})
-console.log(report.schemaVersion) // '1.0', JSON shape version (see "Machine-readable output")
+console.log(report.schemaVersion) // '1.1', JSON shape version (see "Machine-readable output")
console.log(report.aggregateGrade) // 'B+'
console.log(report.pagesAudited) // 22
console.log(report.criticalDefects) // Binary per-page defects (multiple/missing H1, missing title/meta), grouped by defect
@@ -51,7 +51,7 @@ Each entry in `crossCuttingIssues[].topIssues` carries a `recommendation` plus t
- **`schemaVersion`** (on `AuditReport` and `SitemapAuditReport`, exported as `SCHEMA_VERSION`) versions the JSON shape independently of the npm version. Pin to it and treat a major bump as breaking; treat its absence as a pre-2.0 report.
- **`prioritizedFixes: PrioritizedFix[]`** is the ranked, pre-computed to-do list, so an agent need not average factor scores and re-rank. Each fix carries a stable `id` (a defect id like `"multiple-h1"` or a factor id like `"technical-seo"`), `kind`, an optional `severity`, the complete `affectedPages` array (never truncated), `affectsHomepage`, `prevalencePct`, and a human `summary`.
-- **Stable identifiers** on the decision surface (`criticalDefects[].id`, `prioritizedFixes[].id` / `kind`) let integrations key on codes, not on matching message strings.
+- **Stable identifiers** everywhere: the decision surface (`criticalDefects[].id`, `prioritizedFixes[].id` / `kind`) and every individual factor finding (`factors[].findings[].code`, e.g. `technical-seo.h1.multiple`) carry stable codes, so integrations key on codes, not on matching message strings. The full code registry is in [finding-codes.md](finding-codes.md).
## Static output (offline, from disk)
diff --git a/docs/finding-codes.md b/docs/finding-codes.md
new file mode 100644
index 0000000..0eec9a9
--- /dev/null
+++ b/docs/finding-codes.md
@@ -0,0 +1,278 @@
+# Finding codes
+
+Every `AuditFinding` carries a stable `code` so integrations can key on a machine identifier instead of matching the human `message` string (which may change between releases).
+
+## Convention
+
+`<factor-id>.<check>[.<outcome>]` — lowercase kebab-case, dot-separated. `<check>` names the sub-check (e.g. `h1`, `meta-description`); `<outcome>` distinguishes the outcomes of one check (e.g. `missing`, `multiple`, `single`). All branches of one check share the `<check>` segment. Codes are stable across releases and unique across the tool.
+
+## Registry
+
+### Structured Data (JSON-LD)
+
+- `structured-data.json-ld.found`
+- `structured-data.json-ld.missing`
+- `structured-data.schema.found`
+- `structured-data.schema.missing`
+- `structured-data.schema-depth.strong`
+- `structured-data.schema-depth.moderate`
+- `structured-data.schema-depth.low`
+
+### Content Depth
+
+- `content-depth.word-count.strong`
+- `content-depth.word-count.moderate`
+- `content-depth.word-count.low`
+- `content-depth.h1.single`
+- `content-depth.h1.multiple`
+- `content-depth.h1.missing`
+- `content-depth.headings.strong`
+- `content-depth.headings.moderate`
+- `content-depth.headings.low`
+- `content-depth.paragraphs.strong`
+- `content-depth.paragraphs.moderate`
+- `content-depth.paragraphs.low`
+- `content-depth.lists.present`
+- `content-depth.lists.none`
+
+### AI-Readable Content
+
+- `ai-readable-content.content-negotiation.found`
+- `ai-readable-content.aux-resource.missing`
+- `ai-readable-content.aux-resource.timeout`
+- `ai-readable-content.aux-resource.unreachable`
+- `ai-readable-content.aux-resource.not-html`
+- `ai-readable-content.aux-resource.found`
+- `ai-readable-content.llms-txt.strong`
+- `ai-readable-content.llms-txt.short`
+- `ai-readable-content.llms-full-txt.strong`
+- `ai-readable-content.llms-full-txt.short`
+- `ai-readable-content.robots-txt.found`
+- `ai-readable-content.robots-txt.unreachable`
+- `ai-readable-content.robots-txt.missing`
+- `ai-readable-content.sitemap.found`
+- `ai-readable-content.sitemap.unreachable`
+- `ai-readable-content.sitemap.missing`
+- `ai-readable-content.llms-txt-link.found`
+- `ai-readable-content.llms-txt-link.missing`
+- `ai-readable-content.markdown-endpoint.found`
+- `ai-readable-content.markdown-endpoint.missing`
+
+### E-E-A-T Signals
+
+- `eeat-signals.author.credentialed`
+- `eeat-signals.author.no-credentials`
+- `eeat-signals.author.missing`
+- `eeat-signals.author-meta.found`
+- `eeat-signals.author-meta.missing`
+- `eeat-signals.review.found`
+- `eeat-signals.review.missing`
+- `eeat-signals.trust-links.strong`
+- `eeat-signals.trust-links.partial`
+- `eeat-signals.trust-links.missing`
+- `eeat-signals.organization.with-people`
+- `eeat-signals.organization.no-people`
+- `eeat-signals.organization.missing`
+
+### FAQ Content
+
+- `faq-content.faqpage.present`
+- `faq-content.faqpage.missing`
+- `faq-content.details.multiple`
+- `faq-content.details.single`
+- `faq-content.details.none`
+- `faq-content.headings.multiple`
+- `faq-content.headings.low`
+- `faq-content.headings.missing`
+- `faq-content.qa-pairs.multiple`
+- `faq-content.qa-pairs.low`
+- `faq-content.qa-pairs.none`
+
+### Citations & Authority Signals
+
+- `citations.external-links.strong`
+- `citations.external-links.moderate`
+- `citations.external-links.low`
+- `citations.authoritative-domains.found`
+- `citations.authoritative-domains.none`
+- `citations.sameas.strong`
+- `citations.sameas.moderate`
+- `citations.sameas.missing`
+- `citations.anchor-text.strong`
+- `citations.anchor-text.moderate`
+- `citations.anchor-text.low`
+
+### Schema Completeness
+
+- `schema-completeness.schema.none`
+- `schema-completeness.local-business.strong`
+- `schema-completeness.local-business.partial`
+- `schema-completeness.local-business.low`
+- `schema-completeness.faqpage.strong`
+- `schema-completeness.faqpage.partial`
+- `schema-completeness.faqpage.low`
+- `schema-completeness.howto.strong`
+- `schema-completeness.howto.partial`
+- `schema-completeness.organization.strong`
+- `schema-completeness.organization.partial`
+- `schema-completeness.organization.low`
+- `schema-completeness.schema-depth.moderate`
+- `schema-completeness.schema-depth.low`
+
+### Schema Validity
+
+- `schema-validity.json-ld.none`
+- `schema-validity.block.empty`
+- `schema-validity.block.invalid`
+- `schema-validity.singleton.duplicate`
+- `schema-validity.block.valid`
+
+### Entity Consistency
+
+- `entity-consistency.name.missing`
+- `entity-consistency.name.single`
+- `entity-consistency.name.moderate`
+- `entity-consistency.name.multiple`
+- `entity-consistency.title.ok`
+- `entity-consistency.title.long`
+- `entity-consistency.canonical.present`
+- `entity-consistency.canonical.missing`
+- `entity-consistency.contact.ok`
+- `entity-consistency.contact.partial`
+- `entity-consistency.contact.missing`
+
+### Content Freshness
+
+- `content-freshness.date-modified.recent`
+- `content-freshness.date-modified.moderate`
+- `content-freshness.date-modified.stale`
+- `content-freshness.date-modified.missing`
+- `content-freshness.last-modified.recent`
+- `content-freshness.last-modified.older`
+- `content-freshness.last-modified.missing`
+- `content-freshness.sitemap.recent`
+- `content-freshness.sitemap.stale`
+- `content-freshness.sitemap.no-match`
+- `content-freshness.sitemap.timeout`
+- `content-freshness.sitemap.unreachable`
+- `content-freshness.sitemap.missing`
+- `content-freshness.copyright.recent`
+- `content-freshness.copyright.older`
+- `content-freshness.copyright.missing`
+
+### Content Extractability
+
+- `content-extractability.content-ratio.strong`
+- `content-extractability.content-ratio.moderate`
+- `content-extractability.content-ratio.low`
+- `content-extractability.citable-blocks.strong`
+- `content-extractability.citable-blocks.moderate`
+- `content-extractability.citable-blocks.missing`
+- `content-extractability.paywall.found`
+- `content-extractability.paywall.none`
+- `content-extractability.ad-density.high`
+- `content-extractability.ad-density.low`
+- `content-extractability.ad-density.none`
+- `content-extractability.direct-answer.strong`
+- `content-extractability.direct-answer.moderate`
+- `content-extractability.direct-answer.none`
+
+### Definition Blocks
+
+- `definition-blocks.headings.multiple`
+- `definition-blocks.headings.single`
+- `definition-blocks.headings.missing`
+- `definition-blocks.lists.found`
+- `definition-blocks.lists.none`
+- `definition-blocks.schema.found`
+- `definition-blocks.schema.missing`
+- `definition-blocks.dl.found`
+- `definition-blocks.dl.none`
+
+### AI Crawler Access
+
+- `ai-crawler-access.robots-txt.missing`
+- `ai-crawler-access.robots-txt.unreachable`
+- `ai-crawler-access.crawler.allowed`
+- `ai-crawler-access.crawler.blocked`
+- `ai-crawler-access.sitemap.found`
+- `ai-crawler-access.content-signal.found`
+
+### Named Entities
+
+- `named-entities.brand-name.strong`
+- `named-entities.brand-name.low`
+- `named-entities.brand-name.missing`
+- `named-entities.entity-name.missing`
+- `named-entities.knows-about.present`
+- `named-entities.knows-about.missing`
+- `named-entities.proper-noun-density.strong`
+- `named-entities.proper-noun-density.moderate`
+- `named-entities.proper-noun-density.low`
+
+### Technical SEO
+
+- `technical-seo.h1.single`
+- `technical-seo.h1.missing`
+- `technical-seo.h1.multiple`
+- `technical-seo.alt-text.none`
+- `technical-seo.alt-text.ok`
+- `technical-seo.alt-text.missing`
+- `technical-seo.alt-text.empty`
+- `technical-seo.meta-description.missing`
+- `technical-seo.meta-description.short`
+- `technical-seo.meta-description.long`
+- `technical-seo.meta-description.present`
+- `technical-seo.canonical.missing`
+- `technical-seo.canonical.present`
+
+### Snippet Eligibility
+
+- `snippet-eligibility.directives.none`
+- `snippet-eligibility.noindex.present`
+- `snippet-eligibility.nosnippet.present`
+- `snippet-eligibility.max-snippet.zero`
+- `snippet-eligibility.max-snippet.low`
+- `snippet-eligibility.noarchive.present`
+- `snippet-eligibility.noimageindex.present`
+- `snippet-eligibility.directives.not-restrictive`
+
+### Geographic Signals (optional)
+
+- `geographic-signals.localbusiness-schema.found`
+- `geographic-signals.localbusiness-schema.missing`
+- `geographic-signals.geo-coordinates.found`
+- `geographic-signals.geo-coordinates.missing`
+- `geographic-signals.postal-address.found`
+- `geographic-signals.postal-address.missing`
+- `geographic-signals.area-served.found`
+- `geographic-signals.area-served.missing`
+- `geographic-signals.geo-meta.found`
+- `geographic-signals.geo-meta.missing`
+- `geographic-signals.visible-location.found`
+- `geographic-signals.visible-location.missing`
+
+### Agent Skill Exposure (optional)
+
+- `agent-skill-exposure.schema-action.well-formed`
+- `agent-skill-exposure.schema-action.partial`
+- `agent-skill-exposure.schema-action.missing`
+- `agent-skill-exposure.mcp-discovery.found`
+- `agent-skill-exposure.mcp-discovery.missing`
+- `agent-skill-exposure.a2a-agent-card.found`
+- `agent-skill-exposure.a2a-agent-card.missing`
+- `agent-skill-exposure.openapi.found`
+- `agent-skill-exposure.openapi.missing`
+- `agent-skill-exposure.microdata.found`
+- `agent-skill-exposure.microdata.missing`
+- `agent-skill-exposure.forms.none`
+- `agent-skill-exposure.forms.strong`
+- `agent-skill-exposure.forms.partial`
+- `agent-skill-exposure.forms.weak`
+
+### Lighthouse (optional)
+
+- `lighthouse.psi.unreachable`
+- `lighthouse.category.missing`
+- `lighthouse.category.score`
+- `lighthouse.category.none`
diff --git a/package.json b/package.json
index 768ff50..cc3c13a 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@ainyc/aeo-audit",
- "version": "2.0.0",
+ "version": "2.1.0",
"description": "The most comprehensive open-source Answer Engine Optimization (AEO) audit tool. Scores websites across 16 ranking factors that determine AI citation.",
"type": "module",
"main": "./dist/index.js",
diff --git a/skills/aeo/SKILL.md b/skills/aeo/SKILL.md
index 2f39ee8..3b43817 100644
--- a/skills/aeo/SKILL.md
+++ b/skills/aeo/SKILL.md
@@ -128,7 +128,7 @@ Returns:
Use `--format json` for the full report, or **`--format agent`** for just the decision: `{ schemaVersion, tool, mode, url, score, grade, pass, criticalDefectCount, issues }`, where `issues` is the ranked `prioritizedFixes` and the per-factor/per-page detail is omitted. Prefer `--format agent` when you only need to decide and act. Key fields for acting on the result without parsing prose:
- `schemaVersion` (on every audit report) versions the JSON shape independently of the package version — pin to it and treat a major bump as breaking; absence means a pre-2.0 report.
- `prioritizedFixes` is a ranked array of objects, each with a stable `id`, `kind`, optional `severity`, the complete `affectedPages` list (never truncated), `affectsHomepage`, `prevalencePct`, and a human `summary`. It's the pre-computed to-do list — no need to re-rank factor scores yourself.
-- Stable identifiers (`criticalDefects[].id`, `prioritizedFixes[].id`) let integrations key on codes rather than message strings.
+- Stable identifiers everywhere — `criticalDefects[].id`, `prioritizedFixes[].id`, and every factor finding's `code` (e.g. `technical-seo.h1.multiple`) — let integrations key on codes rather than message strings.
#### Auxiliary File Diagnostics
diff --git a/src/analyzers/agent-skill-exposure.ts b/src/analyzers/agent-skill-exposure.ts
index 5eeafac..38ec5cf 100644
--- a/src/analyzers/agent-skill-exposure.ts
+++ b/src/analyzers/agent-skill-exposure.ts
@@ -124,15 +124,15 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
if (wellFormed.length > 0) {
score += 35
const types = [...new Set(wellFormed.map((a) => a.type))].slice(0, 3).join(', ')
- findings.push({ type: 'found', message: `Schema.org Action markup declared with target and inputs: ${types}.` })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.schema-action.well-formed', message: `Schema.org Action markup declared with target and inputs: ${types}.` })
} else {
score += 18
const types = [...new Set(actions.map((a) => a.type))].slice(0, 3).join(', ')
- findings.push({ type: 'info', message: `Schema.org Action types present (${types}) but missing target/urlTemplate or query-input/object shape.` })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.schema-action.partial', message: `Schema.org Action types present (${types}) but missing target/urlTemplate or query-input/object shape.` })
recommendations.push('Add target (with urlTemplate) and query-input/object to Action schema so agents know how to invoke it.')
}
} else {
- findings.push({ type: 'missing', message: 'No Schema.org Action markup detected (PotentialAction / SearchAction / OrderAction / etc.).' })
+ findings.push({ type: 'missing', code: 'agent-skill-exposure.schema-action.missing', message: 'No Schema.org Action markup detected (PotentialAction / SearchAction / OrderAction / etc.).' })
recommendations.push('Declare interactive affordances with Schema.org Action markup (e.g. SearchAction with urlTemplate and query-input) so agents can invoke them as tools.')
}
@@ -149,9 +149,9 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
: mcpMeta.length
? ``
: 'Link header'
- findings.push({ type: 'found', message: `Agent protocol discovery present (${src}).` })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.mcp-discovery.found', message: `Agent protocol discovery present (${src}).` })
} else {
- findings.push({ type: 'missing', message: 'No MCP / WebMCP / ai-plugin discovery link or header.' })
+ findings.push({ type: 'missing', code: 'agent-skill-exposure.mcp-discovery.missing', message: 'No MCP / WebMCP / ai-plugin discovery link or header.' })
recommendations.push('Expose an MCP server card via or a Link header so agents can discover your tools.')
}
@@ -166,9 +166,9 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
const agentCardHeader = /rel="?(agent-card|a2a)"?/i.test(linkHeader)
if (agentCardLink.length || agentCardMeta.length || agentCardHeader) {
score += 12
- findings.push({ type: 'found', message: 'A2A agent card discovery present — agents can fetch an agent card to negotiate capabilities.' })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.a2a-agent-card.found', message: 'A2A agent card discovery present — agents can fetch an agent card to negotiate capabilities.' })
} else {
- findings.push({ type: 'info', message: 'No A2A agent card discovery (no link/meta/Link header pointing to an agent card).' })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.a2a-agent-card.missing', message: 'No A2A agent card discovery (no link/meta/Link header pointing to an agent card).' })
recommendations.push(
`Publish an A2A agent card and advertise it via or a Link header. ${specCitation('a2a-agent-cards')}`,
)
@@ -180,9 +180,9 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
).first()
if (openapiLink.length) {
score += 10
- findings.push({ type: 'found', message: `Service description link found (type="${openapiLink.attr('type') || 'unspecified'}").` })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.openapi.found', message: `Service description link found (type="${openapiLink.attr('type') || 'unspecified'}").` })
} else {
- findings.push({ type: 'info', message: 'No OpenAPI / service-description link found.' })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.openapi.missing', message: 'No OpenAPI / service-description link found.' })
recommendations.push('Link to an OpenAPI document via so agents can see the underlying endpoint shape.')
}
@@ -191,9 +191,9 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
const itemtypeCount = $('[itemtype]').length
if (itempropCount >= 3 || itemtypeCount >= 1) {
score += 10
- findings.push({ type: 'found', message: `Microdata present (${itempropCount} itemprop, ${itemtypeCount} itemtype) — helps agents map semantic meaning.` })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.microdata.found', message: `Microdata present (${itempropCount} itemprop, ${itemtypeCount} itemtype) — helps agents map semantic meaning.` })
} else {
- findings.push({ type: 'info', message: 'Little or no microdata (itemprop / itemtype) found on the page.' })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.microdata.missing', message: 'Little or no microdata (itemprop / itemtype) found on the page.' })
}
// ── Form structural fallback (up to 25) ─────────────────────────────────
@@ -207,7 +207,7 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
})
if (candidateForms.length === 0) {
- findings.push({ type: 'info', message: 'No interactive forms detected on this page.' })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.forms.none', message: 'No interactive forms detected on this page.' })
} else {
const perFormScores: number[] = []
candidateForms.each((_, el) => {
@@ -218,12 +218,12 @@ export function analyzeAgentSkillExposure(context: AuditContext): AnalysisResult
score += formContribution
if (avg >= 80) {
- findings.push({ type: 'found', message: `${candidateForms.length} form(s) with strong agent-usable structure (labels, autocomplete, semantic types).` })
+ findings.push({ type: 'found', code: 'agent-skill-exposure.forms.strong', message: `${candidateForms.length} form(s) with strong agent-usable structure (labels, autocomplete, semantic types).` })
} else if (avg >= 40) {
- findings.push({ type: 'info', message: `${candidateForms.length} form(s) partially agent-usable. Average structure score ${Math.round(avg)}/100.` })
+ findings.push({ type: 'info', code: 'agent-skill-exposure.forms.partial', message: `${candidateForms.length} form(s) partially agent-usable. Average structure score ${Math.round(avg)}/100.` })
recommendations.push('Strengthen forms with aria-label /