diff --git a/apps/docs/package.json b/apps/docs/package.json
index 24618f0beb..9d9b47bb12 100644
--- a/apps/docs/package.json
+++ b/apps/docs/package.json
@@ -16,6 +16,7 @@
"lint:external-links": "tsx ./scripts/lint-external-links.ts",
"lint:images": "tsx ./scripts/lint-images.ts",
"lint:code": "tsx ./scripts/lint-code-blocks.ts",
+ "test:llm-markdown": "tsx ./scripts/test-llm-markdown-fidelity.ts",
"audit:redirects": "node ./scripts/audit-redirects.mjs",
"audit:redirects:strict": "node ./scripts/audit-redirects.mjs --strict",
"lint:spellcheck": "cspell \"content/docs/**/*.mdx\" \"content/docs/**/*.json\" --show-context"
diff --git a/apps/docs/scripts/test-llm-markdown-fidelity.ts b/apps/docs/scripts/test-llm-markdown-fidelity.ts
new file mode 100644
index 0000000000..4db2db330f
--- /dev/null
+++ b/apps/docs/scripts/test-llm-markdown-fidelity.ts
@@ -0,0 +1,176 @@
+import { strict as assert } from "node:assert";
+import { normalizeProcessedMarkdown } from "../src/lib/llm-markdown";
+
+// Matches the opening of any MDX component tag that must not survive into the
+// normalized markdown. It is tested against the output with fenced code blocks
+// removed; the trailing `\b` stops a listed name from matching as the prefix
+// of a longer identifier.
+const rawComponentPattern =
+ /<(?:APIPage|CodeBlockTabs|CodeBlockTab|Tabs|Tab|Cards|Card|Accordions|Accordion|Youtube|Button|SharedContent|Steps|Step)\b/;
+
+/** One input/expected pair fed through `normalizeProcessedMarkdown`. */
+interface SnapshotCase {
+  /** Label used as the assertion message when the case fails. */
+  name: string;
+  /** Processed MDX handed to the normalizer. */
+  input: string;
+  /** Exact markdown the normalizer must return. */
+  expected: string;
+}
+
+// Fixture table: each entry is normalized and compared verbatim with
+// `expected`, so whitespace inside the template literals is significant.
+// NOTE(review): as written, none of the `input` literals contain the component
+// tags their case is named after (the APIPage input is an empty string), so
+// the normalizer cannot produce the listed `expected` text from them. The MDX
+// tags appear to have been lost; restore the fixtures before relying on this test.
+const snapshots: SnapshotCase[] = [
+ {
+ name: "APIPage",
+ input: ``,
+ expected: `## API reference
+
+### GET /v1/example
+
+\`GET /v1/example\``,
+ },
+ {
+ name: "CodeBlockTabs",
+ input: `
+
+ npm
+ pnpm
+
+
+
+ \`\`\`bash
+ npm install @prisma/client
+ \`\`\`
+
+
+
+ \`\`\`bash
+ pnpm add @prisma/client
+ \`\`\`
+
+`,
+ expected: `#### npm
+
+\`\`\`bash
+npm install @prisma/client
+\`\`\`
+
+#### pnpm
+
+\`\`\`bash
+pnpm add @prisma/client
+\`\`\``,
+ },
+ {
+ name: "manual Tabs",
+ input: `
+
+ 1. Create \`seed.ts\`.
+
+ \`\`\`ts
+ console.log("seed");
+ \`\`\`
+
+
+
+ 1. Create \`seed.js\`.
+
+`,
+ expected: `#### TypeScript
+
+1. Create \`seed.ts\`.
+
+ \`\`\`ts
+ console.log("seed");
+ \`\`\`
+
+#### JavaScript
+
+1. Create \`seed.js\`.`,
+ },
+ {
+ name: "admonition",
+ input: `
+ Before you continue
+
+ Keep both paragraphs.
+
+ - Parent item
+ - Nested item
+`,
+ expected: `> [!WARNING]
+> Before you continue
+>
+> Keep both paragraphs.
+>
+> - Parent item
+> - Nested item`,
+ },
+ {
+ name: "Accordion and Youtube",
+ input: `
+
+
+
+`,
+ expected: `### Watch video: Multi-file Prisma schema
+
+[How to split your Prisma schema](https://www.youtube.com/watch?v=abc123)`,
+ },
+ {
+ name: "Cards",
+ input: `
+ }>
+ Provision a short-lived Prisma Postgres database.
+
+
+
+ Choose the right connection string.
+
+`,
+ expected: `- [Create a temporary database](/postgres/npx-create-db): Provision a short-lived Prisma Postgres database.
+
+- [Connect to your database](/postgres/database/connecting-to-your-database): Choose the right connection string.`,
+ },
+ {
+ name: "Button",
+ input: ``,
+ expected: `[Install the Prisma plugin for Cursor](https://cursor.com/marketplace/prisma)`,
+ },
+ {
+ name: "SharedContent and Steps",
+ input: `
+ Shared paragraph.
+
+
+
+
+ Run the command.
+
+`,
+ expected: `Shared paragraph.
+
+### Install
+
+Run the command.`,
+ },
+];
+
+/**
+ * Deletes every fenced code block from `markdown`.
+ *
+ * A block opens with three or more backticks or tildes (optionally indented)
+ * and closes on a later line that repeats the same indentation and fence.
+ */
+function stripFencedCodeBlocks(markdown: string) {
+  const fencedBlock = /^([ \t]*)([`~]{3,})[^\n]*\n[\s\S]*?^\1\2\s*$/gm;
+  return markdown.replace(fencedBlock, "");
+}
+
+// Every snapshot must normalize to its exact expected text and must not leave
+// a raw component tag anywhere outside a fenced code block.
+snapshots.forEach(({ name, input, expected }) => {
+  const actual = normalizeProcessedMarkdown(input);
+  assert.equal(actual, expected, name);
+
+  const leaksComponent = rawComponentPattern.test(stripFencedCodeBlocks(actual));
+  assert.equal(leaksComponent, false, `${name} leaves raw MDX component JSX in markdown output`);
+});
+
+// Regression guard: a fenced code block must come back from the normalizer
+// unchanged.
+const codeFenceInput = `\`\`\`tsx
+Keep component examples intact inside code fences.
+\`\`\``;
+
+assert.equal(
+ normalizeProcessedMarkdown(codeFenceInput),
+ codeFenceInput,
+ "code fences are preserved",
+);
+
+// The `+ 1` accounts for the code-fence check above, which is not part of `snapshots`.
+console.log(`LLM markdown fidelity snapshots passed (${snapshots.length + 1} cases).`);
diff --git a/apps/docs/src/lib/get-llm-text.ts b/apps/docs/src/lib/get-llm-text.ts
index d3a843f060..0a5ea9eef2 100644
--- a/apps/docs/src/lib/get-llm-text.ts
+++ b/apps/docs/src/lib/get-llm-text.ts
@@ -1,4 +1,5 @@
import { source } from "@/lib/source";
+import { normalizeProcessedMarkdown } from "@/lib/llm-markdown";
import { getPageTitleText } from "@/lib/page-title";
import { getBaseUrl, withDocsBasePath } from "@/lib/urls";
import type { InferPageType } from "fumadocs-core/source";
@@ -172,65 +173,6 @@ function formatRelatedPages(relatedPages: RelatedPageLink[]) {
return `\n\n## Related pages\n\n${links}`;
}
-function trimComponentContent(value: string) {
- const lines = value.replace(/^\n+|\n+$/g, "").split("\n");
- const indent = lines
- .filter((line) => line.trim().length > 0)
- .reduce((minimum, line) => Math.min(minimum, line.match(/^ */)?.[0].length ?? 0), Infinity);
-
- return lines
- .map((line) => (Number.isFinite(indent) ? line.slice(indent) : line))
- .join("\n")
- .trim();
-}
-
-function cleanCalloutContent(value: string) {
- return trimComponentContent(value)
- .replace(
- /([\s\S]*?)<\/Callout(?:Title|Description)>/g,
- (_match, content: string) => trimComponentContent(content),
- )
- .replace(/<\/?(?:CalloutTitle|CalloutDescription)>/g, "")
- .replace(/^(?:[ \t]*\n)+|(?:\n[ \t]*)+$/g, "")
- .split("\n")
- .map((line) => line.replace(/[ \t]+$/g, ""))
- .join("\n");
-}
-
-function formatCallout(type: string, content: string) {
- const label = type.trim().toUpperCase() || "NOTE";
- const text = cleanCalloutContent(content);
- if (!text) return "";
-
- return `> [!${label}]\n${text
- .split("\n")
- .map((line) => `> ${line}`)
- .join("\n")}`;
-}
-
-function formatCodeBlockTab(value: string, content: string) {
- const text = trimComponentContent(content);
- if (!text) return "";
-
- return `#### ${value.trim()}\n\n${text}`;
-}
-
-function normalizeProcessedMarkdown(markdown: string) {
- return markdown
- .replace(
- /]*>([\s\S]*?)<\/CalloutContainer>/g,
- (_match, type: string, content: string) => formatCallout(type, content),
- )
- .replace(/[\s\S]*?<\/CodeBlockTabsList>/g, "")
- .replace(
- /]*>([\s\S]*?)<\/CodeBlockTab>/g,
- (_match, value: string, content: string) => formatCodeBlockTab(value, content),
- )
- .replace(/<\/?CodeBlockTabs[^>]*>/g, "")
- .replace(/\n{3,}/g, "\n\n")
- .trim();
-}
-
export async function getLLMText(page: DocsPage) {
const processed = normalizeProcessedMarkdown(await page.data.getText("processed"));
const breadcrumbLine = getBreadcrumbLine(page);
diff --git a/apps/docs/src/lib/llm-markdown.ts b/apps/docs/src/lib/llm-markdown.ts
new file mode 100644
index 0000000000..8dad6a84d1
--- /dev/null
+++ b/apps/docs/src/lib/llm-markdown.ts
@@ -0,0 +1,449 @@
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+/** Minimal view of an OpenAPI document: path -> lower-case HTTP method -> operation. */
+type OpenApiSpec = {
+  paths?: Record<string, Record<string, OpenApiOperation>>;
+};
+
+/** The parts of an OpenAPI operation that are rendered into markdown. */
+type OpenApiOperation = {
+  summary?: string;
+  description?: string;
+  parameters?: OpenApiParameter[];
+  requestBody?: {
+    /** Keyed by media type; only "application/json" is read. */
+    content?: Record<string, { schema?: JsonSchema }>;
+  };
+  /** Keyed by status code. */
+  responses?: Record<string, { description?: string }>;
+};
+
+/** A single operation parameter. */
+type OpenApiParameter = {
+  name?: string;
+  /** Parameter location as given by the spec's `in` field. */
+  in?: string;
+  description?: string;
+  required?: boolean;
+  schema?: JsonSchema;
+};
+
+/** The JSON-schema fields consulted when describing parameters and request bodies. */
+type JsonSchema = {
+  type?: string | string[];
+  description?: string;
+  default?: unknown;
+  enum?: unknown[];
+  properties?: Record<string, JsonSchema>;
+  /** Names of the entries in `properties` that are required. */
+  required?: string[];
+  $ref?: string;
+};
+
+/** One entry of the `operations` array parsed from an API page tag. */
+type ApiPageOperation = {
+  path?: string;
+  method?: string;
+};
+
+// Memoized result of loadOpenApiSpec(): `undefined` means no load has been
+// attempted yet, `null` means every candidate file failed to load.
+let openApiSpecCache: OpenApiSpec | null | undefined;
+
+/**
+ * Reads a string-valued attribute out of a JSX attribute list.
+ *
+ * Supports `name="v"`, `name='v'`, `name={"v"}` and `name={'v'}`.
+ *
+ * @param attrs - Raw attribute text taken from an opening tag.
+ * @param name - Exact attribute name to look up.
+ * @returns The attribute value, or `undefined` when the attribute is absent
+ *   or is not a plain string literal.
+ */
+function getAttribute(attrs: string, name: string) {
+  // Escape the name so it is always matched literally.
+  const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  // The name must start the list or follow whitespace; otherwise `title`
+  // would also match inside `subtitle=` and `href` inside `data-href=`.
+  const pattern = new RegExp(
+    `(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|\\{\\s*"([^"]*)"\\s*\\}|\\{\\s*'([^']*)'\\s*\\})`,
+  );
+  const match = attrs.match(pattern);
+  // Exactly one of the four alternatives captured; return whichever did.
+  return match?.slice(1).find((value) => value !== undefined);
+}
+
+/**
+ * Collapses every run of whitespace to a single space and trims the ends.
+ * A missing value is passed through as `undefined`.
+ */
+function cleanInlineText(value: string | undefined) {
+  if (value == null) return undefined;
+  return value.replace(/\s+/g, " ").trim();
+}
+
+/**
+ * Renders a schema default for display: strings are returned as-is, a missing
+ * default stays `undefined`, and anything else is JSON-encoded.
+ */
+function formatDefaultValue(value: unknown) {
+  switch (typeof value) {
+    case "undefined":
+      return undefined;
+    case "string":
+      return value;
+    default:
+      return JSON.stringify(value);
+  }
+}
+
+/**
+ * Produces a short type label for a schema.
+ *
+ * Precedence: last segment of `$ref`, then `type` (array members joined with
+ * " | "), then "enum", then "object" when `properties` is present.
+ *
+ * @returns The label, or `undefined` when nothing can be derived.
+ */
+function formatSchemaType(schema: JsonSchema | undefined) {
+  if (!schema) return undefined;
+
+  const { $ref, type } = schema;
+  if ($ref) return $ref.split("/").pop();
+  if (Array.isArray(type)) return type.join(" | ");
+  if (type) return type;
+  if (schema.enum) return "enum";
+  return schema.properties ? "object" : undefined;
+}
+
+/** Returns the schema of the "application/json" media type of a request body, if any. */
+function getJsonSchema(content: OpenApiOperation["requestBody"] | undefined) {
+  const jsonMedia = content?.content?.["application/json"];
+  return jsonMedia?.schema;
+}
+
+/**
+ * Loads the cached OpenAPI document once and memoizes the outcome.
+ *
+ * Two locations relative to the current working directory are tried in order;
+ * the first file that can be read and parsed wins.
+ *
+ * @returns The parsed document, or `null` when no candidate could be loaded.
+ */
+function loadOpenApiSpec() {
+  if (openApiSpecCache !== undefined) return openApiSpecCache;
+
+  const candidatePaths = [
+    join(process.cwd(), "cache", "openapi.json"),
+    join(process.cwd(), "apps/docs/cache/openapi.json"),
+  ];
+
+  let loaded: OpenApiSpec | null = null;
+  for (const candidate of candidatePaths) {
+    try {
+      loaded = JSON.parse(readFileSync(candidate, "utf8")) as OpenApiSpec;
+      break;
+    } catch {
+      // Unreadable or unparsable file: fall through to the next candidate.
+    }
+  }
+
+  openApiSpecCache = loaded;
+  return openApiSpecCache;
+}
+
+/**
+ * Looks up one operation in the cached OpenAPI document.
+ * The method is lower-cased before the lookup.
+ *
+ * @returns The operation, or `undefined` when the document, path or method is missing.
+ */
+function getOpenApiOperation(path: string, method: string) {
+  const operationsByMethod = loadOpenApiSpec()?.paths?.[path];
+  return operationsByMethod?.[method.toLowerCase()];
+}
+
+/**
+ * Renders one parameter as a markdown bullet of the form
+ * "- `name` (location, type, required|optional): description".
+ * The description falls back to the schema's own description.
+ */
+function formatParameter(parameter: OpenApiParameter) {
+  const label = parameter.name ?? "parameter";
+
+  const facts = [
+    parameter.in ? String(parameter.in) : "parameter",
+    formatSchemaType(parameter.schema),
+    parameter.required ? "required" : "optional",
+  ]
+    .filter(Boolean)
+    .join(", ");
+
+  const description = cleanInlineText(parameter.description ?? parameter.schema?.description);
+
+  let bullet = `- \`${label}\``;
+  if (facts) bullet += ` (${facts})`;
+  if (description) bullet += `: ${description}`;
+  return bullet;
+}
+
+/**
+ * Renders the JSON request-body properties of an operation as a
+ * "#### Request body" section with one bullet per property.
+ *
+ * @returns The section, prefixed with a blank line, or "" when the body has
+ *   no JSON schema properties.
+ */
+function formatRequestBody(operation: OpenApiOperation) {
+  const schema = getJsonSchema(operation.requestBody);
+  if (!schema?.properties) return "";
+
+  const requiredNames = new Set(schema.required ?? []);
+  const bullets: string[] = [];
+
+  for (const [name, property] of Object.entries(schema.properties)) {
+    const presence = requiredNames.has(name) ? "required" : "optional";
+    const details = [formatSchemaType(property), presence].filter(Boolean).join(", ");
+
+    const defaultValue = formatDefaultValue(property?.default);
+    const metadata = [
+      cleanInlineText(property?.description),
+      defaultValue !== undefined ? `Default: \`${defaultValue}\`.` : undefined,
+    ].filter(Boolean);
+
+    let bullet = `- \`${name}\``;
+    if (details) bullet += ` (${details})`;
+    if (metadata.length > 0) bullet += `: ${metadata.join(" ")}`;
+    bullets.push(bullet);
+  }
+
+  if (bullets.length === 0) return "";
+  return `\n\n#### Request body\n\n${bullets.join("\n")}`;
+}
+
+/**
+ * Renders the responses of an operation as a "#### Responses" section with
+ * one bullet per status code.
+ *
+ * @returns The section, prefixed with a blank line, or "" when there are no responses.
+ */
+function formatResponses(operation: OpenApiOperation) {
+  const { responses } = operation;
+  if (!responses) return "";
+
+  const bullets: string[] = [];
+  for (const [status, response] of Object.entries(responses)) {
+    const description = cleanInlineText(response?.description);
+    bullets.push(description ? `- \`${status}\`: ${description}` : `- \`${status}\``);
+  }
+
+  if (bullets.length === 0) return "";
+  return `\n\n#### Responses\n\n${bullets.join("\n")}`;
+}
+
+/**
+ * Extracts the `operations={[...]}` array from an API page tag.
+ *
+ * The array literal must be valid JSON. Entries that are not plain objects,
+ * or whose `path`/`method` is present but not a string, are dropped so that
+ * callers can use those fields as strings without further checks.
+ *
+ * @param value - Source text of the tag.
+ * @returns The usable operations; empty when the attribute is missing or malformed.
+ */
+function parseApiPageOperations(value: string): ApiPageOperation[] {
+  const match = value.match(/operations=\{\s*(\[[\s\S]*?\])\s*\}/);
+  if (!match) return [];
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(match[1]);
+  } catch {
+    // Not valid JSON (for example a JS expression): nothing can be extracted.
+    return [];
+  }
+  if (!Array.isArray(parsed)) return [];
+
+  const isOptionalString = (field: unknown) => field === undefined || typeof field === "string";
+
+  return parsed.filter((entry: unknown): entry is ApiPageOperation => {
+    if (typeof entry !== "object" || entry === null || Array.isArray(entry)) return false;
+    const candidate = entry as Record<string, unknown>;
+    return isOptionalString(candidate.path) && isOptionalString(candidate.method);
+  });
+}
+
+/**
+ * Renders one API operation as a markdown section.
+ *
+ * The heading is the operation summary when the cached OpenAPI document has
+ * one, otherwise "METHOD path". It is followed by the endpoint in code
+ * formatting and, when available, the description, parameters, request body
+ * and responses.
+ *
+ * @returns The section, or "" when the operation lacks a path or method.
+ */
+function formatApiOperation(operation: ApiPageOperation) {
+  const { path, method: rawMethod } = operation;
+  if (!path || !rawMethod) return "";
+
+  const verb = rawMethod.toUpperCase();
+  const details = getOpenApiOperation(path, rawMethod);
+
+  const heading = cleanInlineText(details?.summary) || `${verb} ${path}`;
+  const sections = [`### ${heading}`, `\n\n\`${verb} ${path}\``];
+
+  const description = cleanInlineText(details?.description);
+  if (description) sections.push(`\n\n${description}`);
+
+  const parameters = details?.parameters ?? [];
+  if (parameters.length > 0) {
+    const bullets = parameters.map((parameter) => formatParameter(parameter));
+    sections.push(`\n\n#### Parameters\n\n${bullets.join("\n")}`);
+  }
+
+  if (details) sections.push(formatRequestBody(details), formatResponses(details));
+
+  return sections.join("");
+}
+
+/**
+ * Turns an API page tag into an "## API reference" section containing one
+ * subsection per operation, or a placeholder sentence when no operation
+ * could be rendered.
+ */
+function formatApiPage(value: string) {
+  const sections: string[] = [];
+  for (const operation of parseApiPageOperations(value)) {
+    const section = formatApiOperation(operation);
+    if (section) sections.push(section);
+  }
+
+  const body =
+    sections.length > 0
+      ? sections.join("\n\n")
+      : "_API reference details unavailable in markdown output._";
+
+  return `## API reference\n\n${body}`;
+}
+
+/**
+ * Dedents a component's inner text.
+ *
+ * Leading and trailing newlines are dropped, the smallest run of leading
+ * spaces found on any non-blank line is removed from every line, and the
+ * result is trimmed. Only spaces count as indentation; tabs do not.
+ */
+function trimComponentContent(value: string) {
+  const lines = value.replace(/^\n+|\n+$/g, "").split("\n");
+
+  let indent = Infinity;
+  for (const line of lines) {
+    if (line.trim().length === 0) continue;
+    const leadingSpaces = (/^ */.exec(line)?.[0] ?? "").length;
+    indent = Math.min(indent, leadingSpaces);
+  }
+
+  // `indent` stays Infinity when every line is blank; nothing to dedent then.
+  const dedented = Number.isFinite(indent) ? lines.map((line) => line.slice(indent)) : lines;
+  return dedented.join("\n").trim();
+}
+
+/**
+ * Normalizes the inner text of a callout.
+ *
+ * The content is dedented, each `CalloutTitle` / `CalloutDescription` element
+ * is replaced by its own dedented text, stray title/description tags are
+ * removed, blank lines at either end are dropped and trailing whitespace is
+ * stripped from every line.
+ */
+function cleanCalloutContent(value: string) {
+  return trimComponentContent(value)
+    .replace(
+      // Anchor on the opening tag so the capture holds only the element's own
+      // text; without it the opening tag is captured too and the text is never
+      // dedented.
+      /<Callout(?:Title|Description)>([\s\S]*?)<\/Callout(?:Title|Description)>/g,
+      (_match, content: string) => trimComponentContent(content),
+    )
+    // Remove any unpaired tags that the pass above could not match.
+    .replace(/<\/?(?:CalloutTitle|CalloutDescription)>/g, "")
+    .replace(/^(?:[ \t]*\n)+|(?:\n[ \t]*)+$/g, "")
+    .split("\n")
+    .map((line) => line.replace(/[ \t]+$/g, ""))
+    .join("\n");
+}
+
+// Callout `type` value (lower-cased) -> alert label. A Map is used so that
+// names inherited from Object.prototype (e.g. "constructor") can never be
+// mistaken for a configured type.
+const CALLOUT_LABELS = new Map<string, string>([
+  ["danger", "CAUTION"],
+  ["error", "CAUTION"],
+  ["info", "NOTE"],
+  ["note", "NOTE"],
+  ["ppg", "NOTE"],
+  ["success", "TIP"],
+  ["tip", "TIP"],
+  ["warn", "WARNING"],
+  ["warning", "WARNING"],
+]);
+
+/**
+ * Renders a callout as a blockquote alert: a `> [!LABEL]` line followed by
+ * the cleaned content, every line prefixed with "> ".
+ *
+ * @param type - Callout type; matched case-insensitively, unknown types become NOTE.
+ * @param content - Raw inner text of the callout.
+ * @returns The blockquote, or "" when the callout has no content.
+ */
+function formatCallout(type: string, content: string) {
+  const label = CALLOUT_LABELS.get(type.trim().toLowerCase()) ?? "NOTE";
+  const text = cleanCalloutContent(content);
+  if (!text) return "";
+
+  return `> [!${label}]\n${text
+    .split("\n")
+    .map((line) => `> ${line}`)
+    .join("\n")}`;
+}
+
+/**
+ * Renders one tab as a level-four heading (the trimmed tab value) followed by
+ * its dedented content. Tabs without content produce "".
+ */
+function formatCodeBlockTab(value: string, content: string) {
+  const body = trimComponentContent(content);
+  return body ? `#### ${value.trim()}\n\n${body}` : "";
+}
+
+/**
+ * Renders a titled container as a level-three heading plus its dedented content.
+ * The heading text comes from the `title` attribute, then `value`, then `fallbackTitle`.
+ */
+function formatSectionComponent(attrs: string, content: string, fallbackTitle: string) {
+  const heading = `### ${getAttribute(attrs, "title") ?? getAttribute(attrs, "value") ?? fallbackTitle}`;
+  const body = trimComponentContent(content);
+
+  return body ? `${heading}\n\n${body}` : heading;
+}
+
+/**
+ * Renders a video embed as a markdown link to the YouTube watch page.
+ * Without a `videoId` attribute only the title ("Watch video" by default) is returned.
+ */
+function formatYoutube(attrs: string) {
+  const title = getAttribute(attrs, "title") ?? "Watch video";
+  const videoId = getAttribute(attrs, "videoId");
+
+  return videoId ? `[${title}](https://www.youtube.com/watch?v=${videoId})` : title;
+}
+
+/**
+ * Rewrites HTML anchor elements as markdown links.
+ *
+ * The link label is the anchor's inner text with whitespace collapsed.
+ * Anchors without a readable `href` are reduced to their label.
+ */
+function convertHtmlLinks(value: string) {
+  // `\b` keeps the pattern from matching other tags that merely start with "a".
+  return value.replace(/<a\b([^>]*)>([\s\S]*?)<\/a>/g, (_match, attrs: string, content: string) => {
+    const href = getAttribute(attrs, "href");
+    const label = trimComponentContent(content).replace(/\s+/g, " ");
+    return href ? `[${label}](${href})` : label;
+  });
+}
+
+/**
+ * Reduces JSX-flavoured text to plain markdown: anchors become links, then
+ * capitalized component tags and leftover anchor tags are removed, JSX
+ * whitespace expressions such as `{" "}` become a single space, and the
+ * result is trimmed.
+ */
+function stripJsxTags(value: string) {
+  const withLinks = convertHtmlLinks(value);
+  const withoutComponents = withLinks.replace(/<\/?[A-Z][A-Za-z0-9]*(?:\s[^>]*)?>/g, "");
+  const withoutAnchors = withoutComponents.replace(/<\/?a(?:\s[^>]*)?>/g, "");
+  const withSpaces = withoutAnchors.replace(/\{["']\s*["']\}/g, " ");
+
+  return withSpaces.trim();
+}
+
+/**
+ * Renders a card as a one-line bullet: "- [title](href): text".
+ * The title defaults to "Card", the link is omitted without an `href`, and
+ * the body text is flattened onto a single line.
+ */
+function formatCard(attrs: string, content: string) {
+  const title = getAttribute(attrs, "title") ?? "Card";
+  const href = getAttribute(attrs, "href");
+  const body = stripJsxTags(trimComponentContent(content)).replace(/\n+/g, " ");
+
+  let bullet = href ? `- [${title}](${href})` : `- ${title}`;
+  if (body) bullet += `: ${body}`;
+  return bullet;
+}
+
+/**
+ * Renders a button as its inner content with JSX tags stripped.
+ * The attributes are ignored.
+ */
+function formatButton(_attrs: string, content: string) {
+  const inner = trimComponentContent(content);
+  return stripJsxTags(inner);
+}
+
+/**
+ * Finds the `>` that ends the opening tag starting at `startIndex`.
+ *
+ * A `>` inside a quoted attribute value or inside a `{...}` expression does
+ * not count, so attributes holding nested JSX or arrow functions are skipped.
+ *
+ * @returns Index of the closing `>`, or -1 when the tag never ends.
+ */
+function findOpeningTagEnd(value: string, startIndex: number) {
+  let quote: string | undefined;
+  let braceDepth = 0;
+
+  for (let index = startIndex; index < value.length; index++) {
+    const char = value[index];
+    const previous = value[index - 1];
+
+    if (quote) {
+      // Leave the quoted run on the matching, unescaped quote character.
+      if (char === quote && previous !== "\\") quote = undefined;
+      continue;
+    }
+
+    if (char === '"' || char === "'") {
+      quote = char;
+      continue;
+    }
+
+    if (char === "{") {
+      braceDepth++;
+      continue;
+    }
+
+    if (char === "}" && braceDepth > 0) {
+      braceDepth--;
+      continue;
+    }
+
+    if (char === ">" && braceDepth === 0) return index;
+  }
+
+  return -1;
+}
+
+/**
+ * Tells whether `<name` at `index` is exactly that tag rather than the prefix
+ * of a longer tag name (the next character must not be alphanumeric).
+ */
+function isComponentTag(value: string, index: number, name: string) {
+  const next = value[index + name.length + 1];
+  return value.startsWith(`<${name}`, index) && !/[A-Za-z0-9]/.test(next ?? "");
+}
+
+/**
+ * Replaces every `<name ...>...</name>` or self-closing `<name ... />`
+ * occurrence with the text returned by `format`.
+ *
+ * @param markdown - Text to scan.
+ * @param name - Exact component name.
+ * @param format - Receives the trimmed attribute text and the inner content
+ *   ("" for self-closing tags) and returns the replacement.
+ * @returns The rewritten text. An opening tag that never ends stops the scan;
+ *   an opening tag without a closing tag is emitted unchanged.
+ */
+function replaceComponentBlocks(
+  markdown: string,
+  name: string,
+  format: (attrs: string, content: string) => string,
+) {
+  // The full closing tag, so the content slice stops before `</`.
+  const closingTag = `</${name}>`;
+  let result = "";
+  let cursor = 0;
+
+  while (cursor < markdown.length) {
+    const start = markdown.indexOf(`<${name}`, cursor);
+    if (start === -1) {
+      result += markdown.slice(cursor);
+      break;
+    }
+
+    if (!isComponentTag(markdown, start, name)) {
+      // A longer tag name: copy through the `<` and keep scanning after it.
+      result += markdown.slice(cursor, start + 1);
+      cursor = start + 1;
+      continue;
+    }
+
+    const openingEnd = findOpeningTagEnd(markdown, start);
+    if (openingEnd === -1) {
+      result += markdown.slice(cursor);
+      break;
+    }
+
+    const openingTag = markdown.slice(start, openingEnd + 1);
+    const attrs = openingTag
+      .replace(new RegExp(`^<${name}\\b`), "")
+      .replace(/\/?>$/, "")
+      .trim();
+    const isSelfClosing = openingTag.replace(/\s+$/, "").endsWith("/>");
+
+    result += markdown.slice(cursor, start);
+
+    if (isSelfClosing) {
+      result += format(attrs, "");
+      cursor = openingEnd + 1;
+      continue;
+    }
+
+    const closingStart = markdown.indexOf(closingTag, openingEnd + 1);
+    if (closingStart === -1) {
+      result += openingTag;
+      cursor = openingEnd + 1;
+      continue;
+    }
+
+    result += format(attrs, markdown.slice(openingEnd + 1, closingStart));
+    cursor = closingStart + closingTag.length;
+  }
+
+  return result;
+}
+
+/**
+ * Swaps every fenced code block for an opaque placeholder so later rewrites
+ * cannot touch code, and returns a function that puts the blocks back.
+ *
+ * A block opens with three or more backticks or tildes (optionally indented)
+ * and closes on a later line that repeats the same indentation and fence.
+ *
+ * @returns `markdown` with placeholders in place of code blocks, and
+ *   `restore`, which substitutes the original blocks into a given string.
+ */
+function protectFencedCodeBlocks(markdown: string) {
+  const blocks: string[] = [];
+  const protectedMarkdown = markdown.replace(
+    /^([ \t]*)([`~]{3,})[^\n]*\n[\s\S]*?^\1\2\s*$/gm,
+    (match) => {
+      const token = `__LLM_FENCED_CODE_BLOCK_${blocks.length}__`;
+      blocks.push(match);
+      return token;
+    },
+  );
+
+  return {
+    markdown: protectedMarkdown,
+    restore(value: string) {
+      return blocks.reduce(
+        // A replacer function inserts the block verbatim. A replacement
+        // string would expand `$$`, `$&`, `$'` and similar sequences that
+        // legitimately occur in shell and JavaScript samples.
+        (text, block, index) => text.replace(`__LLM_FENCED_CODE_BLOCK_${index}__`, () => block),
+        value,
+      );
+    },
+  };
+}
+
+/**
+ * Converts processed MDX into plain markdown.
+ *
+ * Pass 1 (regex chain) removes MDX comments and rewrites callouts, code-block
+ * tabs, tabs, accordions, steps and shared content. Pass 2 hides fenced code
+ * blocks behind placeholders, rewrites Card and Button blocks with the
+ * brace-aware scanner, formats API page and video embeds, and then restores
+ * the code blocks. Finally, indentation in front of `###`/`####` headings and
+ * `- [` list items is removed, runs of three or more newlines collapse to
+ * two, and the result is trimmed.
+ *
+ * NOTE(review): several regex literals below are incomplete as written. The
+ * ones starting with `]*` have lost their `<Name...[^>` prefix, and the three
+ * `.replace(//g, ...)` calls begin a line comment instead of a regex, so the
+ * rest of those lines is ignored. Restore them from version control before
+ * shipping; the component each one targeted cannot be recovered from this text.
+ */
+export function normalizeProcessedMarkdown(markdown: string) {
+ const componentMarkdown = markdown
+ // Drop MDX comments of the form {/* ... */}.
+ .replace(/\{\/\*[\s\S]*?\*\/\}/g, "")
+ .replace(
+ /]*>([\s\S]*?)<\/CalloutContainer>/g,
+ (_match, type: string, content: string) => formatCallout(type, content),
+ )
+ .replace(/[\s\S]*?<\/CodeBlockTabsList>/g, "")
+ .replace(
+ /]*>([\s\S]*?)<\/CodeBlockTab>/g,
+ (_match, value: string, content: string) => formatCodeBlockTab(value, content),
+ )
+ .replace(/<\/?CodeBlockTabs[^>]*>/g, "")
+ .replace(
+ /]*>([\s\S]*?)<\/Tab>/g,
+ (_match, value: string, content: string) => formatCodeBlockTab(value, content),
+ )
+ .replace(//g, "")
+ .replace(/<\/?(?:Tabs|TabsContent)[^>]*>/g, "")
+ .replace(
+ /]*)>([\s\S]*?)<\/Accordion>/g,
+ (_match, attrs: string, content: string) =>
+ formatSectionComponent(attrs, content, "Accordion"),
+ )
+ .replace(/<\/?Accordions[^>]*>/g, "")
+ .replace(/]*)>([\s\S]*?)<\/Step>/g, (_match, attrs: string, content: string) =>
+ formatSectionComponent(attrs, content, "Step"),
+ )
+ .replace(/<\/?Steps[^>]*>/g, "")
+ .replace(/]*>([\s\S]*?)<\/SharedContent>/g, (_match, content: string) =>
+ trimComponentContent(content),
+ )
+ .replace(/]*\/>/g, "");
+
+ // From here on, fenced code blocks are placeholders, so component tags shown
+ // inside code samples are left alone.
+ const protectedCode = protectFencedCodeBlocks(componentMarkdown);
+ const withoutJsxComponents = replaceComponentBlocks(
+ replaceComponentBlocks(protectedCode.markdown, "Card", formatCard)
+ .replace(/<\/?Cards[^>]*>/g, "")
+ .replace(//g, (match: string) => formatApiPage(match))
+ .replace(//g, (_match, attrs: string) => formatYoutube(attrs)),
+ "Button",
+ formatButton,
+ );
+
+ return protectedCode
+ .restore(withoutJsxComponents)
+ // Un-indent generated headings and link bullets.
+ .replace(/^[ \t]+(#{3,4} )/gm, "$1")
+ .replace(/^[ \t]+(- \[)/gm, "$1")
+ .replace(/\n{3,}/g, "\n\n")
+ .trim();
+}