Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"lint:external-links": "tsx ./scripts/lint-external-links.ts",
"lint:images": "tsx ./scripts/lint-images.ts",
"lint:code": "tsx ./scripts/lint-code-blocks.ts",
"test:llm-markdown": "tsx ./scripts/test-llm-markdown-fidelity.ts",
"audit:redirects": "node ./scripts/audit-redirects.mjs",
"audit:redirects:strict": "node ./scripts/audit-redirects.mjs --strict",
"lint:spellcheck": "cspell \"content/docs/**/*.mdx\" \"content/docs/**/*.json\" --show-context"
Expand Down
176 changes: 176 additions & 0 deletions apps/docs/scripts/test-llm-markdown-fidelity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import { strict as assert } from "node:assert";
import { normalizeProcessedMarkdown } from "../src/lib/llm-markdown";

/**
 * Matches an opening or closing tag of any MDX component that must not survive
 * normalization. Used as a leak guard on normalized output after fenced code
 * blocks have been stripped.
 *
 * Longer names are listed before their prefixes for readability; the trailing
 * `\b` is what actually prevents `<Tab` from matching `<Table`. Because of that
 * same `\b`, `CodeBlockTabsList` / `CodeBlockTabsTrigger` and the Callout
 * wrappers have to be listed explicitly or a leaked one would go unnoticed.
 * `\/?` also flags a dangling closing tag such as `</Tabs>`.
 */
const rawComponentPattern =
  /<\/?(?:APIPage|CodeBlockTabsList|CodeBlockTabsTrigger|CodeBlockTabs|CodeBlockTab|Tabs|Tab|Cards|Card|Accordions|Accordion|Youtube|Button|SharedContent|Steps|Step|CalloutContainer|CalloutTitle|CalloutDescription)\b/;

/** One input/expected pair checked by the snapshot loop in this script. */
interface SnapshotCase {
  /** Label passed as the assertion message when this case fails. */
  name: string;
  /** Markdown (still containing MDX component tags) handed to the normalizer. */
  input: string;
  /** Exact string the normalizer is required to return for `input`. */
  expected: string;
}

// Fixture table: each entry pairs an MDX component snippet with the exact
// markdown the normalizer must emit for it. The strings are compared with
// strict equality, so whitespace inside the template literals is significant.
const snapshots: SnapshotCase[] = [
// A self-closing <APIPage> with a single GET operation becomes an
// "API reference" section: operation heading plus the method/path as inline code.
{
name: "APIPage",
input: `<APIPage document={"management-api"} operations={[{"path":"/v1/example","method":"get"}]} />`,
expected: `## API reference

### GET /v1/example

\`GET /v1/example\``,
},
// The trigger list is dropped; every <CodeBlockTab> becomes a level-4 heading
// named after its `value`, followed by the tab's fenced code block.
{
name: "CodeBlockTabs",
input: `<CodeBlockTabs groupId="package-manager">
<CodeBlockTabsList>
<CodeBlockTabsTrigger value="npm">npm</CodeBlockTabsTrigger>
<CodeBlockTabsTrigger value="pnpm">pnpm</CodeBlockTabsTrigger>
</CodeBlockTabsList>

<CodeBlockTab value="npm">
\`\`\`bash
npm install @prisma/client
\`\`\`
</CodeBlockTab>

<CodeBlockTab value="pnpm">
\`\`\`bash
pnpm add @prisma/client
\`\`\`
</CodeBlockTab>
</CodeBlockTabs>`,
expected: `#### npm

\`\`\`bash
npm install @prisma/client
\`\`\`

#### pnpm

\`\`\`bash
pnpm add @prisma/client
\`\`\``,
},
// Hand-written <Tabs>/<Tab> get the same treatment: one level-4 heading per
// tab value, with the tab's list items and code fence kept as-is.
{
name: "manual Tabs",
input: `<Tabs items={["TypeScript", "JavaScript"]}>
<Tab value="TypeScript">
1. Create \`seed.ts\`.

\`\`\`ts
console.log("seed");
\`\`\`
</Tab>

<Tab value="JavaScript">
1. Create \`seed.js\`.
</Tab>
</Tabs>`,
expected: `#### TypeScript

1. Create \`seed.ts\`.

\`\`\`ts
console.log("seed");
\`\`\`

#### JavaScript

1. Create \`seed.js\`.`,
},
// <CalloutContainer type="warning"> becomes a blockquote that opens with a
// [!WARNING] marker; the title, paragraph and list lines are all quoted.
{
name: "admonition",
input: `<CalloutContainer type="warning">
<CalloutTitle>Before you continue</CalloutTitle>

Keep both paragraphs.

- Parent item
- Nested item
</CalloutContainer>`,
expected: `> [!WARNING]
> Before you continue
>
> Keep both paragraphs.
>
> - Parent item
> - Nested item`,
},
// The accordion title becomes a level-3 heading and the <Youtube> embed
// becomes a markdown link to the watch URL, labelled with the video title.
{
name: "Accordion and Youtube",
input: `<Accordions>
<Accordion title="Watch video: Multi-file Prisma schema">
<Youtube videoId="abc123" title="How to split your Prisma schema" />
</Accordion>
</Accordions>`,
expected: `### Watch video: Multi-file Prisma schema

[How to split your Prisma schema](https://www.youtube.com/watch?v=abc123)`,
},
// Each <Card> becomes a list item: a link built from title/href, then a colon
// and the card body. The JSX `icon` prop on the first card is discarded.
{
name: "Cards",
input: `<Cards>
<Card href="/postgres/npx-create-db" title="Create a temporary database" icon={<Database className="text-primary" />}>
Provision a short-lived Prisma Postgres database.
</Card>

<Card href="/postgres/database/connecting-to-your-database" title="Connect to your database">
Choose the right connection string.
</Card>
</Cards>`,
expected: `- [Create a temporary database](/postgres/npx-create-db): Provision a short-lived Prisma Postgres database.

- [Connect to your database](/postgres/database/connecting-to-your-database): Choose the right connection string.`,
},
// A <Button asChild> wrapping an anchor collapses to a plain markdown link.
{
name: "Button",
input: `<Button asChild variant="ppg">
<a href="https://cursor.com/marketplace/prisma">Install the Prisma plugin for Cursor</a>
</Button>`,
expected: `[Install the Prisma plugin for Cursor](https://cursor.com/marketplace/prisma)`,
},
// The <SharedContent> wrapper is removed around its text, and each <Step>
// becomes a level-3 heading (from `title`) followed by the step body.
{
name: "SharedContent and Steps",
input: `<SharedContent>
Shared paragraph.
</SharedContent>

<Steps>
<Step title="Install">
Run the command.
</Step>
</Steps>`,
expected: `Shared paragraph.

### Install

Run the command.`,
},
];

/**
 * Removes fenced code blocks from `markdown` so that component-like tags shown
 * as code samples are not mistaken for leaked MDX components.
 *
 * A fence is three or more of the SAME character (backtick or tilde); a mixed
 * run is not a fence. The closing fence must start at the same indentation,
 * use the same character and be at least as long as the opening fence, which
 * is how CommonMark lets a longer fence close a shorter one. Only horizontal
 * whitespace may follow the closing fence, so blank lines after the block are
 * left in place. An opening fence with no closing fence is left untouched.
 *
 * @param markdown Markdown text to scan.
 * @returns `markdown` with every complete fenced block replaced by "".
 */
function stripFencedCodeBlocks(markdown: string) {
  // Group 1: fence indentation. Group 2: the whole opening fence.
  // Group 3: the single fence character, reused to allow a longer closing fence.
  const fencedBlock = /^([ \t]*)(([`~])\3{2,})[^\n]*\n[\s\S]*?^\1\2\3*[ \t]*$/gm;
  return markdown.replace(fencedBlock, "");
}

// Check every fixture twice: the normalized output must equal the expected
// markdown exactly, and, once fenced code is stripped, must contain no raw
// MDX component tag.
snapshots.forEach((snapshot) => {
  const actual = normalizeProcessedMarkdown(snapshot.input);
  assert.equal(actual, snapshot.expected, snapshot.name);

  const outsideCodeFences = stripFencedCodeBlocks(actual);
  const leaksRawComponent = rawComponentPattern.test(outsideCodeFences);
  assert.equal(
    leaksRawComponent,
    false,
    `${snapshot.name} leaves raw MDX component JSX in markdown output`,
  );
});

// A component tag that appears inside a fenced code sample is documentation,
// not markup: the normalizer must hand this input back unchanged.
const codeFenceInput = `\`\`\`tsx
<Card>Keep component examples intact inside code fences.</Card>
\`\`\``;

const codeFenceOutput = normalizeProcessedMarkdown(codeFenceInput);
assert.equal(codeFenceOutput, codeFenceInput, "code fences are preserved");

// The extra case is the code-fence check above, which is not in `snapshots`.
const totalCases = snapshots.length + 1;
console.log(`LLM markdown fidelity snapshots passed (${totalCases} cases).`);
60 changes: 1 addition & 59 deletions apps/docs/src/lib/get-llm-text.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { source } from "@/lib/source";
import { normalizeProcessedMarkdown } from "@/lib/llm-markdown";
import { getPageTitleText } from "@/lib/page-title";
import { getBaseUrl, withDocsBasePath } from "@/lib/urls";
import type { InferPageType } from "fumadocs-core/source";
Expand Down Expand Up @@ -172,65 +173,6 @@ function formatRelatedPages(relatedPages: RelatedPageLink[]) {
return `\n\n## Related pages\n\n${links}`;
}

/**
 * Dedents the body of a component and trims it.
 *
 * Leading and trailing newlines are removed first. The smallest run of leading
 * spaces found on any non-blank line is then cut from the start of every line
 * (tabs are not counted as indentation), and the result is trimmed.
 *
 * @param value Raw text found between a component's opening and closing tags.
 * @returns The dedented, trimmed text; "" when `value` holds only whitespace.
 */
function trimComponentContent(value: string) {
  const rows = value.replace(/^\n+|\n+$/g, "").split("\n");

  // Stays Infinity when every row is blank, in which case nothing is sliced.
  let sharedIndent = Infinity;
  for (const row of rows) {
    if (row.trim().length === 0) continue;
    const leadingSpaces = row.match(/^ */)?.[0].length ?? 0;
    if (leadingSpaces < sharedIndent) sharedIndent = leadingSpaces;
  }

  const dedented = Number.isFinite(sharedIndent)
    ? rows.map((row) => row.slice(sharedIndent))
    : rows;
  return dedented.join("\n").trim();
}

/**
 * Reduces the inside of a callout to plain markdown lines.
 *
 * @param value Text found between the callout container's tags.
 * @returns The callout text with title/description tags removed, blank lines
 *   at both ends dropped, and trailing spaces/tabs stripped from every line.
 */
function cleanCalloutContent(value: string) {
  // Replace each <CalloutTitle>/<CalloutDescription> pair with its dedented inner text.
  const unwrapped = trimComponentContent(value).replace(
    /<Callout(?:Title|Description)>([\s\S]*?)<\/Callout(?:Title|Description)>/g,
    (_whole, inner: string) => trimComponentContent(inner),
  );

  // Remove any title/description tag that did not form a pair above.
  const withoutTags = unwrapped.replace(/<\/?(?:CalloutTitle|CalloutDescription)>/g, "");

  // Drop whitespace-only lines at the very start and very end.
  const withoutEdgeBlanks = withoutTags.replace(/^(?:[ \t]*\n)+|(?:\n[ \t]*)+$/g, "");

  const cleanedLines: string[] = [];
  for (const line of withoutEdgeBlanks.split("\n")) {
    cleanedLines.push(line.replace(/[ \t]+$/g, ""));
  }
  return cleanedLines.join("\n");
}

/**
 * Renders a callout as a blockquote that opens with a `[!TYPE]` marker line.
 *
 * @param type Callout type; trimmed and upper-cased, falling back to "NOTE"
 *   when that leaves an empty string.
 * @param content Text found between the callout container's tags.
 * @returns The blockquote, or "" when the cleaned content is empty.
 */
function formatCallout(type: string, content: string) {
  const body = cleanCalloutContent(content);
  if (body === "") return "";

  const marker = type.trim().toUpperCase() || "NOTE";
  const quotedLines = body.split("\n").map((line) => `> ${line}`);
  return [`> [!${marker}]`, ...quotedLines].join("\n");
}

/**
 * Renders one code-block tab as a level-4 heading followed by the tab body.
 *
 * @param value Tab label; trimmed before being used as the heading text.
 * @param content Text found between the tab's tags.
 * @returns Heading, blank line and dedented body, or "" when the body is empty.
 */
function formatCodeBlockTab(value: string, content: string) {
  const body = trimComponentContent(content);
  if (body.length === 0) return "";

  const heading = `#### ${value.trim()}`;
  return [heading, body].join("\n\n");
}

/**
 * Rewrites callout and code-block-tab components in processed markdown into
 * plain markdown, then collapses runs of three or more newlines to two and
 * trims the result.
 *
 * NOTE: every replacement runs over the whole string; nothing here exempts
 * text that sits inside a fenced code block.
 *
 * @param markdown Processed markdown that may still contain component tags.
 * @returns The normalized markdown.
 */
function normalizeProcessedMarkdown(markdown: string) {
  // 1. Callout containers become blockquotes.
  const withCallouts = markdown.replace(
    /<CalloutContainer\s+type="([^"]+)"[^>]*>([\s\S]*?)<\/CalloutContainer>/g,
    (_whole, calloutType: string, inner: string) => formatCallout(calloutType, inner),
  );

  // 2. The tab trigger list carries no content of its own; drop it entirely.
  const withoutTabList = withCallouts.replace(
    /<CodeBlockTabsList>[\s\S]*?<\/CodeBlockTabsList>/g,
    "",
  );

  // 3. Each tab becomes a heading plus its body.
  const withTabs = withoutTabList.replace(
    /<CodeBlockTab\s+value="([^"]+)"[^>]*>([\s\S]*?)<\/CodeBlockTab>/g,
    (_whole, tabValue: string, inner: string) => formatCodeBlockTab(tabValue, inner),
  );

  // 4. Remove the remaining <CodeBlockTabs> wrapper tags.
  const withoutWrappers = withTabs.replace(/<\/?CodeBlockTabs[^>]*>/g, "");

  // 5. Tidy the blank lines left behind by the removals above.
  const collapsed = withoutWrappers.replace(/\n{3,}/g, "\n\n");
  return collapsed.trim();
}

export async function getLLMText(page: DocsPage) {
const processed = normalizeProcessedMarkdown(await page.data.getText("processed"));
const breadcrumbLine = getBreadcrumbLine(page);
Expand Down
Loading
Loading