Skip to content

Commit d5c45d6

Browse files
robtaylorclaude
andcommitted
Add LLM discovery enhancements
- Add <script type="text/llms.txt"> to HTML pages for inline discovery
  (following Vercel's proposal for LLM instructions in HTML)
- Reorganize llms.txt into logical sections:
  - Getting Started, User Guide, API Reference
  - Digital IP Library, Amaranth Language, Amaranth SoC
  - Optional (changelog, support)
- Configurable section mappings via llms_sections config option

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 349d9bd commit d5c45d6

2 files changed

Lines changed: 165 additions & 0 deletions

File tree

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
'sphinx_design',
7575
'sphinx_llm.txt',
7676
'sphinx_json_index',
77+
'sphinx_llms_enhancements',
7778
]
7879

7980
rst_prolog = """

tools/sphinx_llms_enhancements.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
"""
2+
Sphinx extension to enhance LLM discoverability.
3+
4+
This extension adds:
5+
1. <script type="text/llms.txt"> tags to HTML pages for inline LLM discovery
6+
(following Vercel's proposal)
7+
2. Post-processing of llms.txt to organize pages into logical sections
8+
"""
9+
import re
10+
from pathlib import Path
11+
from sphinx.application import Sphinx
12+
from sphinx.util import logging
13+
14+
logger = logging.getLogger(__name__)
15+
16+
# Default section mappings - can be overridden in conf.py
#
# Each key is a section heading; each value is a list of regular expressions
# that are tried with ``re.search`` against a page path taken from llms.txt.
# Sections are tried in the order written here and the first section with a
# matching pattern claims the page, so a broad pattern must explicitly exclude
# anything that a *later* section is supposed to receive. The same order is
# used when the sections are written out, which is why "Optional" stays last.
DEFAULT_SECTION_MAPPINGS = {
    "Getting Started": [
        r"^index\.html\.md$",
        r"^chipflow-lib/getting-started",
        r"^chipflow-lib/index",
        r"^tutorial",
        r"^examples/getting-started",
    ],
    "User Guide": [
        r"^chipflow-lib/(?!autoapi)",
        r"^examples/",
        r"^configurator/",
    ],
    "API Reference": [
        r"^chipflow-lib/autoapi/",
        r"^platform-api",
    ],
    "Digital IP Library": [
        r"^chipflow-digital-ip/",
    ],
    "Amaranth Language": [
        # The changelog is excluded here; otherwise this pattern would claim
        # "amaranth/changes..." first and the "Optional" entry below could
        # never match.
        r"^amaranth/(?!changes)(?!.*soc)",
    ],
    "Amaranth SoC": [
        r"^amaranth-soc/",
    ],
    "Optional": [
        r"^amaranth/changes",
        r"^support",
    ],
}
48+
49+
50+
def add_llms_script_tag(app: "Sphinx", pagename: str, templatename: str,
                        context: dict, doctree) -> None:
    """Add <script type="text/llms.txt"> to HTML pages.

    Connected to the ``html-page-context`` event. Builds a short inline
    llms.txt snippet (project title, description and links to the generated
    index files) and appends it to ``context['metatags']``.

    Args:
        app: The Sphinx application; ``app.config.project`` and the optional
            ``app.config.llms_txt_description`` are read.
        pagename: Unused; part of the event signature.
        templatename: Unused; part of the event signature.
        context: Template context for the page; its ``'metatags'`` entry is
            extended in place.
        doctree: Unused; part of the event signature.
    """
    # Strip before falling back so a whitespace-only description also gets
    # the default text instead of producing an empty "> " line.
    description = (getattr(app.config, 'llms_txt_description', '') or '').strip()
    if not description:
        description = "Documentation for this project."

    project = app.config.project or "Documentation"

    # A <script> element ends at the first "</script" in its content, so a
    # closing tag inside the configured text would cut the block short and
    # spill the remainder into the page. Breaking up every "</" prevents that.
    project = project.replace('</', '<\\/')
    description = description.replace('</', '<\\/')

    # Create the inline llms.txt content
    llms_script = f'''<script type="text/llms.txt">
# {project}

> {description}

For complete documentation in LLM-friendly format:
- [Documentation Index](/llms.txt) - Sitemap of all pages
- [Full Documentation](/llms-full.txt) - Complete docs in one file
- [JSON Index](/docs-index.json) - Structured metadata
</script>'''

    # Add to metatags which Sphinx/Furo will include in <head>.
    # A missing or None entry is treated as empty.
    context['metatags'] = (context.get('metatags') or '') + llms_script
77+
78+
79+
def reorganize_llms_txt(app: "Sphinx", exception) -> None:
    """Post-process llms.txt to organize pages into logical sections.

    Connected to the ``build-finished`` event. Reads ``llms.txt`` from the
    output directory, keeps everything before the ``## Pages`` heading as the
    header, and regroups the page links that follow it under the section
    headings configured in ``llms_sections``. Pages that match no pattern go
    to an "Other" section, and sections with no pages are omitted. The file
    is rewritten in place.

    Only links after the ``## Pages`` heading are regrouped. Links in the
    header are left where they are, and a file without that heading (for
    example one that has already been reorganized) is left untouched rather
    than having its links appended a second time.

    Args:
        app: The Sphinx application; ``app.outdir`` and
            ``app.config.llms_sections`` are read.
        exception: The exception that ended the build, if any. Nothing is
            done when it is set.

    Raises:
        re.error: If a configured section pattern is not a valid regex.
    """
    if exception:
        return

    llms_txt_path = Path(app.outdir) / "llms.txt"
    if not llms_txt_path.exists():
        logger.warning("llms.txt not found, skipping reorganization")
        return

    # Read current llms.txt
    content = llms_txt_path.read_text(encoding='utf-8')

    # Split into the header (everything before "## Pages") and the page list.
    pages_heading = re.search(r'^## Pages', content, re.MULTILINE)
    if pages_heading is None:
        logger.warning("Could not parse llms.txt header")
        return

    header = content[:pages_heading.start()].strip()
    body = content[pages_heading.start():]

    # Extract page links as (title, path, description) from the page list
    # only, so links that belong to the header are not pulled into sections.
    page_pattern = re.compile(r'^- \[([^\]]+)\]\(([^)]+)\)(?::\s*(.*))?$', re.MULTILINE)
    pages = [(m.group(1), m.group(2), m.group(3) or '') for m in page_pattern.finditer(body)]

    if not pages:
        logger.warning("No pages found in llms.txt")
        return

    # Get section mappings from config or use defaults
    section_mappings = getattr(app.config, 'llms_sections', None)
    if section_mappings is None:
        section_mappings = DEFAULT_SECTION_MAPPINGS

    # Compile every pattern once instead of once per page.
    compiled = {
        name: [re.compile(pattern) for pattern in patterns]
        for name, patterns in section_mappings.items()
    }

    # Categorize pages into sections; dict order is both the matching
    # priority and the output order.
    sections = {name: [] for name in compiled}
    sections.setdefault("Other", [])  # Catch-all

    for page in pages:
        path = page[1]
        target = next(
            (name for name, patterns in compiled.items()
             if any(pattern.search(path) for pattern in patterns)),
            "Other",
        )
        sections[target].append(page)

    # Build new llms.txt with sections
    parts = [header] if header else []
    written = 0
    for section_name, section_pages in sections.items():
        if not section_pages:
            continue
        written += 1
        lines = [f"## {section_name}", ""]
        lines.extend(
            f"- [{title}]({path}): {description}" if description
            else f"- [{title}]({path})"
            for title, path, description in section_pages
        )
        parts.append("\n".join(lines))

    # Write reorganized llms.txt
    llms_txt_path.write_text("\n\n".join(parts).strip() + "\n", encoding='utf-8')
    # Report only the sections that were actually written.
    logger.info("Reorganized llms.txt with %d sections", written)
148+
149+
150+
def setup(app: Sphinx):
    """Sphinx extension entry point.

    Registers the ``llms_sections`` config value, hooks the two handlers of
    this module onto their events, and returns the extension metadata dict.
    """
    extension_metadata = {
        'version': '0.1',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }

    # Custom section mappings; defaults to DEFAULT_SECTION_MAPPINGS.
    app.add_config_value('llms_sections', DEFAULT_SECTION_MAPPINGS, 'html')

    hooks = (
        # Add script tag to each HTML page
        ('html-page-context', add_llms_script_tag, {}),
        # Reorganize llms.txt after build (run after sphinx-llm)
        ('build-finished', reorganize_llms_txt, {'priority': 900}),
    )
    for event_name, handler, connect_options in hooks:
        app.connect(event_name, handler, **connect_options)

    return extension_metadata

0 commit comments

Comments
 (0)