Skip to content

Commit c5317c8

Browse files
committed
Re-added the YAML library to the find tags script.
I had removed the YAML library because I didn't want to require it, but Norm pointed out that it is already included in the Docker container.
1 parent f0d1cae commit c5317c8

2 files changed

Lines changed: 29 additions & 25 deletions

File tree

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,18 @@ These scripts will:
123123
Categories are broad topic groupings and tags are specific topic labels for filtering. **Please use existing categories and tags when possible** to keep the taxonomy consistent. New ones can be added when truly needed.
124124

125125
To see all categories and tags currently in use, run:
126+
127+
**Bash (Linux/macOS)**:
126128
```bash
127129
python3 scripts/find_tags_categories.py
128130
```
129131

130-
This is also run automatically by the `create_post` scripts when scaffolding a new post.
132+
**PowerShell (Windows)**:
133+
```powershell
134+
python scripts/find_tags_categories.py
135+
```
136+
137+
This script requires `pyyaml`, which is included in `requirements.txt`. It is also run automatically by the `create_post` scripts when scaffolding a new post.
131138

132139
## Project Structure
133140

scripts/find_tags_categories.py

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,49 @@
44
Looks for both 'tags' and 'categories' keys (common variations).
55
"""
66

7+
import yaml
78
from pathlib import Path
8-
from typing import Set
99

1010
DOCS_DIR = Path(__file__).parent.parent / "docs"
1111
EXTENSIONS = (".md", ".markdown", ".mkd")
1212

13-
def extract_frontmatter(file_path: Path) -> dict:
14-
"""Extract tags and categories from YAML front matter using string parsing."""
13+
14+
def extract_frontmatter(file_path: Path) -> dict[str, list[str]]:
15+
"""Extract YAML front matter if present."""
1516
content = file_path.read_text(encoding="utf-8")
1617
if not content.startswith("---"):
1718
return {}
18-
parts = content.split("---", 2)
19-
if len(parts) < 3:
19+
try:
20+
parts = content.split("---", 2)
21+
if len(parts) < 3:
22+
return {}
23+
fm = yaml.safe_load(parts[1])
24+
if not isinstance(fm, dict):
25+
return {}
26+
return fm
27+
except yaml.YAMLError:
28+
print(f"Warning: Invalid YAML in {file_path}")
2029
return {}
21-
result: dict[str, list[str]] = {"tags": [], "categories": []}
22-
current_key = None
23-
for line in parts[1].splitlines():
24-
stripped = line.strip()
25-
if stripped in ("tags:", "categories:"):
26-
current_key = stripped[:-1]
27-
elif current_key and stripped.startswith("- "):
28-
result[current_key].append(stripped[2:].strip())
29-
else:
30-
current_key = None
31-
return result
32-
33-
def collect_tags() -> tuple[Set[str], Set[str]]:
34-
all_tags: Set[str] = set()
35-
all_categories: Set[str] = set()
30+
31+
32+
def collect_tags() -> tuple[set[str], set[str]]:
33+
all_tags = set()
34+
all_categories = set()
3635

3736
docs_path = Path(DOCS_DIR)
3837
if not docs_path.is_dir():
3938
print(f"Error: Directory not found: {docs_path}")
4039
return all_tags, all_categories
4140

4241
for file_path in docs_path.rglob("*"):
43-
if not file_path.is_file() or not file_path.suffix.lower() in EXTENSIONS:
42+
if not file_path.is_file() or file_path.suffix.lower() not in EXTENSIONS:
4443
continue
4544

4645
fm = extract_frontmatter(file_path)
4746

48-
# Handle 'tags'
4947
tags = fm.get("tags", [])
5048
all_tags.update(str(t).strip() for t in tags if t)
5149

52-
# Handle 'categories' (sometimes used instead / in addition)
5350
cats = fm.get("categories", [])
5451
all_categories.update(str(c).strip() for c in cats if c)
5552

@@ -61,7 +58,7 @@ def collect_tags() -> tuple[Set[str], Set[str]]:
6158
return all_tags, all_categories
6259

6360

64-
def main():
61+
def main() -> None:
6562
tags, categories = collect_tags()
6663

6764
print("\nExisting categories:")

0 commit comments

Comments
 (0)