diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1927381 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/dist +/tools/__pycache__ diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..b6cba6d --- /dev/null +++ b/tools/README.md @@ -0,0 +1,64 @@ +# Tools + +This directory contains local build tooling for repository artifacts. + +## Kindle EPUB Builder + +`build_kindle_epub.py` converts the Markdown wiki into Kindle-compatible EPUB files. + +Run it from the repository root: + +```bash +python3 tools/build_kindle_epub.py +``` + +The script writes: + +- `dist/awesome-time-tracking.epub` - one all-in-one EPUB containing the README and every `details/*.md` page. +- `dist/split/Awesome-Time-Tracking-Time-Management-Methods.epub` +- `dist/split/Awesome-Time-Tracking-Personal-Productivity-Tools.epub` +- `dist/split/Awesome-Time-Tracking-Business-Time-Tracking-Software.epub` +- `dist/split/Awesome-Time-Tracking-Workforce-Payroll-and-Compliance.epub` +- `dist/split/Awesome-Time-Tracking-Industry-Specific-Time-Tracking.epub` +- `dist/split/Awesome-Time-Tracking-Calculators-Templates-and-Utilities.epub` + +Each generated book starts with a landing page that includes the repository URL: + +```text +https://github.com/ever-works/awesome-time-tracking +``` + +## Split Logic + +The split volumes are assigned from `README.md` section headings. For each `##` section, the builder classifies the heading using keyword groups defined in `VOLUMES` inside `build_kindle_epub.py`: the first group containing a keyword found in the heading wins, and headings that match no group go to the utilities/templates volume. + +Each linked `details/*.md` page is assigned to the volume of the first README section that links to it. This avoids duplicate chapters across the split books. Any detail page not linked from the README falls back to the utilities/templates volume. + +The beginning-of-book contents page for each split EPUB is grouped by the README sections that contributed articles to that volume. It only links to articles included in that EPUB. 
+ +## Markdown Support + +The builder intentionally implements a small Markdown subset that matches this wiki: + +- headings +- unordered lists +- paragraphs +- fenced code blocks +- links +- inline code +- bold and italic text + +Images are converted to their alt text. Internal links to `details/*.md` are rewritten to the generated XHTML files inside the EPUB. + +## Validation + +Before writing each EPUB, the builder parses every generated XML/XHTML document with Python's XML parser. This catches invalid XML, mismatched tags, and disallowed characters such as null bytes. + +After generation, you can also check the EPUB container: + +```bash +unzip -t dist/awesome-time-tracking.epub +unzip -t dist/split/Awesome-Time-Tracking-Time-Management-Methods.epub +``` + +EPUB files can be sent to Kindle using Amazon's Send to Kindle flow. diff --git a/tools/build_kindle_epub.py b/tools/build_kindle_epub.py new file mode 100644 index 0000000..aa64159 --- /dev/null +++ b/tools/build_kindle_epub.py @@ -0,0 +1,490 @@ +#!/usr/bin/env python3 +"""Build a Kindle-compatible EPUB from this Markdown wiki.""" + +from __future__ import annotations + +import html +import mimetypes +import posixpath +import re +import uuid +import zipfile +from datetime import datetime, timezone +from pathlib import Path +from xml.etree import ElementTree + + +ROOT = Path(__file__).resolve().parents[1] +DETAILS = ROOT / "details" +OUT = ROOT / "dist" / "awesome-time-tracking.epub" +TITLE = "Awesome Time Tracking" +AUTHOR = "Ever Works" +REPO_URL = "https://github.com/ever-works/awesome-time-tracking" + +CSS = """ +body { + font-family: serif; + line-height: 1.45; + margin: 0 5%; +} +h1, h2, h3, h4 { + font-family: sans-serif; + line-height: 1.2; + page-break-after: avoid; +} +h1 { + font-size: 1.7em; +} +a { + color: inherit; +} +code, pre { + font-family: monospace; +} +pre { + white-space: pre-wrap; + border-left: 0.2em solid #999; + padding-left: 0.8em; +} +li { + margin: 0.25em 0; +} +""".strip() + 
+VOLUMES = [ + ( + "01-methods", + "Awesome Time Tracking: Time Management Methods", + ("method", "practice", "philosophy", "principle", "technique", "thought leader", "research", "statistic", "concept", "policy", "neuroscience", "time blocking", "student"), + ), + ( + "02-personal-productivity", + "Awesome Time Tracking: Personal Productivity Tools", + ("productivity", "personal", "pomodoro", "timer", "browser", "desktop", "mobile", "digital wellness", "health", "gtd", "accessibility", "virtual coworking", "work-life"), + ), + ( + "03-business-software", + "Awesome Time Tracking: Business Time Tracking Software", + ("software", "automatic", "automated", "analytics", "professional services", "project", "team", "business", "client", "commerce", "billing", "resource", "integration", "apis", "api", "cli", "open source", "developer", "web based", "work management", "work os"), + ), + ( + "04-workforce-compliance", + "Awesome Time Tracking: Workforce, Payroll, and Compliance", + ("attendance", "payroll", "workforce", "employee", "scheduling", "monitoring", "compliance", "overtime", "legal", "government", "hr", "privacy", "tax", "workplace", "global team", "remote work"), + ), + ( + "05-industry-specific", + "Awesome Time Tracking: Industry-Specific Time Tracking", + ("construction", "healthcare", "field", "equipment", "fleet", "industry", "creative agencies", "freelance", "freelancers", "nonprofit", "hardware", "physical time clocks", "mobile workforce"), + ), + ( + "06-utilities-templates", + "Awesome Time Tracking: Calculators, Templates, and Utilities", + ("utilities", "calculators", "calculator", "templates", "template", "time clock", "world clock", "meeting planner", "rate", "timesheet", "overview", "feature", "technology", "roi", "comparison", "criticism", "process", "tool", "others"), + ), +] + + +def epub_filename(title: str) -> str: + """Return a Kindle-friendly filename matching the book title.""" + safe = re.sub(r"[^A-Za-z0-9]+", "-", title).strip("-") + 
return f"{safe}.epub" + + +def slugify(value: str) -> str: + value = value.lower() + value = re.sub(r"<[^>]+>", "", value) + value = re.sub(r"[^a-z0-9]+", "-", value) + return value.strip("-") or "section" + + +def page_title(path: Path) -> str: + for line in path.read_text(encoding="utf-8").splitlines(): + if line.startswith("#"): + return line.lstrip("#").strip() + return path.stem.replace("-", " ").title() + + +def discover_detail_order(readme: str) -> tuple[list[Path], dict[Path, str]]: + seen: set[Path] = set() + ordered: list[Path] = [] + titles: dict[Path, str] = {} + for line in readme.splitlines(): + match = re.search(r"\]\((?:/)?details/([^)#]+\.md)(?:#[^)]+)?\)", line) + if not match: + continue + path = DETAILS / match.group(1) + if path.exists() and path not in seen: + seen.add(path) + ordered.append(path) + first_link = re.search(r"\[([^\]]+)\]\([^)]+\)", line) + if first_link: + titles[path] = re.sub(r"\s+", " ", first_link.group(1)).strip() + + for path in sorted(DETAILS.glob("*.md")): + if path not in seen: + ordered.append(path) + return ordered, titles + + +def detail_path_from_link(line: str) -> Path | None: + match = re.search(r"\]\((?:/)?details/([^)#]+\.md)(?:#[^)]+)?\)", line) + if not match: + return None + path = DETAILS / match.group(1) + return path if path.exists() else None + + +def parse_readme_sections(readme: str) -> list[tuple[str, list[str]]]: + sections: list[tuple[str, list[str]]] = [] + current_title: str | None = None + current_lines: list[str] = [] + + for line in readme.splitlines(): + heading = re.match(r"^##\s+(.+?)\s*$", line) + if heading: + if current_title is not None: + sections.append((current_title, current_lines)) + current_title = heading.group(1) + current_lines = [] + elif current_title is not None: + current_lines.append(line) + + if current_title is not None: + sections.append((current_title, current_lines)) + return sections + + +def classify_section(title: str) -> str: + normalized = title.lower() + for 
slug, _book_title, keywords in VOLUMES: + if any(keyword in normalized for keyword in keywords): + return slug + return "06-utilities-templates" + + +def split_details_by_volume(readme: str, all_details: list[Path]) -> dict[str, list[Path]]: + return { + slug: [path for _section, paths in section_list for path in paths] + for slug, section_list in split_sections_by_volume(readme, all_details).items() + } + + +def split_sections_by_volume(readme: str, all_details: list[Path]) -> dict[str, list[tuple[str, list[Path]]]]: + by_volume: dict[str, list[tuple[str, list[Path]]]] = {slug: [] for slug, _title, _keywords in VOLUMES} + assigned: set[Path] = set() + + for section_title, lines in parse_readme_sections(readme): + if section_title.startswith(("🔥", "📑", "🍺", "⭐", "™", "🛡")): + continue + slug = classify_section(section_title) + section_paths: list[Path] = [] + for line in lines: + path = detail_path_from_link(line) + if path and path not in assigned: + section_paths.append(path) + assigned.add(path) + if section_paths: + by_volume[slug].append((section_title, section_paths)) + + unassigned = [path for path in all_details if path not in assigned] + if unassigned: + by_volume["06-utilities-templates"].append(("Other Detail Pages", unassigned)) + + return by_volume + + +def inline_markdown(text: str) -> str: + placeholders: list[str] = [] + + def stash(value: str) -> str: + placeholders.append(value) + return f"HTMLPLACEHOLDER{len(placeholders) - 1}TOKEN" + + def link_repl(match: re.Match[str]) -> str: + label = inline_markdown(match.group(1)) + target = match.group(2) + href = rewrite_href(target) + return stash(f'{label}') + + text = html.escape(text) + text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", lambda m: m.group(1), text) + text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", link_repl, text) + text = re.sub(r"`([^`]+)`", lambda m: stash(f"{m.group(1)}"), text) + text = re.sub(r"\*\*([^*]+)\*\*", r"\1", text) + text = re.sub(r"\*([^*]+)\*", r"\1", text) + text = 
re.sub(r"__([^_]+)__", r"\1", text) + text = re.sub(r"_([^_]+)_", r"\1", text) + for idx, value in enumerate(placeholders): + text = text.replace(f"HTMLPLACEHOLDER{idx}TOKEN", value) + return text + + +def rewrite_href(target: str) -> str: + if re.match(r"^[a-z][a-z0-9+.-]*:", target): + return target + if target.startswith("#"): + return target + + target = target.lstrip("/") + path, marker, fragment = target.partition("#") + if path == "README.md": + href = "readme.xhtml" + elif path.startswith("details/") and path.endswith(".md"): + href = f"details/{Path(path).stem}.xhtml" + else: + href = path + if marker: + href += f"#{fragment}" + return href + + +def markdown_to_body(markdown: str, *, title: str | None = None) -> str: + lines = markdown.splitlines() + out: list[str] = [] + list_stack: list[int] = [] + paragraph: list[str] = [] + in_code = False + code_lines: list[str] = [] + + def close_paragraph() -> None: + if paragraph: + out.append(f"

{inline_markdown(' '.join(paragraph))}

") + paragraph.clear() + + def close_lists(to_level: int = 0) -> None: + while len(list_stack) > to_level: + out.append("") + list_stack.pop() + + def heading_id(text: str) -> str: + base = slugify(text) + existing = {m.group(1) for m in re.finditer(r'id="([^"]+)"', "\n".join(out))} + if base not in existing: + return base + i = 1 + while f"{base}-{i}" in existing: + i += 1 + return f"{base}-{i}" + + if title: + out.append(f"

{html.escape(title)}

") + + for raw in lines: + line = raw.rstrip() + if line.startswith("```"): + close_paragraph() + close_lists() + if in_code: + out.append(f"
{html.escape(chr(10).join(code_lines))}
") + code_lines.clear() + in_code = False + else: + in_code = True + continue + if in_code: + code_lines.append(line) + continue + if not line.strip(): + close_paragraph() + close_lists() + continue + heading = re.match(r"^(#{1,6})\s+(.+)$", line) + if heading: + close_paragraph() + close_lists() + level = min(len(heading.group(1)), 6) + text = heading.group(2).strip() + out.append(f'{inline_markdown(text)}') + continue + item = re.match(r"^(\s*)[-*]\s+(.+)$", line) + if item: + close_paragraph() + level = len(item.group(1)) // 2 + 1 + while len(list_stack) < level: + out.append("