diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..776ee6e
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,36 @@
+name: CI
+
+# Required status check for pull requests into main: rebuilds dist/ from the
+# committed mirror/ and then runs the link-health audit against it.
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+
+jobs:
+  build-and-audit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      # install / dist / audit are the same Makefile targets that the
+      # contributor docs ask people to run locally before opening a PR.
+      - name: Install dependencies
+        run: make install
+
+      - name: Build dist/
+        run: make dist
+
+      - name: Audit link health
+        run: make audit
diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml
new file mode 100644
index 0000000..0a9be16
--- /dev/null
+++ b/.github/workflows/deploy-pages.yml
@@ -0,0 +1,56 @@
+name: Deploy to GitHub Pages
+
+# Runs on each push to main (in practice, when a PR merges) and on manual
+# dispatch: builds the site from the committed mirror/ and publishes dist/ to
+# GitHub Pages. mirror/ is the source of truth, so no archive.org crawl runs.
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment; don't cancel an in-progress one.
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Install dependencies
+        run: make install
+
+      - name: Build dist/
+        run: make dist
+
+      - name: Configure Pages
+        uses: actions/configure-pages@v5
+
+      - name: Upload dist/ artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: dist
+
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..cc3eb7f
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,57 @@
+name: Release
+
+# Triggered by a vX.Y.Z tag (created by `make release`). Builds the site,
+# packages dist/ as a downloadable zip, and publishes a GitHub Release whose
+# notes are the annotated tag's changelog.
+on:
+  push:
+    tags: ["v*"]
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # need the annotated tag object for its changelog
+
+      # NOTE(review): actions/checkout has been reported to leave
+      # refs/tags/<tag> pointing straight at the commit on tag-triggered runs,
+      # in which case %(contents) below yields the commit message instead of
+      # the changelog written by scripts/release.sh. Re-fetching the tag
+      # restores the tag object and is harmless when it is already present.
+      - name: Restore the annotated tag object
+        run: |
+          git fetch --force origin \
+            "refs/tags/${GITHUB_REF_NAME}:refs/tags/${GITHUB_REF_NAME}"
+
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Install dependencies
+        run: make install
+
+      - name: Build dist/
+        run: make dist
+
+      - name: Package dist/ as a zip
+        run: |
+          ( cd dist && zip -qr "../annotated-gd-lyrics-${GITHUB_REF_NAME}.zip" . )
+
+      - name: Publish GitHub Release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          NOTES=$(git for-each-ref "refs/tags/${GITHUB_REF_NAME}" --format='%(contents)')
+          if [ -z "$NOTES" ]; then
+            NOTES="Release ${GITHUB_REF_NAME}"
+          fi
+          gh release create "${GITHUB_REF_NAME}" \
+            --title "${GITHUB_REF_NAME}" \
+            --notes "$NOTES" \
+            "annotated-gd-lyrics-${GITHUB_REF_NAME}.zip"
diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..7cdcc81 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,82 @@ +# AGENTS.md — conventions for this repo + +Conventions for anyone (human or AI agent) working in this repository. Keep +changes consistent with what's here; this file is the canonical reference and +is summarized in the README and CONTRIBUTING.md. + +## What this project is + +A faithful, offline preservation of David Dodd's 1990s *Annotated Grateful Dead +Lyrics*, recovered from the Internet Archive. The flow: + +``` +mirror/ ──build_site.py──► dist/ ──CI──► GitHub Pages +(committed source of truth) (generated) +``` + +## The golden rule: never hand-edit `mirror/` or `dist/` + +- **`mirror/`** is the byte-for-byte archived source of truth. Treat it as + read-only. It is only ever (re)written by `scripts/mirror.py`. +- **`dist/`** is generated by `make dist` and is gitignored. Never edit it. +- **All content/link fixes are expressed as code** in `scripts/build_site.py`, + so they are repeatable and reviewable. There are dedicated passes: + - `HTML_FIXES` — exact literal repairs for specific malformed source tags. + - `REDIRECTS` — dead internal links whose real target lives under another name. + - `ALT_LINKS` — known-dead external links → the generated `link-gone.html`. + - anchor repair — close-match fixing of broken `#fragments`. + When you fix a link, add to the right pass; don't patch output by hand.
+ +## Quality gate + +Every change must keep the link audit green: + +```bash +make dist && make audit # audit exits non-zero on real broken links +``` + +`make audit` is the required CI check on pull requests. It fails only on *real* +broken internal links or case-mismatches; defects already present in the 1990s +source (malformed fragments, never-created anchors) are reported but tolerated. + +## Commit messages — Conventional Commits (this drives releases) + +Releases are versioned automatically from commit history, so the prefix matters: + +| Prefix | Example | Release effect | +|--------|---------|----------------| +| `feat:` | `feat: add alt link for gdhour` | minor bump | +| `fix:` / `perf:` | `fix: repair broken biblio anchor` | patch bump | +| `feat!:` / `BREAKING CHANGE:` | `feat!: restructure dist layout` | major bump | +| `docs:` `chore:` `refactor:` `test:` `ci:` `build:` | — | no release | + +Use the imperative mood. Scope is optional, e.g. `fix(build): …`. + +## Pull request flow + +`main` is protected — **no direct pushes**. All changes go through a PR: + +1. Branch off `main`. +2. Make the change (in `scripts/`, docs, or workflows — never in `mirror/`/`dist/`). +3. `make dist && make audit` locally. +4. Open a PR; CI (`make install`/`dist`/`audit`) must pass. +5. Merge → `deploy-pages` publishes the updated site to GitHub Pages. + +## Releases + +- `make release-dryrun` previews the next version + changelog. +- `make release` computes the next semver from Conventional Commits since the + last `v*` tag, then creates and pushes an annotated `vX.Y.Z` tag (no commit to + `main`). The tag triggers `.github/workflows/release.yml`, which builds the + site, attaches `annotated-gd-lyrics-vX.Y.Z.zip` (the zipped `dist/`), and publishes a GitHub Release.
+ +## Common commands + +```bash +make install # uv sync +make mirror # (re)download the raw archive — rarely needed; ~30-45 min +make dist # build dist/ from mirror/ +make audit # link-health audit of dist/ +make serve-dist # serve dist/ at http://localhost:8000 +make all # mirror (if missing) -> dist -> audit -> serve +``` diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..30fca18 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,10 @@ +# CLAUDE.md + +This project's conventions for AI agents live in **[AGENTS.md](AGENTS.md)** — +read it first. Key points: + +- Never hand-edit `mirror/` (committed source of truth) or `dist/` (generated). + Express all link/content fixes as code in `scripts/build_site.py` passes. +- Keep the audit green: `make dist && make audit`. +- Use Conventional Commits (`feat:`/`fix:`/`feat!:`) — they drive releases. +- `main` is PR-protected; all changes go through a pull request. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5770e4c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,71 @@ +# Contributing + +Thanks for helping preserve *The Annotated Grateful Dead Lyrics*! This is a +small, focused project: a faithful offline mirror of the 1990s site, with link +fixes applied at build time. Contributions are welcome — especially repairing +links and adding good substitutes for dead external ones. + +See **[AGENTS.md](AGENTS.md)** for the full conventions; this is the short, +human-friendly version. + +## Setup + +```bash +make install # installs deps with uv +make dist # build the browsable site into dist/ +make serve-dist # view it at http://localhost:8000 +make audit # check link health +``` + +You do **not** need to run `make mirror` (the ~30–45 min archive crawl) — the +`mirror/` source is committed. + +## The one rule + +**Never edit `mirror/` or `dist/` by hand.** + +- `mirror/` is the byte-for-byte archived source of truth. +- `dist/` is generated and gitignored. 
+ +All fixes are expressed as code in `scripts/build_site.py`, so they're +repeatable and reviewable. + +## Common contributions + +**Fix a broken internal link** (a page that exists under a different name): +add an entry to `REDIRECTS` in `scripts/build_site.py`. + +**Repair a malformed link in the source** (missing quote, typo'd tag): add an +exact literal `(bad, good)` entry to `HTML_FIXES`, keyed by the file. + +**Offer an alternative for a dead external link**: add an entry to `ALT_LINKS` +— it renders on the generated `link-gone.html` page. Please link to a real, +still-working resource (e.g. a Wikipedia article), not a guess. + +After any change: `make dist && make audit` must pass (the audit is the CI gate). + +## Commit messages + +We use [Conventional Commits](https://www.conventionalcommits.org/) — they +drive automatic versioning: + +- `feat: …` → minor release (e.g. `feat: add alt link for the Grateful Dead Hour`) +- `fix: …` / `perf: …` → patch release (e.g. `fix: repair broken biblio anchor`) +- `feat!: …` or a `BREAKING CHANGE:` footer → major release +- `docs:`, `chore:`, `refactor:`, `ci:`, `build:`, `test:` → no release + +## Pull requests + +`main` is protected, so: + +1. Branch off `main`. +2. Make your change and run `make dist && make audit`. +3. Open a PR. CI must pass before it can merge. +4. On merge, the site redeploys to GitHub Pages automatically. + +## A note on faithfulness + +This is a *preservation*. We fix links so the site is navigable, but we don't +rewrite the original authors' content, styling, or period HTML. When a link is +genuinely dead with no honest substitute, we leave it rather than invent a +destination. 
diff --git a/Makefile b/Makefile index 60f3269..aeb3888 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install mirror mirror-retry dist audit serve-dist all clean help +.PHONY: install mirror mirror-retry dist audit serve-dist all release release-dryrun clean help help: ## Show this help message @echo 'Usage: make [target]' @@ -48,6 +48,12 @@ all: ## Full pipeline: mirror (only if missing) -> build -> audit -> serve @$(MAKE) audit @$(MAKE) serve-dist +release: ## Tag a semver release from conventional commits (pushes tag, triggers CI release) + @./scripts/release.sh $(VERSION) +# Both release targets forward an optional VERSION (e.g. `make release VERSION=1.2.3`) to release.sh; when empty, the script derives the version from commits. +release-dryrun: ## Preview the next release version + changelog without tagging + @./scripts/release.sh --dry-run $(VERSION) + clean: ## Remove build artifacts (dist/, logs, caches) — mirror/ is kept rm -rf dist/ rm -f mirror.log diff --git a/README.md b/README.md index 195707f..5709942 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ A self-contained, offline, **faithful preservation** of David Dodd's (`artsites.ucsc.edu/GDead/agdl/`), recovered from the Internet Archive and made fully browsable on its own, with the period HTML preserved byte-for-byte. +**Live site: https://ds17f.github.io/annotatedDead/** + > The original site is frozen/offline. This project rebuilds it from a single > archive.org snapshot (timestamp `20230806233010`) and fixes the links so it > works without the dead live domain. @@ -183,6 +185,8 @@ scripts/ mirror.py # the raw crawler (make mirror / mirror-retry) build_site.py # the cleanup build (make dist) audit_links.py # the link auditor (make audit) + release.sh # tag a semver release (make release) +.github/workflows/ # CI, Pages deploy, release automation Makefile # all commands — run `make help` .mirror_state/ # crawler resume state (gitignored) ``` @@ -191,6 +195,31 @@ Run `make help` for the full target list.
--- +## Hosting & releases + +The site is hosted on **GitHub Pages** and deploys automatically: + +- **Every merge to `main`** runs CI (build + link audit) and, in a separate workflow, + publishes the site to Pages (`.github/workflows/deploy-pages.yml`). The build + uses the committed `mirror/`, so no archive.org crawl happens in CI. +- **Releases are semver-tagged.** `make release` reads + [Conventional Commits](https://www.conventionalcommits.org/) since the last + `v*` tag, picks the next version, and pushes a `vX.Y.Z` tag. That triggers + `release.yml`, which builds the site, attaches `annotated-gd-lyrics-vX.Y.Z.zip`, and publishes a + GitHub Release. Preview first with `make release-dryrun`. + +## Contributing + +`main` is protected — all changes go through a pull request with passing CI. +The cardinal rule: **never hand-edit `mirror/` or `dist/`** — express link and +content fixes as code in `scripts/build_site.py` (`HTML_FIXES`, `REDIRECTS`, +`ALT_LINKS`, anchor repair), so they're repeatable and reviewable. + +See **[CONTRIBUTING.md](CONTRIBUTING.md)** for the workflow and +**[AGENTS.md](AGENTS.md)** for the full conventions (also what AI agents follow). + +--- + ## Content source & credits Content is *The Annotated Grateful Dead Lyrics* by **David Dodd**, originally diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100755 index 0000000..26c867d --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# +# Release script for the Annotated Grateful Dead Lyrics mirror. +# +# Git tags are the source of truth for versions. This script determines the +# next semantic version from Conventional Commits since the last v* tag, +# previews the changelog, then creates and pushes an annotated tag vX.Y.Z. +# Pushing the tag triggers .github/workflows/release.yml, which builds the +# site, attaches annotated-gd-lyrics-vX.Y.Z.zip, and publishes a GitHub Release. +# +# It deliberately does NOT commit to main (main is PR-protected) — the tag is +# all that is needed.
+# +# Usage: +# scripts/release.sh # auto version from commits +# scripts/release.sh 1.2.3 # explicit version +# scripts/release.sh --dry-run # preview only, no tag created/pushed +# scripts/release.sh --dry-run 1.2.3 +# +# Version bump rules (Conventional Commits since the last tag): +# feat!: / fix!: / BREAKING CHANGE -> major +# feat: -> minor +# fix: / perf: -> patch +# (nothing user-facing) -> error, no release +# +set -euo pipefail + +GREEN='\033[0;32m'; BLUE='\033[0;34m'; YELLOW='\033[0;33m'; RED='\033[0;31m'; NC='\033[0m' + +DRY_RUN=false +VERSION="" +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=true ;; + -h|--help) sed -n '2,30p' "$0"; exit 0 ;; + *) VERSION="$arg" ;; + esac +done + +echo -e "${BLUE}🌹 Annotated GD Lyrics — release${NC}" + +# Last release tag, if any. +LAST_TAG=$(git describe --tags --abbrev=0 --match 'v*' 2>/dev/null || echo "") +if [ -z "$LAST_TAG" ]; then + RANGE="" + echo -e "${YELLOW}No previous tag — this is the first release.${NC}" +else + RANGE="${LAST_TAG}..HEAD" + echo -e "${BLUE}Last tag: ${LAST_TAG}${NC}" +fi + +# Determine the version to release. +if [ -n "$VERSION" ]; then + echo -e "${BLUE}Using explicit version: ${VERSION}${NC}" +elif [ -z "$LAST_TAG" ]; then + VERSION="0.1.0" + echo -e "${BLUE}Defaulting first release to ${VERSION}${NC}" +else + base=${LAST_TAG#v} + IFS='.' 
read -r MAJOR MINOR PATCH <<< "${base%%[-+]*}" + + subjects=$(git log "$RANGE" --pretty=format:'%s') + bodies=$(git log "$RANGE" --pretty=format:'%B') + feat_break=$(printf '%s\n' "$subjects" | grep -cE '^[a-z]+(\([^)]+\))?!:' || true) + body_break=$(printf '%s\n' "$bodies" | grep -cE 'BREAKING CHANGE' || true) + feats=$(printf '%s\n' "$subjects" | grep -cE '^feat(\([^)]+\))?:' || true) + fixes=$(printf '%s\n' "$subjects" | grep -cE '^(fix|perf)(\([^)]+\))?:' || true) + + echo -e "${BLUE}Since ${LAST_TAG}: ${feat_break}+${body_break} breaking, ${feats} feat, ${fixes} fix/perf${NC}" + + if [ $((feat_break + body_break)) -gt 0 ]; then + MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 + elif [ "$feats" -gt 0 ]; then + MINOR=$((MINOR + 1)); PATCH=0 + elif [ "$fixes" -gt 0 ]; then + PATCH=$((PATCH + 1)) + else + echo -e "${RED}No user-facing changes (feat/fix/perf) since ${LAST_TAG}. Nothing to release.${NC}" + exit 1 + fi + VERSION="${MAJOR}.${MINOR}.${PATCH}" + echo -e "${GREEN}Next version: ${VERSION}${NC}" +fi + +if ! [[ $VERSION =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$ ]]; then + echo -e "${RED}Invalid version '${VERSION}' (expected semver like 1.2.3).${NC}"; exit 1 +fi +TAG="v${VERSION}" +if git rev-parse "$TAG" >/dev/null 2>&1; then + echo -e "${RED}Tag ${TAG} already exists.${NC}"; exit 1 +fi + +# Build changelog body from Conventional Commits. 
+section() {
+  # section PATTERN TITLE: print "### TITLE" plus one bullet per commit subject
+  # in $RANGE matching PATTERN (type prefix stripped); print nothing if none.
+  local pattern=$1 title=$2 lines
+  lines=$(git log ${RANGE:+"$RANGE"} --pretty=format:'%s (%h)' | grep -E "$pattern" \
+    | sed -E 's/^[a-z]+(\([^)]+\))?!?: //' || true)
+  if [ -n "$lines" ]; then
+    printf '### %s\n' "$title"
+    printf '%s\n' "$lines" | sed 's/^/- /'
+    printf '\n'
+  fi
+}
+CHANGELOG=$(
+  printf '## %s — %s\n\n' "$TAG" "$(date +%Y-%m-%d)"
+  section '^feat(\([^)]+\))?!?:' 'Features'
+  section '^fix(\([^)]+\))?!?:' 'Fixes'
+  section '^perf(\([^)]+\))?!?:' 'Performance'
+)
+echo ""
+echo -e "${BLUE}Changelog preview:${NC}"
+echo "-------------------------------------"
+printf '%s\n' "$CHANGELOG"
+echo "-------------------------------------"
+
+if [ "$DRY_RUN" = true ]; then
+  echo -e "${YELLOW}DRY RUN — would create and push tag ${TAG}. No changes made.${NC}"
+  exit 0
+fi
+# --cleanup=whitespace: git tag's default (strip) would drop the '#' heading lines.
+echo -e "${GREEN}Creating tag ${TAG}…${NC}"
+git tag -a "$TAG" --cleanup=whitespace -m "$CHANGELOG"
+git push origin "$TAG"
+echo -e "${GREEN}Pushed ${TAG}. The release workflow will build the site and publish the GitHub Release.${NC}"