diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml index d79f6e1f..2e1707f1 100644 --- a/.github/workflows/deploy-staging.yml +++ b/.github/workflows/deploy-staging.yml @@ -3,6 +3,18 @@ name: Deploy Staging Branch on: push: branches: [ staging ] + paths-ignore: + # Ignore changes that don't affect the site + - 'src/assignments/**' + - 'scripts/**' + - 'tests/**' + - 'private/**' + - '**.md' + - '.github/**' + - 'data/**' + - 'pyproject.toml' + - 'pytest.ini' + - 'uv.lock' jobs: deploy: diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f3e353a5..99d73cfb 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,6 +3,18 @@ name: Deploy to Production on: push: branches: [ main ] + paths-ignore: + # Ignore changes that don't affect the site + - 'src/assignments/**' + - 'scripts/**' + - 'tests/**' + - 'private/**' + - '**.md' + - '.github/**' + - 'data/**' + - 'pyproject.toml' + - 'pytest.ini' + - 'uv.lock' jobs: deploy: diff --git a/.github/workflows/shadow-tester.yml b/.github/workflows/shadow-tester.yml new file mode 100644 index 00000000..1fef1a47 --- /dev/null +++ b/.github/workflows/shadow-tester.yml @@ -0,0 +1,139 @@ +name: Shadow Tester + +on: + repository_dispatch: + types: [run-shadow-tests] + +permissions: + contents: read + pull-requests: write # Needed to comment on PRs + +jobs: + shadow-test: + runs-on: ubuntu-latest + steps: + - name: Checkout Private Repo + uses: actions/checkout@v4 + with: + repository: crheckman/private-vla-foundations + token: ${{ secrets.PRIVATE_REPO_TOKEN }} # PAT with access to private repo + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install Python Dependencies + run: | + pip install pytest torch numpy + + - name: Fetch Student Code from Public PR + env: + PR_NUMBER: ${{ github.event.client_payload.pr_number }} + HEAD_BRANCH: ${{ github.event.client_payload.head_branch }} + HEAD_SHA: 
${{ github.event.client_payload.head_sha }} + REPO_URL: ${{ github.event.client_payload.repo_url }} + run: | + echo "Fetching student code from PR #${PR_NUMBER}" + + # Clone the public repo + git clone https://github.com/arpg/vla-foundations.git /tmp/public-repo + cd /tmp/public-repo + + # Fetch the PR branch + git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER} + git checkout pr-${PR_NUMBER} + + # Copy student code to our testing directory + # Copy src/assignments to the current repo + if [ -d "src/assignments" ]; then + cp -r src/assignments/* $GITHUB_WORKSPACE/src/assignments/ || true + fi + + echo "Student code fetched successfully" + + - name: Run Internal Rigorous Tests + id: tests + continue-on-error: true + run: | + # Run pytest with internal tests + pytest tests/internal/ -v --tb=short --maxfail=5 > test_output.txt 2>&1 + TEST_EXIT_CODE=$? + + # Capture output + cat test_output.txt + + # Save exit code for later + echo "exit_code=${TEST_EXIT_CODE}" >> $GITHUB_OUTPUT + + # Exit with the actual test result + exit $TEST_EXIT_CODE + + - name: Prepare Test Summary + if: always() + id: summary + run: | + if [ -f test_output.txt ]; then + # Extract summary from pytest output + SUMMARY=$(tail -20 test_output.txt | grep -E "(PASSED|FAILED|ERROR)" || echo "Test execution completed") + + # Escape newlines for GitHub output + SUMMARY="${SUMMARY//$'\n'/'%0A'}" + echo "summary=${SUMMARY}" >> $GITHUB_OUTPUT + else + echo "summary=No test output available" >> $GITHUB_OUTPUT + fi + + - name: Comment on Public PR - Pass + if: steps.tests.outcome == 'success' + uses: peter-evans/create-or-update-comment@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} # Default token works for same repo + repository: arpg/vla-foundations + issue-number: ${{ github.event.client_payload.pr_number }} + body: | + ## ✅ Shadow CI: Internal Tests Passed + + Your submission passed all internal rigorous tests! + +
+ Test Summary + + ``` + ${{ steps.summary.outputs.summary }} + ``` + +
+ + --- + *These are hidden internal tests run by the instructor. Your code meets the required standards.* + + - name: Comment on Public PR - Fail + if: steps.tests.outcome == 'failure' + uses: peter-evans/create-or-update-comment@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} # Default token works for same repo + repository: arpg/vla-foundations + issue-number: ${{ github.event.client_payload.pr_number }} + body: | + ## ❌ Shadow CI: Internal Tests Failed + + Your submission did not pass all internal tests. Please review the feedback and make necessary corrections. + +
+ Test Summary + + ``` + ${{ steps.summary.outputs.summary }} + ``` + +
+ + ### Next Steps: + 1. Review the test failures above + 2. Make corrections to your code + 3. Push updates to your PR branch + 4. Tests will automatically re-run + + --- + *These are hidden internal tests run by the instructor. Contact @crheckman if you need clarification on the failures.* diff --git a/.github/workflows/vla-audit.yml b/.github/workflows/vla-audit.yml index b8237a36..afa5a0d5 100644 --- a/.github/workflows/vla-audit.yml +++ b/.github/workflows/vla-audit.yml @@ -42,7 +42,15 @@ jobs: ### Common Issues: - **1. Semantic Line Breaks** + **1. Required Frontmatter Fields** + - Every audit MDX file must include these fields: + - `title`: Paper title + - `author`: Paper author(s) + - `topic`: Research topic/category + - `paper`: Link to paper or citation + - All fields must have non-empty values (no placeholders like "TBD" or "TODO") + + **2. Semantic Line Breaks** - Each sentence should be on its own line - This makes PR commenting and reviewing much easier - Example: @@ -53,7 +61,7 @@ jobs: + This makes PR review much easier. ``` - **2. Clean Git History** + **3. 
Clean Git History** - No "Merge branch 'main'" commits allowed - Use `git rebase main` instead of `git merge main` - Keep your commit history linear and clean @@ -144,22 +152,3 @@ jobs: --- *This preview will be removed when the PR is closed.* - - trigger-shadow-tests: - runs-on: ubuntu-latest - if: github.event_name == 'pull_request' && github.base_ref == 'staging' - needs: audit - steps: - - name: Trigger Shadow CI in Private Repo - uses: peter-evans/repository-dispatch@v2 - with: - token: ${{ secrets.PRIVATE_DISPATCH_TOKEN }} - repository: crheckman/private-vla-foundations - event-type: run-shadow-tests - client-payload: | - { - "pr_number": "${{ github.event.pull_request.number }}", - "head_branch": "${{ github.event.pull_request.head.ref }}", - "head_sha": "${{ github.event.pull_request.head.sha }}", - "repo_url": "${{ github.event.pull_request.head.repo.clone_url }}" - } diff --git a/.gitignore b/.gitignore index 3519214c..4eea2148 100644 --- a/.gitignore +++ b/.gitignore @@ -53,15 +53,13 @@ __pycache__/ # project-specific /arxiv-digest/ -# Private repo files - do not commit to public -private/ -tests/ -scripts/_sanitize_todos.py -scripts/manage_solutions.py -scripts/sanitize.sh -scripts/setup_private_repo.sh -scripts/add_github_secret.sh -pytest.ini -PRIVATE_REPO_SETUP.md -SETUP_WITH_GH_CLI.md -QUICK_REFERENCE.md + +# Private solution infrastructure (NEVER commit to public branches) +*.backup.py + +# Claude Code skill outputs (generated reports) +.claude/releases/*.md +.claude/sync-reports/*.md +!.claude/releases/.gitkeep +!.claude/sync-reports/.gitkeep +!tests/internal/reports/.gitkeep diff --git a/README.md b/README.md index 323ea9e7..f83a27ee 100644 --- a/README.md +++ b/README.md @@ -83,103 +83,6 @@ git push --force-with-lease --- -## Repository Structure - -``` -vla-foundations/ -├── app/ # Next.js App Router (web framework) -│ ├── page.tsx # Landing page -│ ├── textbook/[slug]/ # Dynamic chapter pages -│ ├── course/ # Course overview page -│ │ └── 
assignments/[slug]/ # Dynamic assignment pages -│ └── contributors/[slug]/ # Dynamic contributor profile pages -│ -├── content/ # All MDX content (rendered as web pages) -│ ├── textbook/ # 8-chapter VLA textbook -│ │ ├── foundations/ # Chapter 0: Core concepts -│ │ ├── architectures/ # Chapter 1: Model designs -│ │ ├── data/ # Chapter 2: Dataset construction -│ │ ├── training/ # Chapter 3: Optimization methods -│ │ ├── evaluation/ # Chapter 4: Metrics and benchmarks -│ │ ├── deployment/ # Chapter 5: Production systems -│ │ ├── applications/ # Chapter 6: Real-world use cases -│ │ └── future/ # Chapter 7: Open problems -│ │ -│ ├── course/ # Course materials -│ │ ├── Syllabus.mdx # Course syllabus -│ │ ├── assignments/ # Assignment specifications -│ │ └── submissions/ # Student submission reports -│ │ -│ └── contributors/ # Contributor profiles -│ └── [github-handle].mdx # One profile per contributor -│ -└── src/ # Executable source code - └── assignments/ # Assignment code templates - └── scratch-1/ # Example: Transformer implementation - ├── README.md # Minimal README - ├── backbone.py # Implementation template with TODOs - └── generate_data.py # Dataset generator script -``` - ---- - -## The 8-Chapter Textbook - -0. **Foundations** - Core concepts and problem formulation -1. **Architectures** - Model designs and network topologies -2. **Data** - Dataset construction and curation strategies -3. **Training** - Optimization and fine-tuning methods -4. **Evaluation** - Metrics and benchmarking protocols -5. **Deployment** - Production systems and scaling -6. **Applications** - Real-world use cases and case studies -7. 
**Future Directions** - Open problems and research frontiers - ---- - -## Development Workflow - -### Initial Setup - -```bash -# Clone the repository -git clone https://github.com/arpg/vla-foundations.git -cd vla-foundations - -# Install dependencies -pnpm install - -# Run development server -pnpm dev -``` - -Navigate to `http://localhost:3000` to see the site. - -### Local Build - -```bash -# Build the static site -pnpm build - -# Preview the production build -pnpm start -``` - ---- - -## Technologies - -### Core -- **Next.js 16**: Static site generation -- **TypeScript**: Type safety -- **Tailwind CSS**: Styling -- **MDX**: Markdown with JSX - -### Content Processing -- **remark-math** + **rehype-katex**: LaTeX rendering -- **remark-gfm**: GitHub-flavored Markdown - ---- - ## Resources ### Documentation diff --git a/app/textbook/audits/[...slug]/page.tsx b/app/textbook/audits/[...slug]/page.tsx index ceef3934..2660ad55 100644 --- a/app/textbook/audits/[...slug]/page.tsx +++ b/app/textbook/audits/[...slug]/page.tsx @@ -89,14 +89,22 @@ export default async function AuditPage({ params }: PageProps) { // Get chapters for sidebar const chapters = getAllChapters(); + // Determine if we're in review mode based on environment + const isReviewMode = isStaging && process.env.STAGING_PR_NUMBER !== undefined; + const prNumber = process.env.STAGING_PR_NUMBER; + return ( - + ← Back to Audits - {/* Staging Banner */} - {isStaging && ( + {/* Staging Banner (shown when in staging but not in review mode) */} + {isStaging && !isReviewMode && (

⚠️ DRAFT AUDIT - UNDER REVIEW

diff --git a/claude.md b/claude.md new file mode 100644 index 00000000..6837d90b --- /dev/null +++ b/claude.md @@ -0,0 +1,644 @@ +# VLA Foundations Development Guide for AI SWE Agents (Private Repo) + +This is the **private instructor repository** for VLA Foundations, containing complete assignment solutions, internal grading tests, and instructor operations. The public student-facing repository is at `arpg/vla-foundations`. This repo uses **Next.js (App Router)** for the textbook, **Tailwind CSS** for styling, **MDX** for content, and **pnpm** for package management. + +Read more about the dual-repository architecture in [INSTRUCTOR.md](INSTRUCTOR.md). + +--- + +## Repository Architecture + +This is a **two-repository system**: + +``` +Private Repo (crheckman/private-vla-foundations) +├── private/ # Complete assignment solutions (NEVER PUBLIC) +│ └── solutions/ +├── tests/internal/ # Internal grading tests (NEVER PUBLIC) +│ ├── fixtures/ # Gold standard test data +│ └── reports/ # Grading reports (git-ignored) +├── scripts/ +│ ├── dev_utils.py # Solution management (inject/reset/verify-clean) +│ ├── sanitize.sh # Automated sanitization pipeline +│ └── _sanitize_todos.py # TODO comment sanitizer +├── .claude/ +│ ├── skills/ # Claude Code skills for automation +│ └── commands/ # Slash command shortcuts +└── src/assignments/ # Starter code with [SOLUTION] hints + + ↓ (Orphan push on release tag) + +Public Repo (arpg/vla-foundations) +├── src/assignments/ # Starter code (TODOs only) +├── tests/public/ # Student-visible tests +├── content/ # Textbook and assignment specs +└── [NO private/ or tests/internal/] +``` + +**Critical**: Never commit `private/` or `tests/internal/` to public branches. 
+ +--- + +## Initial Setup + +### Prerequisites +```bash +# Install dependencies +pnpm install + +# Install uv (Python package manager) - REQUIRED +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install Python dependencies via uv +uv sync + +# Install GitHub CLI (required for skills) +brew install gh +gh auth login +``` + +### Python Environment (uv) +**All Python commands MUST use `uv run`** to ensure correct dependencies: +```bash +# Run Python scripts +uv run python scripts/dev_utils.py --list + +# Run pytest +uv run pytest tests/internal/ -v -m rigor + +# Run any Python file +uv run python src/assignments/scratch-1/generate_data.py +``` + +### Development +```bash +# Run development server +pnpm dev + +# Build production (static export in out/) +pnpm build + +# Lint Next.js +pnpm lint +``` + +--- + +## Claude Code Skills (Automation) + +This repository has **7 Claude Code skills** for workflow automation. See [.claude/skills/README.md](.claude/skills/README.md) for complete documentation. + +### Core Skills + +#### `/vla-guard` - Solution Leak Audit +**Purpose**: Prevent solution leaks before any public operation + +**Usage**: +```bash +/vla-guard +``` + +**What it does**: +- Scans for `[SOLUTION]` markers in `src/` and `content/` +- Verifies `private/` and `tests/internal/` not staged +- Checks git history for accidental commits +- Runs `dev_utils.py --verify-clean` (similarity detection) +- **Blocks** sync if any check fails + +**When to use**: Before every push, PR, or release + +--- + +#### `/test-rigor` - Internal Grading Tests +**Purpose**: Run internal grading tests with automatic solution injection/reset + +**Usage**: +```bash +/test-rigor +# Select: "Scratch-1" / "Scratch-2" / "All" +``` + +**What it does**: +1. Injects solutions: `python3 scripts/dev_utils.py --inject ` +2. Runs pytest: `pytest tests/internal/ -v -m rigor` +3. Generates report: `tests/internal/reports/test-report-.txt` +4. 
Resets to starter code: `python3 scripts/dev_utils.py --reset ` + +**Safe to run multiple times** - always resets after completion. + +--- + +#### `/generate-fixtures` - Gold Standard Fixtures +**Purpose**: Generate reference data for fidelity tests from solution code + +**Usage**: +```bash +/generate-fixtures +# Select assignment +``` + +**What it does**: +1. Injects solutions +2. Sets fixed random seeds (seed=42) +3. Runs solution code to generate outputs +4. Saves to `tests/internal/fixtures//gold_output.pt` +5. Verifies no NaNs +6. Generates fixture documentation +7. Resets to starter code + +**When to use**: After completing solution implementation or updating solution code + +--- + +#### `/grade` - Automated PR Grading +**Purpose**: Complete grading workflow for student pull requests + +**Usage**: +```bash +/grade +# Enter PR number or auto-detect latest +``` + +**What it does**: +1. Fetches student code from GitHub PR +2. Runs VLA Guard on student code (detect plagiarism/leaks) +3. Runs `tests/public/` (student-visible tests) +4. Injects reference solution +5. Runs `tests/internal/` (gradient leak, fidelity, training tests) +6. Restores student code +7. Generates detailed markdown feedback report +8. Posts comment on PR (optional) +9. Updates PR labels (ready-to-merge / needs-revision / changes-requested) + +**Output**: `tests/internal/reports/grade-pr.md` + +**When to use**: When reviewing student submissions + +--- + +#### `/release` - Safe Assignment Publishing +**Purpose**: Orchestrate complete release workflow with comprehensive safety checks + +**Usage**: +```bash +/release +# Select: "Scratch-1" / "Scratch-2" / etc. +``` + +**What it does**: +1. Verifies on main branch, no uncommitted changes +2. Runs `/vla-guard` pre-flight audit (fail-fast) +3. Prompts for release tag (e.g., `release-scratch-2`) +4. Shows changes since last release +5. Runs `scripts/sanitize.sh` (removes private/, [SOLUTION] markers, etc.) +6. Verifies sanitization (fail-safe) +7. 
Creates annotated git tag +8. Pushes tag → triggers `.github/workflows/sync-to-public.yml` +9. Monitors GitHub Actions workflow execution +10. Verifies public repository (no leaks) +11. Checks deployment status (https://www.vlm-robotics.dev) +12. Generates release summary: `.claude/releases/release-.md` + +**Fail-safe**: Aborts at ANY failed check, provides remediation instructions + +**When to use**: When ready to publish assignment to students + +--- + +#### `/new-assignment` - Assignment Scaffolding +**Purpose**: Create complete assignment structure with templates + +**Usage**: +```bash +/new-assignment +# Enter name, type, focus, difficulty +``` + +**What it does**: +1. Creates directory structure: + - `src/assignments//` (starter code with TODOs) + - `private/solutions//` (solution templates) + - `tests/public/test__basic.py` (student-visible tests) + - `tests/internal/test__rigor.py` (grading tests) + - `content/course/assignments/.mdx` (assignment spec) +2. Generates Python templates +3. Generates test templates +4. Creates README files + +**Next steps after scaffolding**: +1. Complete solution implementations +2. Run `/generate-fixtures` +3. Update MDX spec +4. Run `/test-rigor` +5. Commit changes +6. Run `/release` + +--- + +#### `/sync-check` - Post-Release Verification +**Purpose**: Verify public repository has no leaks after release sync + +**Usage**: +```bash +/sync-check +# Select: "Latest" or specify release tag +``` + +**What it does**: +1. Clones public repo to temp directory (read-only) +2. Scans for `[SOLUTION]` markers +3. Checks for private directories (`private/`, `tests/internal/`) +4. Checks for private scripts (`dev_utils.py`, `sanitize.sh`) +5. Checks for sensitive files (credentials, `*_solution.py`) +6. Verifies orphan push strategy (no linked git history) +7. Compares file lists (private vs public) +8. Checks deployment status (HTTPS 200) +9. Runs sample fidelity check +10. 
Generates verification report: `.claude/sync-reports/sync-check-.md` +11. Cleans up temp files + +**When to use**: Always run after `/release` completes + +**Critical**: If leaks detected, report provides urgent remediation steps + +--- + +## Commands Useful in Development + +### Solution Management +```bash +# List all available solutions +uv run python scripts/dev_utils.py --list + +# Inject solutions for testing/grading +uv run python scripts/dev_utils.py --inject scratch-1 + +# Reset to starter code +uv run python scripts/dev_utils.py --reset scratch-1 + +# Verify no solution leaks (similarity check) +uv run python scripts/dev_utils.py --verify-clean +``` + +### Testing +```bash +# Run public tests (students can see these) +uv run pytest tests/public/ -v + +# Run internal grading tests (after injecting solutions) +uv run pytest tests/internal/ -v -m rigor + +# Run specific test file +uv run pytest tests/internal/test_scratch1_rigor.py -v + +# Generate HTML report +uv run pytest tests/internal/ --html=tests/internal/reports/report.html --self-contained-html +``` + +### Pre-Release Checks +```bash +# Complete pre-flight check +/pre-flight + +# Or manually: +uv run python scripts/dev_utils.py --verify-clean +bash scripts/sanitize.sh # (Only in orphan branch workflow) +``` + +### GitHub Operations +```bash +# List open student PRs +gh pr list --base staging --state open + +# View PR details +gh pr view 123 + +# Comment on PR +gh pr comment 123 --body "Feedback here" + +# Merge PR +gh pr merge 123 --squash +``` + +--- + +## Linting and Formatting + +### Semantic Line Breaks +**All MDX files MUST use one sentence per line.** This is mandatory to allow granular, line-by-line feedback in Pull Requests. + +**Bad:** +```markdown +This is a very long sentence with multiple ideas. It continues on the same line. This makes PR review difficult. +``` + +**Good:** +```markdown +This is a sentence on its own line. +Each idea gets its own line. +This makes PR review much easier. 
+``` + +### LaTeX +Use formal LaTeX for all mathematical derivations: +```markdown +The loss function is: +$$ +\mathcal{L} = -\sum_{t=1}^T \log p(a_t | s_t, I_t) +$$ +``` + +Do not use code blocks for math. + +### Next.js Linting +```bash +pnpm lint +``` + +--- + +## Testing Philosophy + +### Public Tests (`tests/public/`) +**Purpose**: Student-visible validation tests + +**What they test**: +- Basic model structure (initialization, shapes) +- Forward pass correctness (no NaNs, correct dimensions) +- Gradient flow (backpropagation works) + +**Students can run these**: `pytest tests/public/test_scratch1_basic.py -v` + +### Internal Tests (`tests/internal/`) +**Purpose**: Rigorous grading tests (NEVER synced to public) + +**What they test**: +- **Gradient Leak Test**: Verify frozen parameters (e.g., DINOv2 backbone) +- **Latent Fidelity Test**: Compare output against gold standard fixtures +- **Training Convergence Test**: Verify model can train and loss decreases +- **Edge Case Tests**: Boundary conditions, error handling + +**Markers**: +- `@pytest.mark.internal` - All internal tests +- `@pytest.mark.rigor` - Strict grading tests +- `@pytest.mark.gradient` - Gradient flow tests +- `@pytest.mark.fidelity` - Output comparison tests +- `@pytest.mark.training` - Training convergence tests + +**Run with**: `pytest tests/internal/ -v -m rigor` + +--- + +## Interacting with the App + +### Local Development +```bash +pnpm dev +# Access at http://localhost:3000 +``` + +### Staging Previews +Every Pull Request to `staging` branch triggers deployment to: +``` +https://vlm-robotics.dev/staging/pulls/[PR_NUMBER]/ +``` + +**Review Protocol**: +1. Read the rendered audit on the staging site +2. Comment on the **source MDX** in GitHub "Files Changed" tab +3. 
Use the **Rich Diff** view in GitHub to verify LaTeX rendering + +### Production +Production site deployed at: +``` +https://www.vlm-robotics.dev +``` + +Deployment triggered by: +- Push to `main` branch (after staging → main merge) +- GitHub Action: `.github/workflows/deploy.yml` +- Deploys to ristoffer.ch via SSH + +--- + +## Patterns & Standards + +### Amazon Principle +We do not write "summaries." We write rigorous, durable **Audits**. A high-fidelity audit IS the textbook chapter. + +### Textbook Audit Sidebars +Every audit MUST contain these three technical sidebars: + +1. **The Lineage of Failure**: Why previous approaches died +2. **Intuitive Derivation**: The geometric/mathematical intuition of the loss function +3. **Implementation Gotchas**: Practitioners' notes on coordinate frames, normalization, or hyperparameters + +### The Interface Focus +When auditing VLA models, focus on the **Interface**: +- **Input Projection**: Pixels → Tokens +- **Action Head**: Tokens → Trajectories +- **The Loss/Objective Function** + +### Git Hygiene +We are a **rebase-only** lab. Use `git rebase main`. PRs containing "Merge branch 'main'" commits will be closed. + +**Correct workflow**: +```bash +git fetch origin +git rebase origin/main +git push --force-with-lease +``` + +### Sanitization +All private solutions are marked with `[SOLUTION]` tags: +```python +# TODO: Implement RMSNorm forward pass +# [SOLUTION] Use torch.rsqrt for efficiency +result = torch.rsqrt(variance + self.eps) +``` + +The sanitization pipeline: +1. `scripts/_sanitize_todos.py` - Removes `[SOLUTION]` markers +2. `scripts/sanitize.sh` - Orchestrates full cleanup (private dirs, scripts, README) +3. Triggered automatically by `.github/workflows/sync-to-public.yml` on release tags + +**Load-bearing wall**: `scripts/sanitize.sh` is the primary defense against solution leaks. 
+ +### Orphan Push Strategy +When syncing to public repo, we use **orphan branches** to break all git history links: + +```bash +git checkout --orphan temp-public-branch +git add -A +git commit -m "Public Release: $(date)" +git push public temp-public-branch:main --force +``` + +**Benefits**: +- No commit history from private repo exposed +- Public repo has completely independent git history +- Maximum security against accidental leaks via `git log` + +--- + +## File Map of Interest + +### GitHub Actions +- [.github/workflows/sync-to-public.yml](.github/workflows/sync-to-public.yml) - Automated sync to public repo (orphan push) +- [.github/workflows/shadow-tester.yml](.github/workflows/shadow-tester.yml) - Shadow CI for student PRs +- [.github/workflows/deploy.yml](.github/workflows/deploy.yml) - Production deployment to ristoffer.ch + +### Configuration +- [next.config.ts](next.config.ts) - Next.js config with dynamic routing for staging +- [pytest.ini](pytest.ini) - pytest markers configuration +- [tailwind.config.ts](tailwind.config.ts) - Tailwind CSS configuration + +### Scripts +- [scripts/dev_utils.py](scripts/dev_utils.py) - Solution management (inject/reset/verify-clean) +- [scripts/sanitize.sh](scripts/sanitize.sh) - Complete sanitization pipeline +- [scripts/_sanitize_todos.py](scripts/_sanitize_todos.py) - TODO comment sanitizer + +### Claude Code Skills +- [.claude/skills/](/.claude/skills/) - All skill definitions +- [.claude/skills/README.md](.claude/skills/README.md) - Comprehensive skills documentation +- [.claude/commands/](.claude/commands/) - Command shortcuts + +### Components +- [components/audit/AuditLayout.tsx](components/audit/AuditLayout.tsx) - Primary wrapper for rendered textbook chapters + +### Testing +- [tests/conftest.py](tests/conftest.py) - pytest fixtures (auto-inject for internal tests) +- [tests/public/](tests/public/) - Student-visible tests +- [tests/internal/](tests/internal/) - Internal grading tests + +### Documentation +- 
[INSTRUCTOR.md](INSTRUCTOR.md) - Complete instructor guide (consolidated) +- [SKILLS_COMPLETE.md](SKILLS_COMPLETE.md) - Skills implementation summary +- [REFACTOR_COMPLETE.md](REFACTOR_COMPLETE.md) - Repository hardening summary + +--- + +## Typical Workflows + +### Creating a New Assignment +```bash +# 1. Scaffold structure +/new-assignment + +# 2. Implement solutions +# Edit: private/solutions/scratch-3/model_solution.py + +# 3. Generate fixtures +/generate-fixtures + +# 4. Update spec +# Edit: content/course/assignments/scratch-3.mdx + +# 5. Test grading +/test-rigor + +# 6. Commit +git add . && git commit -m "feat: add scratch-3 assignment" + +# 7. Release +/release + +# 8. Verify +/sync-check +``` + +### Grading Student Work +```bash +# 1. List PRs +gh pr list --base staging + +# 2. Grade PR +/grade + +# 3. Review report +cat tests/internal/reports/grade-pr123.md + +# 4. Merge if approved +gh pr merge 123 --squash +``` + +### Pre-Release Checklist +```bash +# 1. Audit +/vla-guard + +# 2. Pre-flight (audit + sanitize) +/pre-flight + +# 3. Release +/release + +# 4. Verify +/sync-check +``` + +--- + +## Shadow CI + +Student PRs to the public repo trigger **Shadow CI** - hidden testing with internal grading suite: + +1. Student opens PR to `arpg/vla-foundations` (public) +2. Public `.github/workflows/vla-audit.yml` triggers `repository_dispatch` to private repo +3. Private `.github/workflows/shadow-tester.yml` runs: + - Fetches student code + - Injects solutions + - Runs internal tests + - Posts Pass/Fail comment on public PR (no details) +4. Instructor uses `/grade` for detailed feedback + +**Purpose**: Catch critical failures early without exposing grading logic. 
+ +--- + +## Security Boundaries + +### NEVER Sync to Public +- `private/` directory (complete solutions) +- `tests/internal/` directory (grading tests) +- `scripts/dev_utils.py` (solution management) +- `scripts/sanitize.sh` (sanitization script) +- `scripts/_sanitize_todos.py` (helper script) +- `.claude/` directory (instructor automation) +- Files with `[SOLUTION]` markers + +### Multi-Layer Protection +1. **Pre-commit hook** - Blocks commits with `[SOLUTION]` in public files +2. **VLA Guard skill** - Scans for leaks before operations +3. **Sanitization pipeline** - Removes private content automatically +4. **Post-sanitization validation** - Fail-safe check in GitHub Actions +5. **Orphan push** - Breaks git history links +6. **Sync-check skill** - Verifies public repo after release + +--- + +## Requirements + +- **Node.js** 18+ +- **pnpm** 8+ +- **Python** 3.11+ +- **uv** (Python package manager): `curl -LsSf https://astral.sh/uv/install.sh | sh` +- **gh CLI** (for skills): `brew install gh && gh auth login` + +Python dependencies (managed by uv via `pyproject.toml`): +- pytest, pytest-html +- torch +- numpy + +--- + +## Support + +- **Instructor Guide**: [INSTRUCTOR.md](INSTRUCTOR.md) +- **Skills Documentation**: [.claude/skills/README.md](.claude/skills/README.md) +- **Public Repo**: https://github.com/arpg/vla-foundations +- **Course Website**: https://www.vlm-robotics.dev + +--- + +**Remember**: This is the private instructor repository. Always run `/vla-guard` before any public-facing operation. 
diff --git a/components/audit/AuditLayout.tsx b/components/audit/AuditLayout.tsx index a44c8163..703df8e6 100644 --- a/components/audit/AuditLayout.tsx +++ b/components/audit/AuditLayout.tsx @@ -13,15 +13,44 @@ interface Chapter { interface AuditLayoutProps { children: ReactNode; chapters: Chapter[]; + isReviewMode?: boolean; + prNumber?: string; } -export function AuditLayout({ children, chapters }: AuditLayoutProps) { +export function AuditLayout({ children, chapters, isReviewMode = false, prNumber }: AuditLayoutProps) { return (

+ {/* Review Mode Banner */} + {isReviewMode && ( +
+
+
+ + + + +
+
+

+ 👁️ REVIEW MODE +

+

+ You are viewing a preview of this audit. This content is under review and not yet published. +

+ {prNumber && ( +

+ Preview from PR #{prNumber} +

+ )} +
+
+
+ )} +
{children}
diff --git a/content/course/assignments/capstone.mdx b/content/course/assignments/capstone.mdx index 43b7f82c..4adf8be1 100644 --- a/content/course/assignments/capstone.mdx +++ b/content/course/assignments/capstone.mdx @@ -5,10 +5,6 @@ due: 'Week 16' points: 300 --- -
-

⚠️ DRAFT: NOT YET ASSIGNED

-

This assignment is still under review and subject to change. Do not begin work until this notice is removed.

-
# Capstone Project: Textbook Contribution & Implementation diff --git a/content/course/assignments/scratch-1.mdx b/content/course/assignments/scratch-1.mdx index 2b1f2bce..cdb5636d 100644 --- a/content/course/assignments/scratch-1.mdx +++ b/content/course/assignments/scratch-1.mdx @@ -5,11 +5,6 @@ due: 'Sunday, February 1, 9:00 AM MST' points: 100 --- -
-

⚠️ DRAFT: NOT YET ASSIGNED

-

This assignment is still under review and subject to change. Do not begin work until this notice is removed.

-
- # Scratch-1: The Transformer Backbone **Focus**: Implementing the $O(1)$ engine of the VLA stack. @@ -187,7 +182,9 @@ When I removed the causal mask, the following happened: ### Pass Level (B): 70-89 points - ✅ Successful implementation of the backbone -- ✅ Loss converges on the synthetic dataset (< 1.0) +- ✅ Loss shows clear convergence (appreciable decrease from initial loss) + - Expected: Initial loss ~3-4, Final loss ~1.9-2.2 + - Model should demonstrate learning, not achieve arbitrary threshold - ✅ Attention maps visualization included - ✅ Causal mask audit completed - ✅ Code is clean and documented @@ -307,6 +304,13 @@ A: Check: 2. Is the learning rate too high? (Try 1e-4) 3. Are gradients exploding? (Enable gradient clipping) +**Q: What loss should I expect?** +A: With correct implementation on the synthetic trajectory dataset: +- **Initial loss**: ~3-4 (near random guessing for 256-way classification) +- **Final loss**: ~1.9-2.2 (showing clear learning) +- **Key metric**: Appreciable decrease indicating the model learns patterns +- The action encoding represents direction + magnitude toward target, which is learnable but not trivial + ## 11. 
Deadline **Due**: Sunday, February 1, 9:00 AM MST diff --git a/content/course/submissions/scratch-1/images_tr/attn_map.png b/content/course/submissions/scratch-1/images_tr/attn_map.png new file mode 100644 index 00000000..22448e4f Binary files /dev/null and b/content/course/submissions/scratch-1/images_tr/attn_map.png differ diff --git a/content/course/submissions/scratch-1/images_tr/attn_map_wo_mask.png b/content/course/submissions/scratch-1/images_tr/attn_map_wo_mask.png new file mode 100644 index 00000000..5ed453ab Binary files /dev/null and b/content/course/submissions/scratch-1/images_tr/attn_map_wo_mask.png differ diff --git a/content/course/submissions/scratch-1/images_tr/loss_curve-sin.png b/content/course/submissions/scratch-1/images_tr/loss_curve-sin.png new file mode 100644 index 00000000..3ed24f2b Binary files /dev/null and b/content/course/submissions/scratch-1/images_tr/loss_curve-sin.png differ diff --git a/content/course/submissions/scratch-1/images_tr/loss_curve.png b/content/course/submissions/scratch-1/images_tr/loss_curve.png new file mode 100644 index 00000000..f9be9cb3 Binary files /dev/null and b/content/course/submissions/scratch-1/images_tr/loss_curve.png differ diff --git a/content/course/submissions/scratch-1/images_tr/loss_curve_without_mask.png b/content/course/submissions/scratch-1/images_tr/loss_curve_without_mask.png new file mode 100644 index 00000000..11b38974 Binary files /dev/null and b/content/course/submissions/scratch-1/images_tr/loss_curve_without_mask.png differ diff --git a/content/course/submissions/scratch-1/thanushraam.mdx b/content/course/submissions/scratch-1/thanushraam.mdx new file mode 100644 index 00000000..affdda95 --- /dev/null +++ b/content/course/submissions/scratch-1/thanushraam.mdx @@ -0,0 +1,296 @@ +--- +title: "Scratch-1 Submission: Thanushraam Suresh Kumar" + +student: "Thanushraam Suresh Kumar" + +date: "2026-02-01" +--- + +# Scratch-1: The Transformer Backbone + +## Loss Curve + +![Training 
Loss](/content/course/submissions/scratch-1/images_tr/loss_curve.png) + +The model converged after 2820 iterations or 10 epochs with final training loss of 1.9652 and validation loss of 1.9769. + +## Attention Visualization + +![Attention Maps](/content/course/submissions/scratch-1/images_tr/attn_map.png) + +The attention map visualization shows a clear lower-triangular structure, which means that the causal mask I applied was successful. +Unlike the unmasked case where attention patterns appear scattered and tokens can attend to future positions, the masked model +restricts each token to attending only to itself and previous tokens. A strong diagonal band shows that tokens place high weight +on themselves and very recent context, while attention weights gradually decrease as distance from the diagonal increases. +This fading pattern reflects a recency bias, which is expected in sequential modeling tasks. + +## The Audit: Removing the Causal Mask + +When comparing training behavior with and without the causal mask, the difference in loss dynamics is huge. Without the mask, +the training loss drops from an initial value of approximately 5.5 all the way down to ~0.05. This extremely low final loss +indicates that the model is effectively seeing the ground-truth future tokens during training, allowing it to trivially copy +the next token rather than predict it. Without the mask, attention is unrestricted, meaning token t can directly attend to +tokens t+1, t+2, and beyond. Since the training objective is next-token prediction, the model can simply read the ground-truth +answer from the future position and copy it. This creates information leakage, turning the task into trivial reconstruction +rather than autoregressive modeling. + +![Loss Curve](/content/course/submissions/scratch-1/images_tr/loss_curve_without_mask.png) + +![Attention Maps](/content/course/submissions/scratch-1/images_tr/attn_map_wo_mask.png) + +The attention visualization confirms this behavior.
Instead of a lower-triangular structure, attention spreads across future +positions, showing that tokens strongly attend to positions to their right. + +### Why the Model "Cheats" +Autoregressive models rely on the principle that each token must be predicted using only past context. +The causal mask enforces this by restricting attention to previous positions. Removing it breaks this constraint, +allowing the model to access the exact token it is supposed to predict. The resulting low loss therefore reflects +shortcut learning, not genuine sequence understanding. + +## Extras +### KV Cache Inference +To evaluate the impact of KV caching I ran inference on two different inputs and compared the inference times. +First, for a prompt length of 50 and 500 generated tokens, the no-cache configuration +averaged 2139.95 ms (ran 5 times), while with KV cache it averaged 1612.92 ms, yielding a 1.33× speedup. +For a longer rollout with prompt length 200 and 1000 generated tokens, the no-cache version averaged 4206.06 ms compared to +3234.18 ms with KV cache, corresponding to a 1.30× speedup. In both cases, KV cache consistently reduced inference time, +and the absolute time savings increased with longer sequences (about 0.53 seconds saved in the shorter setting versus +nearly 1 second saved in the longer one); this demonstrates that caching becomes increasingly beneficial as generation length grows. + +### Sinusoidal vs ROPE +I tried to implement Sinusoidal and compare it with RoPE in terms of training and validation loss along with their inference time for +two different context lengths. +At short context length (prompt = 50), sinusoidal positional encoding showed slightly lower inference time (1769.65 ms) +compared to RoPE (2078.67 ms). However, RoPE achieved better modeling performance, reaching a lower validation loss (1.9678) +than sinusoidal encoding (2.0447) and converging faster during training.
+The sinusoidal loss curve shows slower convergence and consistently higher loss than RoPE +![loss curve sinusoidal](/content/course/submissions/scratch-1/images_tr/loss_curve-sin.png) + +At longer context length (prompt = 200), RoPE outperformed sinusoidal encoding in both modeling quality and efficiency. +RoPE reached a validation loss of 1.9651, compared to 2.0348 for sinusoidal encoding. In inference without caching, RoPE +required 2089.14 ms, while sinusoidal encoding took 2782.25 ms (≈ 1.33× slower). + +## Code Highlights + +### Efficient QKV Projection and Multi-Head Reshaping + +Instead of separate layers for Q, K, and V, a single projection is used and then split: + +``` +qkv = self.qkv_proj(x) # (B, T, 3D) +q, k, v = qkv.chunk(3, dim=-1) + +q = q.reshape(B, T, H, Hd).transpose(1, 2) # (B, H, T, Hd) +k = k.reshape(B, T, H, Hd).transpose(1, 2) +v = v.reshape(B, T, H, Hd).transpose(1, 2) +``` +--- + +### Rotary Positional Embeddings (RoPE) + +Positional information is injected by **rotating Q and K**, rather than adding absolute position vectors: + +``` +cos = self.cos_cached[position_offset:position_offset+seq_len] +sin = self.sin_cached[position_offset:position_offset+seq_len] + +cos = cos[None, None, :, :] +sin = sin[None, None, :, :] + +q_rot = (q * cos) + (self.rotate_half(q) * sin) +k_rot = (k * cos) + (self.rotate_half(k) * sin) +``` + +This embeds **relative positional structure directly in attention**. 
+ +--- + +### Causal Masking + +A lower-triangular mask is applied before softmax: + +``` +causal = torch.tril(torch.ones(seq_len, seq_len, device=x.device, dtype=torch.bool)) +scores = scores.masked_fill(~causal, float("-inf")) +``` +--- + +### Scaled Dot-Product Attention + +The attention core follows the standard Transformer pipeline: + +``` +scores = (q @ k.transpose(-2, -1)) * self.scale +attn = F.softmax(scores, dim=-1) +attn = self.attn_dropout(attn) + +out = attn @ v +out = out.transpose(1, 2).contiguous().view(B, T, D) +out = self.out_proj(out) +``` + +--- + +### Attention Map + +For visualization the attention map: + +``` +if not self.training: + self.last_attn = attn.detach().cpu() +``` + +--- + +### KV Cache Implementation + +Instead of recomputing attention over the entire prefix at every step, previously computed keys and values are stored and reused. + +#### Cache Allocation + +``` +if past_kv is None: + max_len = self.max_seq_len + k_cache = torch.empty(B, H, max_len, Hd, device=x.device) + v_cache = torch.empty_like(k_cache) + pos = 0 +else: + k_cache, v_cache, pos = past_kv +``` + +#### Writing New Tokens into Cache + +``` +k_cache[:, :, pos:pos+seq_len, :] = k +v_cache[:, :, pos:pos+seq_len, :] = v +``` + +Only the new token projections are computed. + + +#### Using Cached Prefix + +``` +k = k_cache[:, :, :pos+seq_len, :] +v = v_cache[:, :, :pos+seq_len, :] +scores = (q @ k.transpose(-2, -1)) * self.scale +``` + +Attention now runs against **all previous tokens** without recomputing their projections. 
+ + + +#### Updated Cache Returned + +``` +present_kv = (k_cache, v_cache, pos + seq_len) +return out, present_kv +``` +--- +### Sinusoidal Positional Encoding Implementation + + +In my sinusoidal positional encoding implementation, positional information is added **directly to token embeddings** before +the transformer blocks: + +```python +x = self.token_embedding(input_ids) +x = x + self.pos_emb[:x.size(1), :].unsqueeze(0) +``` + +The positional embedding matrix is precomputed once: + +```python +self.pe = self.build_sinusoidal_pos_emb(max_seq_len, dim, device="cuda") +self.register_buffer("pos_emb", self.pe, persistent=False) +``` + +**Important Behavior During Generation** + +During generation, I use a **cropped context window**: + +```python +input_context = input_ids if input_ids.size(1) <= self.max_seq_len else input_ids[:, -self.max_seq_len:] +``` + +Because of this, the positional encoding applied at each forward pass is: + +```python +self.pos_emb[:context_length] +``` + +This means that **position indices restart from 0** whenever the context window is cropped. +So tokens at later stages of generation are assigned positions relative to the window rather than their absolute position in the full sequence. + + +## Challenges and Solutions + +1. Dataset understanding: Initially, the structure of the dataset was confusing, + particularly how states, actions, and targets aligned for next-token prediction. + By printing tensor shapes and inspecting sample sequences, I verified the sequence + dimensions and confirmed the correct autoregressive setup (predicting action t+1 from action t). + +2. KV cache performance: My first KV cache implementation was unexpectedly slower than the + no-cache baseline. This was due to inefficiencies such as suboptimal mask computation and + non-optimal buffer usage. 
After improving the masking logic and using pre-allocated key/value + buffers correctly, KV caching produced consistent inference speedups, especially for longer + sequences. + +3. Attention visualization: At first, I wasn’t sure how to visualize the attention maps. + After referring to a few online resources, I learned how to extract and detach the attention + weights from the model. By storing the detached attention matrix during a forward pass, + I was able to visualize the attention patterns for a single input sequence. [reference blog](https://alessiodevoto.github.io/vit-attention/) + +## References +1. https://huggingface.co/blog/not-lain/kv-caching +2. https://www.kaggle.com/code/aisuko/causal-self-attention +3. https://alessiodevoto.github.io/vit-attention/ +4. https://medium.com/ai-insights-cobet/rotary-positional-embeddings-a-detailed-look-and-comprehensive-understanding-4ff66a874d83 +5. https://medium.com/@lepicardhugo/how-transformers-encode-position-pe-rope-made-simple-024d5e03fa03 +6.
https://www.youtube.com/watch?v=GQPOtyITy54 + +## Sample Output +``` +Using device: cuda +dataset keys dict_keys(['states', 'actions']) +Epoch 0 | Batch 100/282 | Loss 3.4397 | perplexity 31.1783 +Epoch 0 | Batch 200/282 | Loss 2.5887 | perplexity 13.3130 +Epoch 1/10 - Loss: 3.2790 +Epoch 1: train 3.2790 | val 2.3331 | val_perplexity 10.3101 +Epoch 1 | Batch 100/282 | Loss 2.2714 | perplexity 9.6934 +Epoch 1 | Batch 200/282 | Loss 2.1437 | perplexity 8.5313 +Epoch 2/10 - Loss: 2.2153 +Epoch 2: train 2.2153 | val 2.0984 | val_perplexity 8.1527 +Epoch 2 | Batch 100/282 | Loss 2.0614 | perplexity 7.8570 +Epoch 2 | Batch 200/282 | Loss 2.0887 | perplexity 8.0743 +Epoch 3/10 - Loss: 2.0842 +Epoch 3: train 2.0842 | val 2.0381 | val_perplexity 7.6761 +Epoch 3 | Batch 100/282 | Loss 1.9957 | perplexity 7.3575 +Epoch 3 | Batch 200/282 | Loss 2.0274 | perplexity 7.5945 +Epoch 4/10 - Loss: 2.0412 +Epoch 4: train 2.0412 | val 2.0139 | val_perplexity 7.4923 +Epoch 4 | Batch 100/282 | Loss 2.0546 | perplexity 7.8034 +Epoch 4 | Batch 200/282 | Loss 2.0366 | perplexity 7.6647 +Epoch 5/10 - Loss: 2.0176 +Epoch 5: train 2.0176 | val 1.9938 | val_perplexity 7.3435 +Epoch 5 | Batch 100/282 | Loss 1.9758 | perplexity 7.2124 +Epoch 5 | Batch 200/282 | Loss 1.9989 | perplexity 7.3807 +Epoch 6/10 - Loss: 2.0011 +Epoch 6: train 2.0011 | val 1.9882 | val_perplexity 7.3026 +Epoch 6 | Batch 100/282 | Loss 2.0455 | perplexity 7.7333 +Epoch 6 | Batch 200/282 | Loss 1.9885 | perplexity 7.3045 +Epoch 7/10 - Loss: 1.9901 +Epoch 7: train 1.9901 | val 1.9798 | val_perplexity 7.2412 +Epoch 7 | Batch 100/282 | Loss 1.9672 | perplexity 7.1505 +Epoch 7 | Batch 200/282 | Loss 1.9944 | perplexity 7.3476 +Epoch 8/10 - Loss: 1.9802 +Epoch 8: train 1.9802 | val 1.9738 | val_perplexity 7.1983 +Epoch 8 | Batch 100/282 | Loss 1.9764 | perplexity 7.2166 +Epoch 8 | Batch 200/282 | Loss 1.9743 | perplexity 7.2013 +Epoch 9/10 - Loss: 1.9722 +Epoch 9: train 1.9722 | val 1.9699 | val_perplexity 7.1700 +Epoch 9 | 
Batch 100/282 | Loss 1.9751 | perplexity 7.2074 +Epoch 9 | Batch 200/282 | Loss 1.9751 | perplexity 7.2074 +Epoch 10/10 - Loss: 1.9662 +Epoch 10: train 1.9662 | val 1.9661 | val_perplexity 7.1426 +``` \ No newline at end of file diff --git a/data/trajectories.pkl b/data/trajectories.pkl new file mode 100644 index 00000000..d5b55493 Binary files /dev/null and b/data/trajectories.pkl differ diff --git a/grading_reports/GRADING_REPORT.md b/grading_reports/GRADING_REPORT.md new file mode 100644 index 00000000..242abb8b --- /dev/null +++ b/grading_reports/GRADING_REPORT.md @@ -0,0 +1,66 @@ +![Chris-Bot](~/chris_robot.png) +### 🤖 Chris's Grading Assistant - Feedback Report + +**Student:** @Tr0612 +**PR:** #35 +**Branch:** `scratch-1-thanushraam` + +Hi! I've reviewed your submission. Here's what I found: + +--- + +## 📊 Component Feedback + +### ✅ Causal Self-Attention + +✅ Perfect! Your causal mask correctly prevents future token leakage. + +✅ Test passed. + +### ✅ RMSNorm + +✅ RMSNorm implemented correctly with proper normalization and learnable scale. + +✅ Test passed. + +### ✅ Training Loop + +✅ Excellent! Your model trains successfully and loss converges. + +### ✅ RoPE Embeddings + +✅ RoPE correctly applied to Q and K tensors. + +### ✅ Model Architecture + +✅ Model forward pass works end-to-end with correct output shapes. + +✅ Model has the expected number of trainable parameters. + +### ✅ Code Quality + +Your code imports and runs cleanly. Nice! ✨ + +--- + +## 📝 Documentation & Analysis + +✅ Report submitted! I found: +- `content/course/submissions/scratch-1/thanushraam.mdx` +- `README.md` + +Your instructor will review the quality of your analysis. + +--- + +## 🎯 Mastery Features Detected + +I noticed you implemented: +- KV-Caching implementation +- RoPE vs Sinusoidal ablation study + +Great work going beyond the requirements! Your instructor will verify implementation quality. + +--- + +> *Grading is automated but reviewed by an instructor. 
If you have questions, reach out on Slack!* diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e4141170 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,42 @@ +[project] +name = "vla-foundations" +version = "0.1.0" +description = "VLA Foundations Course - Private Instructor Repository" +readme = "README.md" +requires-python = ">=3.10,<3.14" +dependencies = [ + "torch>=2.0.0", + "torchvision", + "numpy>=1.24.0", + "pytest>=7.0.0", + "pytest-html>=4.0.0", +] + +[[tool.uv.index]] +name = "pytorch-cu118" +url = "https://download.pytorch.org/whl/cu118" +explicit = true + +[tool.uv.sources] +torch = [{ index = "pytorch-cu118" }] +torchvision = [{ index = "pytorch-cu118" }] + +[tool.hatch.build.targets.wheel] +packages = [] + +[tool.pytest.ini_options] +markers = [ + "internal: internal grading tests (never public)", + "rigor: rigorous grading tests", + "gradient: gradient flow tests", + "fidelity: output comparison tests", + "training: training convergence tests", + "mastery: optional mastery-level features (DINOv2, KV-cache, etc.)", +] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] + +[dependency-groups] +dev = [] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..ea7a96da --- /dev/null +++ b/pytest.ini @@ -0,0 +1,13 @@ +[pytest] +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* +markers = + public: Tests that students can see and run + internal: Internal grading tests (never public) + rigor: Rigorous validation tests for grading + gradient: Tests for gradient flow validation + fidelity: Tests for output quality validation + training: Tests for training convergence + mastery: Optional mastery-level features (DINOv2, KV-cache, etc.) 
diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..0e328843 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,49 @@ +# CI/CD Scripts + +**Critical infrastructure scripts** used in GitHub Actions workflows. + +## Contents + +### Production Scripts + +- **`manage_solutions.py`** - Inject/reset assignment solutions (used in testing) +- **`sanitize.sh`** - Main sanitization pipeline for public sync +- **`_sanitize_todos.py`** - Remove solution hints from code +- **`audit_linter.py`** - Validate paper audit MDX files + +### Usage in CI/CD + +| Script | Workflow | Purpose | +|--------|----------|---------| +| `audit_linter.py` | `vla-audit.yml` | Validate audit frontmatter | +| `sanitize.sh` | `sync-to-public.yml` | Remove private content | +| `_sanitize_todos.py` | `sync-to-public.yml` | Strip solution hints | +| `manage_solutions.py` | (local testing) | Inject/reset solutions | + +### Critical Requirements + +1. **Fail-Safe**: All scripts must return non-zero exit codes on failure +2. **Idempotent**: Can be run multiple times safely +3. **Validated**: Must pass linting before sync +4. **Documented**: Clear error messages and usage + +## Development Scripts + +Local development helpers are in `scripts/dev/`. These are **not** used in CI/CD. + +## Modification Guidelines + +Changes to scripts in this directory affect production workflows. Always: + +1. Test locally first +2. Verify exit codes +3. Check GitHub Actions logs +4. Update documentation + +## Security + +These scripts handle sensitive operations: +- `sanitize.sh` - Removes private content before public sync +- `manage_solutions.py` - Manages private solutions + +Never commit secrets or tokens to these scripts. 
diff --git a/scripts/audit_linter.py b/scripts/audit_linter.py index 8ecfa1b8..ff1952a6 100755 --- a/scripts/audit_linter.py +++ b/scripts/audit_linter.py @@ -32,6 +32,62 @@ def check_semantic_breaks(file_path): ) return errors +def validate_frontmatter(file_path, content, lines): + """Validate YAML frontmatter contains required fields.""" + errors = [] + + # Extract frontmatter + if not content.startswith('---'): + errors.append( + f"{file_path}: Missing YAML frontmatter. File must start with '---' followed by " + "title, author, paper, and topic fields." + ) + return errors + + # Find the end of frontmatter + frontmatter_end = None + for i, line in enumerate(lines[1:], start=1): + if line.strip() == '---': + frontmatter_end = i + break + + if frontmatter_end is None: + errors.append( + f"{file_path}: Malformed YAML frontmatter. Missing closing '---'." + ) + return errors + + frontmatter_lines = lines[1:frontmatter_end] + frontmatter_text = '\n'.join(frontmatter_lines) + + # Required fields for audit MDX files + required_fields = ['title', 'author', 'topic', 'paper'] + + for field in required_fields: + # Check if field exists (case-insensitive) + if not any(line.strip().lower().startswith(f'{field}:') for line in frontmatter_lines): + errors.append( + f"{file_path}: Missing required frontmatter field: '{field}'" + ) + + # Validate field values are not empty + for line in frontmatter_lines: + stripped = line.strip() + if ':' in stripped: + field_name, field_value = stripped.split(':', 1) + field_name = field_name.strip().lower() + field_value = field_value.strip() + + if field_name in required_fields: + # Check for empty values or placeholder values + if not field_value or field_value in ['""', "''", 'null', 'TBD', 'TODO']: + errors.append( + f"{file_path}: Empty or placeholder value for required field: '{field_name}'" + ) + + return errors + + def check_mdx_syntax(file_path): """Check for MDX-specific syntax issues.""" with open(file_path, 'r', encoding='utf-8') 
as f: @@ -40,12 +96,8 @@ def check_mdx_syntax(file_path): errors = [] - # Check 1: Must have YAML frontmatter at the start - if not content.startswith('---'): - errors.append( - f"{file_path}: Missing YAML frontmatter. File must start with '---' followed by " - "title, author, paper, and topic fields." - ) + # Check 1: Validate frontmatter fields + errors.extend(validate_frontmatter(file_path, content, lines)) # Check 2: No HTML comments (should use JSX-style {/* */}) if '