Luce-Org · easel · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,32 @@
+# Local venv and Python caches — uv rebuilds inside the image.
+.venv/
+**/__pycache__/
+**/*.pyc
+
+# Build artefacts (`dflash/build/` is already covered by `**/build/`).
+**/build/
+**/build-*/
+dflash/build/
+
+# Model weights — bind-mount at runtime instead of baking into the image.
+dflash/models/
+**/*.gguf
+**/*.safetensors
+
+# Git metadata. Submodule contents are kept; .git files inside the worktree
+# are not needed at build time.
+.git/
+**/.git
+**/.gitignore.local
+
+# Local agent / IDE state.
+.claude/
+.idea/
+.vscode/
+
+# Misc large or volatile; `**/` because a bare `*.ext` only matches at the root.
+**/*.log
+**/*.tmp
+**/*.swp
+**/*.bin
+**/*.npy
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -19,6 +19,12 @@ jobs:
         # full sync and builds megakernel against torch.
         run: bash scripts/check_uv_workspace.sh
 
+      - name: Lint Python surfaces touched by lucebox tooling
+        run: uv run --frozen --extra dev ruff check .  # lints the whole checkout (`.`) using the `dev` extra
+
+      - name: Typecheck lucebox CLI
+        run: uv run --frozen --extra dev python -m mypy --package lucebox  # mypy is scoped to the `lucebox` package
   build:
     name: Build (cmake + uv sync --extra megakernel)
     runs-on: ubuntu-latest

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,147 @@
+name: Docker prebuilds
+
+# Builds the cuda12 lucebox-hub Docker image defined in docker-bake.hcl
+# and pushes it to GHCR. The bake file is the source of
+# truth for arch matrices and CUDA pinning; this workflow only handles
+# fetching submodules, freeing runner disk, signing in to the registry, and
+# wiring the cache.
+
+on:
+  # Build + push to GHCR when a GitHub Release is published. The release tag
+  # becomes one of the image tags via docker/metadata-action's `type=ref,
+  # event=tag` + `type=semver` rules below.
+  release:
+    types: [published]
+  # Build-only CI guard on PRs that touch the docker surface. We never push
+  # from a PR — even if we wanted to, GITHUB_TOKEN on PRs from forks lacks
+  # `packages:write`. The point is to catch Dockerfile / bake-file / arch-
+  # list regressions before they land on main.
+  pull_request:
+    paths:
+      - Dockerfile
+      - docker-bake.hcl
+      - .dockerignore
+      - .github/workflows/docker.yml
+      - server/CMakeLists.txt
+      - server/src/**
+      - server/test/**
+      - server/include/**
+      - server/scripts/**
+      - server/deps/**
+      - server/pyproject.toml
+      - pyproject.toml
+      - uv.lock
+      - lucebox.sh
+      - lucebox/**
+  # Manual trigger for one-off rebuilds or pre-release smoke tests. The
+  # `push` input controls whether the resulting images land in GHCR or only
+  # populate the buildx cache.
+  workflow_dispatch:
+    inputs:
+      push:
+        description: "Push images to GHCR after build"
+        type: boolean
+        default: false
+
+# Single in-flight build per ref. New pushes cancel the previous run so we
+# don't queue 30-min compiles.
+concurrency:
+  group: docker-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository_owner }}/lucebox-hub
+
+jobs:
+  build:
+    name: ${{ matrix.variant }}
+    # ubuntu-latest = 4 CPU / 16 GB RAM / 14 GB free disk on the GitHub-
+    # hosted plan. The disk-free step at the top of the job claws back
+    # ~30 GB, which is enough to land a 14 GB image with build cache.
+    # CPU is the harder constraint: the fat-binary arch list can take hours
+    # on hosted runners. If you outgrow this:
+    #   • Larger GitHub-hosted runners (`ubuntu-latest-8-cores`, paid)
+    #     halve wall time.
+    #   • A self-hosted runner with the host's nvcc avoids the
+    #     containerised CUDA toolkit pull entirely.
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        variant: [cuda12]
+    steps:
+      - name: Free runner disk space
+        # The default ubuntu-latest image keeps ~25 GB of preinstalled
+        # tooling (Android SDK, .NET, Haskell/ghc, etc.) we don't need.
+        # Pinned to a commit SHA; check upstream releases before bumping.
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false   # skipped: removing these unneeded preinstalled apt packages is slow
+          swap-storage: true
+
+      - uses: actions/checkout@v4
+        with:
+          # Submodule contents are needed by the cmake build (llama.cpp ggml
+          # subtree, mit-han-lab Block-Sparse-Attention). The Dockerfile
+          # asserts they're present before running cmake.
+          submodules: recursive
+
+      - uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        # Skip on PR runs: we never push from a PR, and the token on a fork
+        # PR lacks `packages: write` anyway.
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Derive image metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          # Suffix every tag with the variant so future CUDA stacks can
+          # coexist under the same image name; the raw tag below sets an
+          # empty `suffix=` so it stays a bare `:cuda12`. Examples:
+          #   ghcr.io/<owner>/lucebox-hub:cuda12  (release events only)
+          #   ghcr.io/<owner>/lucebox-hub:v0.2.0-cuda12
+          #   ghcr.io/<owner>/lucebox-hub:main-cuda12, :sha-abc1234-cuda12
+          flavor: |
+            latest=false
+            suffix=-${{ matrix.variant }},onlatest=true
+          tags: |
+            type=raw,value=${{ matrix.variant }},suffix=,priority=1000,enable=${{ github.event_name == 'release' }}
+            type=ref,event=branch
+            type=ref,event=tag
+            type=ref,event=pr
+            type=sha,prefix=sha-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+
+      - name: Build and push
+        uses: docker/bake-action@v5
+        with:
+          files: |
+            docker-bake.hcl
+            ${{ steps.meta.outputs.bake-file }}
+          targets: ${{ matrix.variant }}
+          push: ${{ github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.push) }}
+          # gha cache stores layer blobs in the workflow's Actions cache,
+          # scoped by variant so future CUDA stacks don't evict each other.
+          # mode=max also caches multi-stage intermediate layers (the
+          # builder stage with the 30-min nvcc compile), which is the whole
+          # point of doing this.
+          set: |
+            ${{ matrix.variant }}.cache-from=type=gha,scope=${{ matrix.variant }}
+            ${{ matrix.variant }}.cache-to=type=gha,scope=${{ matrix.variant }},mode=max
diff --git a/.github/workflows/release-luce-bench.yml b/.github/workflows/release-luce-bench.yml
@@ -0,0 +1,58 @@
+name: Release luce-bench
+
+# Builds and publishes the luce-bench package to PyPI when a tag
+# matching `luce-bench-v*` is pushed (e.g. `luce-bench-v0.2.5`). The
+# tag's version suffix must match `luce-bench/pyproject.toml`'s
+# `[project] version` — the workflow asserts this and fails otherwise.
+#
+# Uses PyPI trusted publishing (OIDC): set up the publisher in the
+# PyPI project settings as `easel/lucebox-hub` repo + this workflow
+# file + the `pypi` environment. No long-lived API token needed.
+
+on:
+  push:
+    tags:
+      - 'luce-bench-v*'
+
+permissions:
+  contents: read
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/luce-bench
+    permissions:
+      contents: read   # job-level permissions replace the workflow-level block, so restate it for checkout
+      id-token: write  # trusted publishing
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          version: latest
+
+      - name: Verify tag version matches pyproject.toml
+        run: |
+          set -euo pipefail
+          tag="${GITHUB_REF##*/}"            # luce-bench-v0.2.5
+          tag_version="${tag#luce-bench-v}"  # 0.2.5
+          file_version=$(awk -F'"' '/^version[[:space:]]*=/{print $2; exit}' luce-bench/pyproject.toml)
+          if [ "$tag_version" != "$file_version" ]; then
+            echo "Tag version ($tag_version) does not match luce-bench/pyproject.toml version ($file_version)"
+            exit 1
+          fi
+          echo "Releasing luce-bench v$tag_version"
+
+      - name: Build wheel + sdist
+        working-directory: luce-bench
+        run: uv build --out-dir dist  # output lands in luce-bench/dist, which the publish step uploads
+
+      - name: Publish to PyPI (trusted publisher)
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: luce-bench/dist
diff --git a/.gitignore b/.gitignore
@@ -78,3 +78,18 @@ fix-plan.md
 # Harness test artifacts
 .harness-work/
 health
+
+# lucebox host-side generated config + benchmark output
+.lucebox/
+models/.lucebox/
+
+# Claude Code session state (worktrees, agent scratchpads)
+.claude/
+
+# Benchmark snapshots live in the standalone luce-bench-baselines repo
+# (https://github.com/easel/luce-bench-baselines) — not in lucebox-hub.
+dflash/docs/tuning-snapshots/
+
+# luce-bench --sweep default output dir (per-host bench runs); reference
+# baselines live in github.com/easel/luce-bench-baselines.
+luce-bench/snapshots/