diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..0152fec
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,53 @@
+name: Bug Report
+description: Report something that isn't working correctly
+labels: ["bug"]
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: Describe the bug
+      description: A clear description of what's happening.
+    validations:
+      required: true
+
+  - type: textarea
+    id: steps
+    attributes:
+      label: Steps to reproduce
+      description: Minimal steps or code to reproduce the behavior.
+      placeholder: |
+        1. Create a scheduler with `Scheduler::builder()...`
+        2. Submit a task with `...`
+        3. See error...
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected behavior
+      description: What you expected to happen.
+    validations:
+      required: true
+
+  - type: input
+    id: version
+    attributes:
+      label: Taskmill version
+      description: The version of taskmill in your Cargo.toml
+      placeholder: "0.1.1"
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: Operating system
+      placeholder: "Ubuntu 24.04 / macOS 15 / etc."
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant logs
+      description: Paste any relevant log output or panic messages.
+      render: shell
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..e1acadf
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,31 @@
+name: Feature Request
+description: Suggest a new feature or improvement
+labels: ["enhancement"]
+body:
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem or use case
+      description: What are you trying to do? What problem does this solve?
+    validations:
+      required: true
+
+  - type: textarea
+    id: solution
+    attributes:
+      label: Proposed solution
+      description: How do you think this should work?
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives considered
+      description: Any other approaches you've thought about or workarounds you're using.
+
+  - type: textarea
+    id: context
+    attributes:
+      label: Additional context
+      description: Anything else — screenshots, links, related issues.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..ff53739
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,10 @@
+## Summary
+
+<!-- What does this PR do and why? -->
+
+## Test plan
+
+<!-- How was this tested? -->
+
+- [ ] `cargo test --all-features` passes
+- [ ] `cargo clippy --all-features -- -D warnings` is clean
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..d35056c
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,36 @@
+name: CI  # build, test, formatting, and lint checks for the crate
+
+on:
+  push:
+    branches: [main]  # pushes to main
+  pull_request:
+    branches: [main]  # pull requests targeting main
+
+env:
+  CARGO_TERM_COLOR: always  # keep cargo's colored output in the job log
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt, clippy  # required by the fmt and clippy steps below
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      - name: Build
+        run: cargo build --all-features
+
+      - name: Run tests
+        run: cargo test --all-features
+
+      - name: Check formatting
+        run: cargo fmt --check  # reports unformatted code without rewriting files
+
+      - name: Run clippy
+        run: cargo clippy --all-features -- -D warnings  # any warning fails the job
diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml
new file mode 100644
index 0000000..414ab56
--- /dev/null
+++ b/.github/workflows/pre-release.yml
@@ -0,0 +1,75 @@
+name: Pre-release
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "Pre-release version (e.g. 0.1.0-alpha.1, 0.2.0-rc.1)"
+        required: true
+        type: string
+
+env:
+  CARGO_TERM_COLOR: always
+  # Pass the user-supplied version to scripts through the environment rather
+  # than expanding `${{ inputs.version }}` inside `run:` blocks, where the raw
+  # input would be spliced into the shell script before it is validated.
+  VERSION: ${{ inputs.version }}
+
+jobs:
+  pre-release:
+    name: Publish pre-release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      # Match the whole value with a bash regex (not line-by-line grep) so a
+      # multi-line input cannot pass on the strength of one well-formed line.
+      - name: Validate version format
+        run: |
+          pattern='^[0-9]+\.[0-9]+\.[0-9]+-(alpha|beta|rc)\.[0-9]+$'
+          if [[ ! "$VERSION" =~ $pattern ]]; then
+            echo "::error::Invalid version format '$VERSION'. Expected: X.Y.Z-(alpha|beta|rc).N"
+            exit 1
+          fi
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      # VERSION has passed the format check above, so it contains only digits,
+      # dots, hyphens, and letters and is safe to interpolate into the sed script.
+      - name: Set version in Cargo.toml
+        run: sed -i "s/^version = \".*\"/version = \"${VERSION}\"/" Cargo.toml
+
+      - name: Verify build
+        run: cargo build --all-features
+
+      - name: Run tests
+        run: cargo test --all-features
+
+      # The version bump above is not committed, so the working tree is dirty.
+      - name: Publish to crates.io
+        run: cargo publish --allow-dirty
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+
+      - name: Create git tag
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git tag -a "v$VERSION" -m "Pre-release v$VERSION"
+          git push origin "v$VERSION"
+
+      - name: Create GitHub pre-release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh release create "v$VERSION" \
+            --title "v$VERSION" \
+            --generate-notes \
+            --prerelease
diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml
new file mode 100644
index 0000000..965d154
--- /dev/null
+++ b/.github/workflows/release-plz.yml
@@ -0,0 +1,63 @@
+name: Release-plz  # runs both release-plz jobs on every push to main
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  release-plz-release:
+    name: Release-plz release
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history instead of the default shallow clone
+          persist-credentials: false  # do not keep the checkout token in local git config
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      - name: Run release-plz
+        uses: release-plz/action@v0.5
+        with:
+          command: release  # NOTE(review): presumably publishes and tags unreleased versions; confirm in release-plz docs
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+
+  release-plz-pr:
+    name: Release-plz PR
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+    concurrency:
+      group: release-plz-${{ github.ref }}  # one group per ref
+      cancel-in-progress: false  # let an in-flight run finish instead of cancelling it
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history instead of the default shallow clone
+          persist-credentials: false  # do not keep the checkout token in local git config
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo
+        uses: Swatinem/rust-cache@v2
+
+      - name: Run release-plz
+        uses: release-plz/action@v0.5
+        with:
+          command: release-pr  # NOTE(review): presumably opens or updates the release pull request; confirm in release-plz docs
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..2e70d8c
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,34 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.1.1](https://github.com/deepjoy/shoebox/compare/taskmill-v0.1.0...taskmill-v0.1.1) - 2026-03-10
+
+### Added
+
+- add pagination, filtering, query optimization, and trigger-based staleness for duplicates; functional improvements ([#53](https://github.com/deepjoy/shoebox/pull/53))
+
+### Fixed
+
+- *(taskmill)* flush WAL and close database connection on shutdown ([#57](https://github.com/deepjoy/shoebox/pull/57))
+
+## [0.1.0](https://github.com/deepjoy/shoebox/releases/tag/taskmill-v0.1.0) - 2026-03-05
+
+### Added
+
+- *(taskmill)* type-keyed state map with post-build injection ([#46](https://github.com/deepjoy/shoebox/pull/46))
+- *(taskmill)* requeue duplicate submissions when task is running ([#45](https://github.com/deepjoy/shoebox/pull/45))
+- *(taskmill)* add adaptive priority task scheduler with IO-aware concurrency ([#38](https://github.com/deepjoy/shoebox/pull/38))
+
+### Fixed
+
+- *(taskmill)* resolve SQLite BUSY errors with proper transaction handling ([#40](https://github.com/deepjoy/shoebox/pull/40))
+
+### Other
+
+- *(taskmill)* separate priority from task payload, upgrade on dedup ([#44](https://github.com/deepjoy/shoebox/pull/44))
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..788f377
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,61 @@
+# Contributing to Taskmill
+
+Thanks for your interest in contributing! This document covers the basics to get you started.
+
+## Getting Started
+
+### Prerequisites
+
+- [Rust](https://rustup.rs/) (stable toolchain, MSRV 1.75)
+- [lefthook](https://github.com/evilmartians/lefthook) (git hooks)
+
+### Setup
+
+```bash
+git clone https://github.com/deepjoy/taskmill.git
+cd taskmill
+lefthook install
+cargo build
+```
+
+### Running Tests
+
+```bash
+cargo test --all-features
+```
+
+### Formatting and Linting
+
+The project uses `cargo fmt` and `clippy`. Lefthook runs these automatically on pre-commit, but you can run them manually:
+
+```bash
+cargo fmt --check
+cargo clippy --all-features -- -D warnings
+```
+
+## Making Changes
+
+1. Fork the repository and create a branch from `main`.
+2. Make your changes.
+3. Add tests for new functionality.
+4. Ensure `cargo test --all-features` passes.
+5. Ensure `cargo clippy --all-features -- -D warnings` is clean.
+6. Open a pull request against `main`.
+
+## Commit Messages
+
+This project uses [Conventional Commits](https://www.conventionalcommits.org/):
+
+```
+feat: add new feature
+fix: correct a bug
+docs: update documentation
+refactor: restructure code without behavior change
+chore: maintenance tasks
+```
+
+These are used by [release-plz](https://release-plz.dev/) to auto-generate changelogs and determine version bumps.
+
+## Questions?
+
+Open an issue or start a discussion — happy to help.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..1d39928
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,29 @@
+[package]
+name = "taskmill"
+version = "0.1.1"
+edition = "2021"
+rust-version = "1.75"
+license = "MIT"
+description = "Adaptive priority work scheduler with IO-aware concurrency and SQLite persistence"
+keywords = ["scheduler", "priority-queue", "task", "async"]
+categories = ["asynchronous", "concurrency"]
+repository = "https://github.com/deepjoy/taskmill"
+
+[features]
+default = ["sysinfo-monitor"]
+sysinfo-monitor = ["dep:sysinfo"]
+
+[dependencies]
+tokio = { version = "1", features = ["sync", "time", "rt", "macros"] }
+tokio-util = "0.7"
+sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite", "chrono"] }
+tracing = "0.1"
+thiserror = "2.0"
+chrono = { version = "0.4", features = ["serde"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+sha2 = "0.10"
+sysinfo = { version = "0.33", optional = true }
+
+[dev-dependencies]
+tokio = { version = "1", features = ["full", "test-util"] }
diff --git a/LICENSE b/LICENSE
index daacbef..d5e6293 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2026 DJ Majumdar
+Copyright (c) 2026 Deep Joy Majumdar
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ea850cc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,121 @@
+# Taskmill
+
+Adaptive priority work scheduler with IO-aware concurrency and SQLite persistence.
+
+Taskmill is an async task queue for Rust applications that persists work to SQLite,
+schedules by priority with IO-budget awareness, and supports preemption, retries, and
+composable backpressure. Designed for desktop apps (Tauri, etc.) and background services
+where tasks have measurable IO costs and the system needs to avoid saturating disk
+throughput.
+
+## Quick example
+
+```rust
+use std::sync::Arc;
+use std::time::Duration;
+use tokio_util::sync::CancellationToken;
+use taskmill::{
+    Scheduler, Priority, TaskSubmission, TaskExecutor,
+    TaskContext, TaskResult, TaskError, ShutdownMode,
+};
+
+struct ThumbnailGenerator;
+
+impl TaskExecutor for ThumbnailGenerator {
+    async fn execute<'a>(
+        &'a self, ctx: &'a TaskContext,
+    ) -> Result<TaskResult, TaskError> {
+        ctx.progress.report(0.5, Some("resizing".into()));
+        Ok(TaskResult { actual_read_bytes: 4096, actual_write_bytes: 1024 })
+    }
+}
+
+#[tokio::main]
+async fn main() {
+    let scheduler = Scheduler::builder()
+        .store_path("tasks.db")
+        .executor("thumbnail", Arc::new(ThumbnailGenerator))
+        .max_concurrency(8)
+        .with_resource_monitoring()
+        .build()
+        .await
+        .unwrap();
+
+    scheduler.submit(&TaskSubmission::with_payload(
+        "thumbnail",
+        Priority::NORMAL,
+        &serde_json::json!({"path": "/photos/img.jpg"}),
+        4096, 1024,
+    ).unwrap()).await.unwrap();
+
+    let token = CancellationToken::new();
+    scheduler.run(token).await;
+}
+```
+
+## Shared scheduler (library embedding)
+
+A single `Scheduler` can be shared across an application and any libraries it embeds.
+Multiple state types can coexist — each is keyed by its concrete `TypeId`, and new state
+can be injected after the scheduler is built via `register_state`.
+
+```rust
+use std::sync::Arc;
+use taskmill::Scheduler;
+
+// The host app builds the scheduler and registers its own executors.
+let scheduler = Scheduler::builder()
+    .store_path("app.db")
+    .executor("thumbnail", Arc::new(ThumbnailGenerator))
+    .app_state(MyAppServices { /* ... */ })
+    .max_concurrency(4)
+    .build()
+    .await
+    .unwrap();
+
+// A library can inject its own state after build.
+scheduler.register_state(Arc::new(LibraryState { /* ... */ })).await;
+
+// Both the host and the library submit tasks to the same queue.
+// The host manages the run loop.
+let token = CancellationToken::new();
+scheduler.run(token).await;
+```
+
+## Features
+
+- **SQLite persistence** — tasks survive restarts; crash recovery requeues interrupted work
+- **256-level priority queue** — with preemption of lower-priority tasks
+- **IO-aware scheduling** — defers work when disk throughput is saturated
+- **Key-based deduplication** — SHA-256 keys prevent duplicate submissions
+- **Composable backpressure** — plug in external pressure signals with custom throttle policies
+- **Cross-platform resource monitoring** — CPU and disk IO via `sysinfo` (Linux, macOS, Windows)
+- **Retries** — automatic requeue of retryable failures with configurable limits
+- **Progress reporting** — executor-reported and throughput-extrapolated progress
+- **Lifecycle events** — broadcast events for UI integration (Tauri, etc.)
+- **Typed payloads** — serialize/deserialize structured task data
+- **Batch submission** — bulk enqueue in a single SQLite transaction
+- **Graceful shutdown** — configurable drain timeout before force-cancellation
+- **Global pause/resume** — pause all work when the app is backgrounded
+- **Type-keyed application state** — register multiple state types, inject pre- or post-build
+- **Clone-friendly** — `Scheduler` is `Clone` via `Arc` for easy sharing
+- **Serde on all public types** — ready for Tauri IPC
+
+For a detailed breakdown of every feature, see [docs/features.md](docs/features.md).
+
+## Documentation
+
+| Guide | Description |
+|-------|-------------|
+| [Quick Start](docs/quick-start.md) | Installation, first executor, builder setup, and running the scheduler |
+| [Features](docs/features.md) | Complete feature list with descriptions |
+| [Priorities & Preemption](docs/priorities-and-preemption.md) | Priority levels, preemption mechanics, and throttle behavior |
+| [IO Tracking & Backpressure](docs/io-and-backpressure.md) | IO budgets, resource monitoring, pressure sources, and throttle policies |
+| [Persistence & Recovery](docs/persistence-and-recovery.md) | SQLite schema, crash recovery, deduplication, and history retention |
+| [Progress Reporting](docs/progress-reporting.md) | Executor progress, extrapolation, dashboard snapshots, and lifecycle events |
+| [Configuration](docs/configuration.md) | All configuration options for scheduler, store, sampler, and feature flags |
+| [Query APIs](docs/query-apis.md) | Full `TaskStore` query reference for dashboards and debugging |
+
+## License
+
+MIT
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..bc6c20e
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,20 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+If you discover a security vulnerability in Taskmill, please report it responsibly.
+
+**Email:** [code@deepjoy.com](mailto:code@deepjoy.com)
+
+Please include:
+- A description of the vulnerability
+- Steps to reproduce
+- Potential impact
+
+I'll acknowledge your report within 48 hours and aim to release a fix within 7 days for critical issues. Please don't open a public issue for security vulnerabilities.
+
+## Supported Versions
+
+| Version | Supported |
+|---------|-----------|
+| 0.1.x   | Yes       |
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
new file mode 100644
index 0000000..910808a
--- /dev/null
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,546 @@
+# Taskmill Architecture
+
+Taskmill is an adaptive priority work scheduler with IO-aware concurrency and
+SQLite persistence, designed for desktop apps (Tauri) and background services.
+
+## Module map
+
+```
+taskmill/
+  src/
+    lib.rs                 — public API re-exports
+    task.rs                — TaskRecord, TaskSubmission, TaskResult, TaskError, TypedTask, etc.
+    priority.rs            — Priority newtype (u8, lower = higher priority)
+    store.rs               — TaskStore: SQLite persistence, atomic pop, queries, retention
+    registry.rs            — TaskExecutor trait (RPITIT), TaskContext, TaskTypeRegistry
+    backpressure.rs        — PressureSource trait, ThrottlePolicy, CompositePressure
+    scheduler/
+      mod.rs               — Scheduler, SchedulerBuilder, run loop, events, snapshot
+      gate.rs              — DispatchGate trait, DefaultDispatchGate, IO budget check
+      dispatch.rs          — ActiveTaskMap, spawn_task(), preemption
+      progress.rs          — ProgressReporter, EstimatedProgress, throughput extrapolation
+    resource/
+      mod.rs               — ResourceSampler + ResourceReader traits, ResourceSnapshot
+      sampler.rs           — EWMA-smoothed background loop, SmoothedReader
+      sysinfo_monitor.rs   — SysinfoSampler via `sysinfo` crate (feature-gated)
+  migrations/
+    001_tasks.sql          — tasks table, task_history table, indexes
+```
+
+## Task lifecycle
+
+```
+Submit ──► Pending ──► Running ──► Completed  (moved to task_history)
+                │         │
+                │         ├──► Failed         (moved to task_history, or retried)
+                │         │
+                │         └──► Paused         (preempted by higher-priority work)
+                │                 │
+                └─────────────────┘            (resumed when preemptors finish)
+```
+
+Active-queue states (`tasks` table): `pending`, `running`, `paused`.
+Terminal states (`task_history` table): `completed`, `failed`.
+
+## Data flow
+
+```mermaid
+flowchart TD
+    S["submit() / submit_batch()"] --> TS["TaskStore\n(INSERT OR IGNORE)"]
+    TS --> |SQLite| DB[(tasks table)]
+    DB --> SCH["Scheduler run loop"]
+    SCH --> |"tokio::spawn"| E1["Executor + TaskContext"]
+    SCH --> |"tokio::spawn"| E2["Executor + TaskContext"]
+    E1 --> CF["complete() / fail()"]
+    E2 --> CF
+    CF --> HIST[(task_history)]
+    CF --> PRUNE["maybe_prune()\n(amortised retention)"]
+    CF --> EVT["broadcast::Sender\n(SchedulerEvent)"]
+```
+
+## SQLite schema
+
+### `tasks` — active queue
+
+| Column                | Purpose                                            |
+|-----------------------|----------------------------------------------------|
+| `id`                  | `INTEGER PRIMARY KEY` — insertion order within tier|
+| `task_type`           | Executor lookup name (e.g. `"scan-l3"`)            |
+| `key`                 | `UNIQUE` — SHA-256 deduplication key               |
+| `priority`            | `INTEGER NOT NULL` — 0 (highest) to 255 (lowest)  |
+| `status`              | `TEXT` — `pending`, `running`, or `paused`         |
+| `payload`             | `BLOB` — opaque, max 1 MiB, executor-defined       |
+| `expected_read_bytes` | Caller's IO estimate for scheduling decisions      |
+| `expected_write_bytes`| Caller's IO estimate for scheduling decisions      |
+| `retry_count`         | Incremented on each retryable failure              |
+| `last_error`          | Most recent error message (for diagnostics)        |
+| `started_at`          | Set when popped; cleared on pause                  |
+
+A partial index `idx_tasks_pending` on `(status, priority ASC, id ASC) WHERE
+status = 'pending'` covers the scheduler's hot path (`pop_next`), making
+priority-ordered pops efficient regardless of how many running or paused tasks
+sit in the table.
+
+### `task_history` — terminal records
+
+Completed and failed tasks are moved here atomically (delete from `tasks`,
+insert into `task_history` in one transaction). Additional columns:
+
+| Column                | Purpose                                            |
+|-----------------------|----------------------------------------------------|
+| `actual_read_bytes`   | Reported by executor on completion                 |
+| `actual_write_bytes`  | Reported by executor on completion                 |
+| `completed_at`        | Timestamp of completion or failure                 |
+| `duration_ms`         | Computed from `started_at` to `completed_at`       |
+
+An index `idx_history_type_completed` on `(task_type, completed_at DESC)`
+supports IO learning queries (`avg_throughput`, `history_stats`).
+
+### Connection pool
+
+Defaults to 16 connections (`StoreConfig::max_connections`). SQLite serialises
+writes regardless, so this primarily benefits concurrent reads from multiple
+Tauri commands and background tasks.
+
+### Retention policy
+
+`StoreConfig::retention_policy` controls automatic pruning of `task_history`:
+
+- `RetentionPolicy::MaxCount(n)` — keep at most N history records
+- `RetentionPolicy::MaxAgeDays(n)` — keep records from the last N days
+
+Pruning is amortised: an `AtomicU64` completion counter triggers `maybe_prune()`
+every `prune_interval` completions (default 100) rather than after every single
+completion. Pruning errors are logged but never propagated — the task itself is
+already committed. Manual pruning is available via `prune_history_by_count()` and
+`prune_history_by_age()`.
+
+## Deduplication
+
+Key generation: `SHA-256(task_type + ":" + (explicit_key OR payload))`. The task
+type is always incorporated so different types with identical payloads never
+collide.
+
+Enforcement uses the `UNIQUE(key)` constraint with `INSERT OR IGNORE` — a
+duplicate submission silently returns `None`. The key stays occupied while the
+task is active (including retries) and is freed when the task moves to history.
+
+## Priority queue
+
+The priority queue lives entirely in SQLite. `pop_next()` is an atomic
+`UPDATE ... RETURNING` that claims the highest-priority pending row:
+
+```sql
+UPDATE tasks SET status = 'running', started_at = datetime('now')
+WHERE id = (
+    SELECT id FROM tasks WHERE status = 'pending'
+    ORDER BY priority ASC, id ASC LIMIT 1
+)
+RETURNING *
+```
+
+`priority ASC` means lower numeric values are popped first (higher priority).
+`id ASC` breaks ties by insertion order (FIFO within a tier). The partial index
+makes this a single index scan.
+
+The `Priority` type is a `u8` newtype with named constants:
+
+| Constant     | Value | Behaviour                             |
+|--------------|-------|---------------------------------------|
+| `REALTIME`   | 0     | Never throttled, triggers preemption  |
+| `HIGH`       | 64    | Throttled only under extreme pressure |
+| `NORMAL`     | 128   | Standard background work              |
+| `BACKGROUND` | 192   | Paused under moderate pressure        |
+| `IDLE`       | 255   | Runs only when system is idle         |
+
+`Ord` is reversed so `REALTIME > IDLE` semantically. Custom tiers are available
+via `Priority::new(n)`.
+
+## Scheduler architecture
+
+The scheduler is split across four files:
+
+| File           | Concern                                                       |
+|----------------|---------------------------------------------------------------|
+| `mod.rs`       | Orchestration: run loop, submit, cancel, snapshot, builder    |
+| `gate.rs`      | Admission control: backpressure + IO budget                   |
+| `dispatch.rs`  | Task lifecycle: active map, spawn, preemption                 |
+| `progress.rs`  | Progress reporting + throughput-based extrapolation            |
+
+### Dispatch cycle
+
+```mermaid
+flowchart TD
+    START["tick / notify"] --> PAUSED{"is_paused?"}
+    PAUSED -- yes --> WAIT
+    PAUSED -- no --> RESUME["Resume paused tasks\n(if no active preemptors)"]
+    RESUME --> CONC{"active < max_concurrency?"}
+    CONC -- no --> WAIT["Wait for next tick / notify"]
+    CONC -- yes --> PEEK["peek_next()\n(non-mutating)"]
+    PEEK -- empty --> WAIT
+    PEEK -- candidate --> GATE{"gate.admit()\nbackpressure + IO budget"}
+    GATE -- rejected --> WAIT
+    GATE -- admitted --> POP["pop_by_id()\n(atomic claim)"]
+    POP -- claimed --> REG{"Executor registered?"}
+    POP -- gone --> CONC
+    REG -- no --> FAIL["Fail immediately"]
+    REG -- yes --> SPAWN["spawn_task()"]
+    SPAWN --> CONC
+```
+
+The run loop wakes on two signals:
+
+1. **`Notify`** — triggered by `submit()`, `submit_batch()`, and `resume_all()`,
+   so newly enqueued work is picked up without waiting for the next tick.
+2. **`poll_interval` timer** (default 500 ms) — fallback for paused-task
+   resumption and periodic housekeeping.
+
+Key design: the loop uses **peek-then-pop-by-id** rather than a bare `pop_next()`.
+The gate inspects the candidate without mutating the queue; only after admission
+does `pop_by_id()` atomically claim it. If another consumer claimed it in the
+meantime, the loop simply retries. This eliminates the earlier race where a
+popped-then-rejected task needed an explicit requeue step.
+
+Each stage independently halts dispatch:
+
+- **Concurrency** — hard cap via `max_concurrency` (`AtomicUsize`, adjustable at runtime)
+- **DispatchGate** — pluggable admission (default: backpressure + IO budget)
+- **Empty queue** — no pending tasks
+
+### Clone-friendly design
+
+`Scheduler` wraps all shared state in `Arc<SchedulerInner>` and derives `Clone`:
+
+- Can be held directly in `tauri::State<Scheduler>` without extra `Arc` wrapping
+- Cheap clones that share the underlying store, registry, and active map
+
+### Builder
+
+```rust
+Scheduler::builder()
+    .store_path("tasks.db")
+    .executor("scan", Arc::new(ScanExecutor))
+    .executor("exif", Arc::new(ExifExecutor))
+    .pressure_source(Box::new(battery_pressure))
+    .max_concurrency(8)
+    .shutdown_mode(ShutdownMode::Graceful(Duration::from_secs(30)))
+    .with_resource_monitoring()
+    .app_state(MyServices { http, db, cache })
+    .build()
+    .await?;
+```
+
+The builder handles: opening the store, assembling the registry, composing
+pressure sources, spawning the resource sampler, and wiring the `SmoothedReader`.
+The lower-level `Scheduler::new()` remains for advanced use.
+
+## Dispatch gate (internal)
+
+The `DispatchGate` trait (`pub(crate)`) controls admission. The default
+`DefaultDispatchGate` applies two checks:
+
+1. **Backpressure** — `ThrottlePolicy::should_throttle(priority, pressure)`.
+2. **IO budget** — `has_io_headroom()`, described below.
+
+The trait also exposes `pressure()` and `pressure_breakdown()` (with default
+no-op impls) so `Scheduler::snapshot()` can read backpressure state without
+knowing the concrete gate type.
+
+## IO-aware scheduling
+
+### Expected vs actual IO
+
+Callers provide `expected_read_bytes` / `expected_write_bytes` on submission.
+Executors report `actual_read_bytes` / `actual_write_bytes` on completion. The
+history table stores both, enabling learning via `avg_throughput()` and
+`history_stats()`.
+
+### IO budget heuristic
+
+When a `ResourceReader` is present, `has_io_headroom()` runs before each
+dispatch:
+
+1. Read the latest EWMA-smoothed `ResourceSnapshot` (disk bytes/sec).
+2. Sum expected IO across all currently running tasks.
+3. Compute a 2-second budget window: `capacity = bytes_per_sec * 2.0`.
+4. Defer if running IO exceeds 80% of capacity on either read or write axis.
+
+If no reader is configured the check is skipped (always allows dispatch).
+
+### Resource monitoring
+
+Two traits split sampling from consumption:
+
+- **`ResourceSampler`** — `sample() -> ResourceSnapshot`. Raw platform readings.
+- **`ResourceReader`** — `latest() -> ResourceSnapshot`. Read-only, sync.
+
+`SmoothedReader` bridges them: the `run_sampler()` background loop calls
+`sampler.sample()` at a configurable interval (default 1 s), applies EWMA
+smoothing (alpha 0.3), and writes to the `SmoothedReader`. The scheduler reads
+via `reader.latest()`, which uses `RwLock` so readers never block each other.
+
+The built-in `SysinfoSampler` (behind the `sysinfo-monitor` feature) provides
+cross-platform CPU and disk IO via the `sysinfo` crate.
+
+## Backpressure
+
+### PressureSource trait
+
+```rust
+pub trait PressureSource: Send + Sync + 'static {
+    fn pressure(&self) -> f32;  // 0.0 (idle) to 1.0 (saturated)
+    fn name(&self) -> &str;
+}
+```
+
+Implement for external signals: API rate, memory, queue depth, battery, etc.
+
+### CompositePressure
+
+Aggregates multiple sources. The composite value is the **max** across all — the
+system is as pressured as its most constrained resource. `breakdown()` provides
+per-source diagnostics.
+
+### ThrottlePolicy
+
+Default three-tier policy:
+
+| Priority range    | Throttle threshold |
+|-------------------|--------------------|
+| BACKGROUND (192+) | > 50% pressure    |
+| NORMAL (128+)     | > 75% pressure    |
+| HIGH / REALTIME   | Never throttled   |
+
+Custom policies via `ThrottlePolicy::new(thresholds)`.
+
+## Preemption
+
+When a task is submitted at or above `preempt_priority` (default `REALTIME`):
+
+1. All active tasks with strictly lower priority are cancelled
+   (`CancellationToken`) and moved to `paused` status in the store.
+2. `Preempted` events are emitted.
+3. On subsequent poll cycles, paused tasks are only resumed when no active
+   preemptors remain — this prevents a thrashing loop of pause/resume/re-preempt.
+
+Executors cooperate by checking `ctx.token.is_cancelled()` at yield points. An
+executor that ignores cancellation continues running but is no longer tracked;
+its completion or failure is still recorded normally.
+
+## Retry flow
+
+```
+Executor returns Err(TaskError)
+  └─ retryable: false? ──► move to task_history (failed)
+  └─ retryable: true?
+       └─ retry_count < max_retries? ──► status → pending, retry_count += 1
+       └─ otherwise ──► move to task_history (failed)
+```
+
+- Retried tasks keep their original priority (no demotion).
+- The dedup key remains occupied during retries.
+- `max_retries` defaults to 3 (`SchedulerConfig`).
+
+## Event system
+
+`Scheduler::subscribe()` returns a `tokio::sync::broadcast::Receiver<SchedulerEvent>`:
+
+| Event       | When                                         |
+|-------------|----------------------------------------------|
+| `Dispatched`| Task popped and executor spawned             |
+| `Completed` | Task finished successfully                   |
+| `Failed`    | Task failed (includes `will_retry` flag)     |
+| `Preempted` | Task paused for higher-priority work         |
+| `Cancelled` | Task cancelled via `cancel()`                |
+| `Progress`  | Executor reported progress (0.0–1.0)         |
+| `Paused`    | Scheduler globally paused                    |
+| `Resumed`   | Scheduler globally resumed                   |
+
+All variants derive `Serialize`/`Deserialize`.
+
+## Progress reporting
+
+### Executor-reported
+
+Executors call `ctx.progress.report(percent, message)` or
+`ctx.progress.report_fraction(completed, total, message)`. These emit
+`SchedulerEvent::Progress` and update the active task map.
+
+### Throughput-extrapolated
+
+For tasks that don't report progress, `estimated_progress()` extrapolates from
+elapsed time vs. the historical average duration for that task type. When a
+partial report exists, the extrapolation blends historical and current throughput
+for a more accurate estimate.
+
+`EstimatedProgress` provides `reported_percent`, `extrapolated_percent`, and a
+unified `percent` (reported preferred over extrapolated).
+
+## Task type registry
+
+`TaskTypeRegistry` maps string names to executor implementations. The public
+`TaskExecutor` trait uses RPITIT (`impl Future`) for ergonomic async; an internal
+`ErasedExecutor` trait provides object-safe dynamic dispatch for storage.
+
+Duplicate registration panics — catches configuration errors at startup. When the
+scheduler pops a task with no registered executor, it fails immediately with a
+descriptive error.
+
+The registry is essential for crash recovery: after `recover_running()` resets
+in-flight tasks to pending, the scheduler needs the registry to re-dispatch them.
+
+## Application state
+
+Executors often need shared services. Rather than capturing `Arc<T>` per executor,
+the scheduler provides a type-keyed `StateMap` that supports multiple state types:
+
+```rust
+Scheduler::builder()
+    .app_state(MyServices { http, db, cache })
+    .app_state(FeatureFlags { dark_mode: true })
+    .build().await?;
+
+// In the executor:
+let svc = ctx.state::<MyServices>().expect("state not set");
+let flags = ctx.state::<FeatureFlags>().expect("flags not set");
+```
+
+State flows: `SchedulerBuilder` collects `(TypeId, Arc<dyn Any>)` entries →
+assembled into `Arc<StateMap>` at build time → a `StateSnapshot` (lock-free
+`HashMap` clone) is taken once per dispatch and placed in `TaskContext` →
+executors call `ctx.state::<T>()` which does a `TypeId` lookup + downcast.
+
+Libraries that embed a shared scheduler can inject their own state **after**
+build via `scheduler.register_state(Arc::new(LibState { .. })).await`. This
+is how shoebox injects `ScanAppState` into an externally-provided scheduler.
+
+This mirrors Axum's `State<T>` / Tauri's `State<T>` pattern.
+
+## Global pause / resume
+
+`pause_all()` sets an `AtomicBool` flag, cancels every running task's token,
+moves them to paused status, and emits `Paused`. While paused the run loop skips
+dispatch entirely.
+
+`resume_all()` clears the flag, wakes the run loop via `Notify`, and emits
+`Resumed`. Paused tasks are picked up by the existing resumption logic on the
+next cycle.
+
+`try_dispatch()` does **not** check the flag, so manual single-task dispatch
+still works while globally paused. `SchedulerSnapshot::is_paused` reflects the
+flag for UI integration.
+
+## Graceful shutdown
+
+`ShutdownMode` controls behavior when the run loop's `CancellationToken` fires:
+
+- **`Hard`** (default) — cancel all running tasks immediately.
+- **`Graceful(Duration)`** — stop dispatching, wait for running tasks to finish
+  (up to the timeout), then cancel stragglers.
+
+Both modes cancel the resource sampler's `CancellationToken`.
+
+## Crash recovery
+
+On `TaskStore::open()`, the store runs:
+
+```sql
+UPDATE tasks SET status = 'pending', started_at = NULL WHERE status = 'running'
+```
+
+Any task mid-execution when the process died is reset to pending. This is safe
+because executors should be idempotent (or check for partial work), the dedup key
+stays occupied (no duplicates), and `retry_count` is preserved.
+
+## Thread safety
+
+- `Scheduler` — `Clone` via `Arc<SchedulerInner>`
+- `TaskStore` — `Clone` via `SqlitePool`; WAL journal mode for concurrent access
+- `max_concurrency` — `AtomicUsize`, lock-free runtime adjustment
+- `paused` — `AtomicBool` with `Release`/`Acquire` ordering
+- `ActiveTaskMap` — `Arc<Mutex<HashMap>>`, `Clone`
+- `SmoothedReader` — `RwLock` so readers never block each other
+- `TaskTypeRegistry` — immutable after startup, shared via `Arc`
+- Application state — `Arc<dyn Any + Send + Sync>`, shared across all tasks
+- Each spawned task gets its own `CancellationToken`
+- All trait objects require `Send + Sync + 'static`
+
+## Feature flags
+
+- **`sysinfo-monitor`** (default) — enables `SysinfoSampler` for cross-platform
+  CPU and disk IO. Disable for mobile targets or when providing a custom sampler.
+
+Serde (`Serialize`/`Deserialize`) is always enabled on all public types.
+
+## Configuration reference
+
+### SchedulerConfig
+
+| Field                    | Default       | Notes                              |
+|--------------------------|---------------|------------------------------------|
+| `max_concurrency`        | 4             | Adjustable at runtime              |
+| `max_retries`            | 3             |                                    |
+| `preempt_priority`       | `REALTIME`    |                                    |
+| `poll_interval`          | 500 ms        | Fallback; notify wakes sooner      |
+| `throughput_sample_size` | 20            | History rows for IO learning       |
+| `shutdown_mode`          | `Hard`        |                                    |
+
+### StoreConfig
+
+| Field              | Default | Notes                                     |
+|--------------------|---------|-------------------------------------------|
+| `max_connections`  | 16      | SQLite pool size                          |
+| `retention_policy` | `None`  | `MaxCount(n)` or `MaxAgeDays(n)`          |
+| `prune_interval`   | 100     | Prune every N completions                 |
+
+### SamplerConfig
+
+| Field        | Default | Notes                    |
+|--------------|---------|--------------------------|
+| `interval`   | 1 s     | Sample period            |
+| `ewma_alpha` | 0.3     | Smoothing factor (0–1)   |
+
+## Tauri integration
+
+### State management
+
+```rust
+app.manage(scheduler);  // Scheduler is Clone — no Arc needed
+
+#[tauri::command]
+async fn submit_task(
+    scheduler: tauri::State<'_, Scheduler>,
+) -> Result<Option<i64>, StoreError> {
+    scheduler.submit(&submission).await
+}
+
+#[tauri::command]
+async fn scheduler_status(
+    scheduler: tauri::State<'_, Scheduler>,
+) -> Result<SchedulerSnapshot, StoreError> {
+    scheduler.snapshot().await
+}
+```
+
+### Event bridging
+
+```rust
+let mut events = scheduler.subscribe();
+let handle = app_handle.clone();
+tokio::spawn(async move {
+    while let Ok(event) = events.recv().await {
+        handle.emit("taskmill-event", &event).unwrap();
+    }
+});
+```
+
+### Error handling
+
+`StoreError` derives `Serialize`/`Deserialize`, so it can be returned directly
+from Tauri commands without conversion.
+
+### Cross-platform
+
+Disable the `sysinfo-monitor` feature on mobile targets by setting
+`default-features = false`, and provide a custom `ResourceSampler` for iOS/Android
+if needed. Everything else (SQLite, scheduling, events) works on all platforms.
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 0000000..f5eb708
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,147 @@
+# Configuration
+
+## SchedulerConfig
+
+Controls scheduling behavior. Set via builder methods or pass directly to `Scheduler::new()`.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `max_concurrency` | `usize` | 4 | Maximum concurrent running tasks. Adjustable at runtime via `set_max_concurrency()`. |
+| `max_retries` | `i32` | 3 | Retry limit before a task is permanently failed. |
+| `preempt_priority` | `Priority` | `REALTIME` (0) | Tasks at or above this priority trigger preemption of lower-priority work. |
+| `poll_interval` | `Duration` | 500ms | Sleep between scheduler dispatch cycles. The scheduler also wakes on `Notify` signals. |
+| `throughput_sample_size` | `i32` | 20 | Number of recent completions used for throughput-based progress extrapolation. |
+| `shutdown_mode` | `ShutdownMode` | `Hard` | `Hard` cancels all tasks immediately. `Graceful(Duration)` waits up to the timeout. |
+
+### Builder methods
+
+```rust
+use std::time::Duration;
+use taskmill::{Scheduler, Priority, ShutdownMode};
+
+let scheduler = Scheduler::builder()
+    .max_concurrency(8)
+    .max_retries(5)
+    .preempt_priority(Priority::HIGH)
+    .poll_interval(Duration::from_millis(250))
+    .shutdown_mode(ShutdownMode::Graceful(Duration::from_secs(30)))
+    .build()
+    .await?;
+```
+
+## StoreConfig
+
+Controls the SQLite connection pool and history retention.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `max_connections` | `u32` | 16 | SQLite connection pool size. |
+| `retention_policy` | `Option<RetentionPolicy>` | `None` | Automatic history pruning. `MaxCount(n)` or `MaxAgeDays(n)`. |
+| `prune_interval` | `u64` | 100 | Number of task completions between automatic prune runs. |
+
+### Builder method
+
+```rust
+use taskmill::{StoreConfig, RetentionPolicy};
+
+let scheduler = Scheduler::builder()
+    .store_config(StoreConfig {
+        max_connections: 32,
+        retention_policy: Some(RetentionPolicy::MaxCount(10_000)),
+        prune_interval: 50,
+        ..Default::default()
+    })
+    .build()
+    .await?;
+```
+
+## SamplerConfig
+
+Controls the resource monitoring background loop.
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `interval` | `Duration` | 1s | How often to sample system resources. |
+| `ewma_alpha` | `f64` | 0.3 | EWMA smoothing factor. Higher = more responsive to changes, lower = smoother. |
+
+### Builder method
+
+```rust
+use std::time::Duration;
+use taskmill::SamplerConfig;
+
+let scheduler = Scheduler::builder()
+    .with_resource_monitoring()
+    .sampler_config(SamplerConfig {
+        interval: Duration::from_millis(500),
+        ewma_alpha: 0.5,
+    })
+    .build()
+    .await?;
+```
+
+## ShutdownMode
+
+| Variant | Behavior |
+|---------|----------|
+| `Hard` | Cancel all running tasks immediately when the scheduler stops. |
+| `Graceful(Duration)` | Stop dispatching new tasks, wait for running tasks to complete (up to the timeout), then force-cancel any remaining. As in `Hard` mode, the resource sampler is stopped as well. |
+
+## RetentionPolicy
+
+| Variant | Behavior |
+|---------|----------|
+| `MaxCount(i64)` | Keep the N most recent history records, prune the rest. |
+| `MaxAgeDays(i64)` | Keep records from the last N days, prune older entries. |
+
+## Priority constants
+
+| Constant | Value | Notes |
+|----------|-------|-------|
+| `Priority::REALTIME` | 0 | Highest. Never throttled. Triggers preemption. |
+| `Priority::HIGH` | 64 | |
+| `Priority::NORMAL` | 128 | Default for most tasks. |
+| `Priority::BACKGROUND` | 192 | |
+| `Priority::IDLE` | 255 | Lowest. |
+
+Custom: `Priority::new(n)` for any `u8` value.
+
+## Feature flags
+
+| Feature | Default | Description |
+|---------|---------|-------------|
+| `sysinfo-monitor` | Enabled | Cross-platform CPU and disk IO monitoring via `sysinfo`. Disable for mobile targets or custom samplers. |
+
+### Disabling platform monitoring
+
+```toml
+[dependencies]
+taskmill = { path = "crates/taskmill", default-features = false }
+```
+
+When disabled, you can still provide a custom `ResourceSampler` via `.resource_sampler()`.
+
+## Builder reference
+
+All `SchedulerBuilder` methods:
+
+| Method | Description |
+|--------|-------------|
+| `store_path(path)` | Path to the SQLite database file. |
+| `store(store)` | Use a pre-opened `TaskStore`. |
+| `store_config(config)` | Pool size and retention settings. |
+| `executor(name, executor)` | Register a `TaskExecutor` by name. |
+| `typed_executor::<T>(executor)` | Register using `T::TASK_TYPE` as the name. |
+| `max_concurrency(n)` | Set initial max concurrent tasks. |
+| `max_retries(n)` | Set retry limit. |
+| `preempt_priority(p)` | Set preemption threshold. |
+| `poll_interval(d)` | Set dispatch cycle interval. |
+| `shutdown_mode(mode)` | Set shutdown behavior. |
+| `pressure_source(source)` | Add a `PressureSource` to the composite. |
+| `throttle_policy(policy)` | Set a custom `ThrottlePolicy`. |
+| `with_resource_monitoring()` | Enable platform resource monitoring. |
+| `resource_sampler(sampler)` | Provide a custom `ResourceSampler`. |
+| `sampler_config(config)` | Configure sample interval and smoothing. |
+| `app_state(state)` | Register a state type (multiple types can coexist). |
+| `app_state_arc(arc)` | Register a state type from a pre-existing `Arc`. |
+| `build()` | Build and return the `Scheduler`. |
diff --git a/docs/features.md b/docs/features.md
new file mode 100644
index 0000000..83da43c
--- /dev/null
+++ b/docs/features.md
@@ -0,0 +1,110 @@
+# Features
+
+A complete list of taskmill's capabilities.
+
+## Persistence
+
+- **SQLite-backed queue** — all tasks are stored in SQLite with WAL journal mode. Tasks survive process restarts, crashes, and power loss.
+- **Crash recovery** — tasks left in `running` state during a crash are automatically reset to `pending` on startup. Dedup keys remain occupied so no duplicates sneak in during recovery.
+- **Connection pooling** — configurable pool size (default 16) for concurrent reads.
+
+## Scheduling
+
+- **256-level priority queue** — priorities range from 0 (highest, `REALTIME`) to 255 (lowest, `IDLE`). Five named tiers are provided: `REALTIME`, `HIGH`, `NORMAL`, `BACKGROUND`, `IDLE`. Custom values like `Priority::new(100)` work too.
+- **FIFO within tier** — tasks at the same priority are dispatched in insertion order.
+- **Atomic dispatch** — pop operations use `UPDATE ... WHERE id = (SELECT ...) RETURNING *` for race-free claiming with no lost tasks.
+- **Runtime-adjustable concurrency** — change `max_concurrency` at runtime via `set_max_concurrency()`.
+
+## Deduplication
+
+- **Key-based dedup** — each task gets a SHA-256 key derived from `task_type + payload` (or an explicit key). A `UNIQUE(key)` constraint with `INSERT OR IGNORE` prevents duplicate submissions.
+- **Type-scoped keys** — the task type is always part of the hash, so different task types never collide even with identical payloads.
+- **Lifecycle-aware** — keys are occupied while a task is pending, running, paused, or retrying. The key is freed when the task moves to history (completed or failed).
+- **Batch-safe** — deduplication applies within `submit_batch()` transactions too.
+
+## IO Awareness
+
+- **Expected/actual IO tracking** — submit estimated read/write bytes; executors report actual bytes on completion.
+- **IO budget gating** — the scheduler compares running task IO estimates against EWMA-smoothed system throughput. New work is deferred when cumulative IO would exceed 80% of observed disk capacity.
+- **Learning from history** — `avg_throughput()` and `history_stats()` compute per-type IO averages from actual completions, enabling callers to refine estimates over time.
+
+## Resource Monitoring
+
+- **Cross-platform** — CPU and disk IO via `sysinfo` on Linux, macOS, and Windows. Feature-gated under `sysinfo-monitor` (enabled by default).
+- **EWMA smoothing** — raw samples are smoothed with an exponentially weighted moving average (alpha=0.3, configurable) to avoid spiky readings.
+- **Two-trait design** — `ResourceSampler` (raw platform readings) and `ResourceReader` (smoothed snapshots) are separated for testability and custom implementations.
+- **Custom samplers** — disable the `sysinfo-monitor` feature and provide your own `ResourceSampler` for containers, cgroups, or mobile platforms.
+
+## Backpressure
+
+- **Composable pressure sources** — implement the `PressureSource` trait to expose a `0.0..=1.0` signal from any source (API load, memory, battery, queue depth). `CompositePressure` aggregates sources; the aggregate is the maximum across all.
+- **Throttle policies** — `ThrottlePolicy` maps `(priority, pressure)` to dispatch decisions. The default three-tier policy throttles `BACKGROUND` tasks at >50% pressure, `NORMAL` at >75%, and never throttles `HIGH` or `REALTIME`.
+- **Custom policies** — define your own thresholds for fine-grained control.
+
+## Preemption
+
+- **Priority-based preemption** — when a task at or above `preempt_priority` (default: `REALTIME`) is submitted, all lower-priority running tasks are cancelled and paused.
+- **Token-based cancellation** — preempted tasks have their `CancellationToken` triggered. Executors should check `token.is_cancelled()` at yield points.
+- **Anti-thrash protection** — paused tasks only resume when no active preemptors remain.
+
+## Retries
+
+- **Automatic requeue** — retryable failures (`TaskError { retryable: true }`) are requeued at the same priority with `retry_count += 1`.
+- **Configurable limit** — `max_retries` (default 3) controls how many times a task can be retried before permanent failure.
+- **Dedup preserved** — the key stays occupied during retries, preventing duplicate submission of in-progress work.
+
+## Progress Reporting
+
+- **Executor-reported progress** — report percentage or fraction-based progress via `ctx.progress.report()` or `ctx.progress.report_fraction()`.
+- **Throughput-based extrapolation** — for tasks without explicit reports, the scheduler extrapolates progress from historical average duration, capped at 99% to avoid false completion signals.
+- **Event-driven** — progress updates are emitted as `SchedulerEvent::Progress` for real-time UI updates.
+
+## Lifecycle Events
+
+- **Broadcast channel** — subscribe via `scheduler.subscribe()` to receive `SchedulerEvent` variants: `Dispatched`, `Completed`, `Failed`, `Preempted`, `Cancelled`, `Progress`, `Paused`, `Resumed`.
+- **Tauri-ready** — all events implement `Serialize` and are designed for direct bridging to the frontend via `app_handle.emit()`.
+
+## Task Management
+
+- **Task cancellation** — cancel running, pending, or paused tasks via `scheduler.cancel(task_id)`.
+- **Global pause/resume** — `pause_all()` stops dispatch and pauses running tasks; `resume_all()` resumes on the next cycle. Emits events for UI integration.
+- **Task lookup by dedup key** — `task_lookup()` searches both active and history tables for a task matching a given type and dedup input.
+
+## Typed Payloads
+
+- **Structured submission** — `TaskSubmission::with_payload()` serializes any `Serialize` type to JSON bytes.
+- **Type-safe deserialization** — `TaskRecord::deserialize_payload::<T>()` in executors.
+- **TypedTask trait** — define `TASK_TYPE`, default priority, and expected IO on your struct. Submit with `scheduler.submit_typed()` and deserialize with `ctx.deserialize_typed()`.
+
+## Batch Operations
+
+- **Bulk enqueue** — `submit_batch()` wraps many inserts in a single SQLite transaction. Returns `Vec<Option<i64>>` where `None` indicates deduplication.
+
+## Graceful Shutdown
+
+- **Hard mode** (default) — immediately cancels all running tasks.
+- **Graceful mode** — stops dispatching, waits for running tasks up to a configurable timeout, then force-cancels stragglers.
+
+## Application State
+
+- **Type-keyed state map** — register multiple state types on the builder via `.app_state()` / `.app_state_arc()`. Each type is keyed by `TypeId`; access from any executor via `ctx.state::<T>()`.
+- **Post-build injection** — call `scheduler.register_state(arc)` after build to let libraries inject their own state into a shared scheduler.
+- **Arc-based sharing** — state is wrapped in `Arc` internally; all tasks share the same instance.
+
+## History & Pruning
+
+- **Automatic retention** — configure `RetentionPolicy::MaxCount(n)` or `RetentionPolicy::MaxAgeDays(n)` for automatic history pruning.
+- **Amortized pruning** — pruning runs every N completions (default 100, configurable) to avoid per-task overhead.
+- **Manual pruning** — `prune_history_by_count()` and `prune_history_by_age()` for on-demand cleanup.
+
+## Dashboard
+
+- **Single-call snapshot** — `scheduler.snapshot()` returns a serializable `SchedulerSnapshot` with running tasks, queue depths, progress estimates, pressure readings, and concurrency limits.
+- **Designed for Tauri commands** — return the snapshot directly from a `#[tauri::command]` handler.
+
+## Ergonomics
+
+- **Builder pattern** — `Scheduler::builder()` provides fluent construction with sensible defaults.
+- **Clone-friendly** — `Scheduler` is `Clone` via `Arc<SchedulerInner>` for easy sharing in Tauri state and across async tasks.
+- **Serde on all public types** — every public struct and enum derives `Serialize`/`Deserialize` for Tauri IPC.
+- **Serializable errors** — `StoreError` is serializable for direct use in Tauri command returns.
diff --git a/docs/io-and-backpressure.md b/docs/io-and-backpressure.md
new file mode 100644
index 0000000..6ec709d
--- /dev/null
+++ b/docs/io-and-backpressure.md
@@ -0,0 +1,208 @@
+# IO Tracking & Backpressure
+
+Taskmill combines two independent gating mechanisms — IO budget tracking and composable backpressure — to avoid saturating system resources.
+
+## IO tracking
+
+### Submission estimates
+
+Every `TaskSubmission` includes expected IO:
+
+```rust
+let sub = TaskSubmission {
+    task_type: "scan".into(),
+    key: None,
+    priority: Priority::NORMAL,
+    payload: Some(data),
+    expected_read_bytes: 50_000,   // caller's estimate
+    expected_write_bytes: 10_000,
+};
+```
+
+### Completion actuals
+
+Executors report actual IO in `TaskResult`:
+
+```rust
+Ok(TaskResult {
+    actual_read_bytes: 48_312,
+    actual_write_bytes: 9_876,
+})
+```
+
+Actual values are stored in `task_history` for learning.
+
+### IO budget gating
+
+When resource monitoring is enabled, the scheduler checks IO headroom before dispatching:
+
+1. Query EWMA-smoothed disk throughput from the `ResourceReader`.
+2. Sum expected IO across all running tasks.
+3. Compute a 2-second capacity window: `capacity = bytes_per_sec * 2.0`.
+4. If running IO + candidate IO would exceed 80% of capacity on either axis (read or write), the task is deferred.
+
+This prevents the scheduler from piling up IO-heavy tasks that would saturate the disk.
+
+### Learning from history
+
+Use store queries to refine future estimates:
+
+```rust
+let store = scheduler.store();
+
+// Average read/write bytes per second for a task type (from recent completions)
+let (avg_read_bps, avg_write_bps) = store.avg_throughput("scan", 20).await?;
+
+// Aggregate stats: count, avg duration, avg IO, failure rate
+let stats = store.history_stats("scan").await?;
+```
+
+## Resource monitoring
+
+### Built-in platform sampler
+
+Enabled by default via the `sysinfo-monitor` feature flag. Provides CPU and disk IO on Linux, macOS, and Windows.
+
+```rust
+let scheduler = Scheduler::builder()
+    .with_resource_monitoring()  // uses SysinfoSampler automatically
+    .build()
+    .await?;
+```
+
+### Custom samplers
+
+For containers, cgroups, or mobile platforms, provide your own `ResourceSampler`:
+
+```rust
+use taskmill::{ResourceSampler, ResourceSnapshot};
+
+struct CgroupSampler;
+
+impl ResourceSampler for CgroupSampler {
+    fn sample(&mut self) -> ResourceSnapshot {
+        ResourceSnapshot {
+            cpu_usage: read_cgroup_cpu(),         // 0.0–1.0
+            io_read_bytes_per_sec: read_blkio_read(),
+            io_write_bytes_per_sec: read_blkio_write(),
+        }
+    }
+}
+
+let scheduler = Scheduler::builder()
+    .resource_sampler(Box::new(CgroupSampler))
+    .build()
+    .await?;
+```
+
+### EWMA smoothing
+
+Raw samples are smoothed via a `SmoothedReader` background loop:
+
+```
+smoothed = alpha * raw + (1 - alpha) * previous
+```
+
+- Default alpha: 0.3 (configurable via `SamplerConfig`)
+- Default sample interval: 1 second
+- Readers access snapshots via `RwLock` (readers never block each other)
+
+Configure smoothing:
+
+```rust
+use std::time::Duration;
+use taskmill::SamplerConfig;
+
+let scheduler = Scheduler::builder()
+    .with_resource_monitoring()
+    .sampler_config(SamplerConfig {
+        interval: Duration::from_millis(500),  // sample faster
+        ewma_alpha: 0.5,                       // more responsive
+    })
+    .build()
+    .await?;
+```
+
+## Backpressure
+
+### Pressure sources
+
+Implement the `PressureSource` trait to expose a `0.0..=1.0` signal from any external source:
+
+```rust
+use taskmill::PressureSource;
+
+struct MemoryPressure;
+
+impl PressureSource for MemoryPressure {
+    fn pressure(&self) -> f32 {
+        let used = sys_info::mem_used();
+        let total = sys_info::mem_total();
+        (used as f32 / total as f32).min(1.0)
+    }
+
+    fn name(&self) -> &str { "memory" }
+}
+```
+
+### Composite pressure
+
+Multiple sources are aggregated via `CompositePressure`. The aggregate pressure is the **maximum** across all sources:
+
+```rust
+use taskmill::CompositePressure;
+
+let mut pressure = CompositePressure::new();
+pressure.add_source(Arc::new(MemoryPressure));
+pressure.add_source(Arc::new(QueueDepthPressure));
+// Aggregate = max(memory_pressure, queue_pressure)
+```
+
+Or via the builder:
+
+```rust
+let scheduler = Scheduler::builder()
+    .pressure_source(Arc::new(MemoryPressure))
+    .pressure_source(Arc::new(QueueDepthPressure))
+    .build()
+    .await?;
+```
+
+### Throttle policies
+
+`ThrottlePolicy` maps `(priority, pressure)` to dispatch decisions:
+
+```rust
+use taskmill::{ThrottlePolicy, Priority};
+
+// Default: BACKGROUND >50%, NORMAL >75%, HIGH/REALTIME never
+let policy = ThrottlePolicy::default_three_tier();
+
+// Custom thresholds
+let policy = ThrottlePolicy::new(vec![
+    (Priority::IDLE, 0.3),       // throttle IDLE at 30%
+    (Priority::BACKGROUND, 0.6), // throttle BACKGROUND at 60%
+    (Priority::NORMAL, 0.8),     // throttle NORMAL at 80%
+]);
+```
+
+### How gating works
+
+The default `DispatchGate` combines both mechanisms. A task is dispatched only when **both** pass:
+
+1. **Backpressure check** — `ThrottlePolicy::should_throttle(priority, pressure)` returns false.
+2. **IO budget check** — `has_io_headroom()` confirms the task won't saturate disk throughput.
+
+If either check fails, the task stays in the queue (still `pending`, with `retry_count` unchanged) and is reconsidered on the next poll cycle.
+
+### Diagnostics
+
+The `SchedulerSnapshot` includes pressure readings for debugging:
+
+```rust
+let snap = scheduler.snapshot().await?;
+println!("Aggregate pressure: {:.0}%", snap.pressure * 100.0);
+for (name, value) in &snap.pressure_breakdown {
+    println!("  {}: {:.0}%", name, value * 100.0);
+}
+```
diff --git a/docs/persistence-and-recovery.md b/docs/persistence-and-recovery.md
new file mode 100644
index 0000000..17cb840
--- /dev/null
+++ b/docs/persistence-and-recovery.md
@@ -0,0 +1,172 @@
+# Persistence & Recovery
+
+Taskmill persists all task state to SQLite, ensuring work survives process restarts, crashes, and power loss.
+
+## SQLite schema
+
+Two tables manage the task lifecycle:
+
+### `tasks` — active queue
+
+Holds pending, running, and paused tasks.
+
+| Column | Type | Description |
+|--------|------|-------------|
+| `id` | INTEGER PRIMARY KEY | Insertion-order ID |
+| `task_type` | TEXT NOT NULL | Executor lookup name |
+| `key` | TEXT NOT NULL UNIQUE | SHA-256 dedup key |
+| `priority` | INTEGER NOT NULL | 0–255 (lower = higher priority) |
+| `status` | TEXT DEFAULT 'pending' | `pending`, `running`, or `paused` |
+| `payload` | BLOB | Opaque task data (max 1 MiB) |
+| `expected_read_bytes` | INTEGER | Estimated read IO |
+| `expected_write_bytes` | INTEGER | Estimated write IO |
+| `retry_count` | INTEGER DEFAULT 0 | Number of retries so far |
+| `last_error` | TEXT | Most recent error message |
+| `created_at` | TEXT | ISO 8601 timestamp |
+| `started_at` | TEXT | Set when dispatched, cleared on pause |
+
+**Index:** `idx_tasks_pending(status, priority ASC, id ASC) WHERE status = 'pending'` — partial index for efficient priority-ordered pop.
+
+### `task_history` — completed and failed tasks
+
+| Column | Type | Description |
+|--------|------|-------------|
+| *(all columns from `tasks`)* | | |
+| `actual_read_bytes` | INTEGER | Reported by executor |
+| `actual_write_bytes` | INTEGER | Reported by executor |
+| `completed_at` | TEXT | ISO 8601 timestamp |
+| `duration_ms` | INTEGER | Wall-clock duration |
+| `status` | TEXT | `completed` or `failed` |
+
+**Index:** `idx_history_type_completed(task_type, completed_at DESC) WHERE status = 'completed'` — for per-type history queries and throughput calculations.
+
+## Crash recovery
+
+On startup, `TaskStore::open()` runs a recovery query:
+
+```sql
+UPDATE tasks SET status = 'pending', started_at = NULL WHERE status = 'running'
+```
+
+This resets any tasks that were mid-execution when the process died. The behavior:
+
+- Tasks return to the priority queue at their original priority
+- `retry_count` is preserved (crash doesn't count as a retry)
+- Dedup keys remain occupied (no duplicate submissions during recovery)
+- Tasks are re-dispatched in priority order on the next scheduler cycle
+
+## Deduplication
+
+### How keys are generated
+
+Every task gets a SHA-256 key: `SHA-256(task_type + ":" + (explicit_key OR payload))`.
+
+- **Implicit key** — if no `key` is provided, the payload bytes are used. Tasks with the same type and payload get the same key.
+- **Explicit key** — set `TaskSubmission.key` to control deduplication yourself. Useful when two payloads represent the same logical work (e.g., different timestamps but same file path).
+- **Type scoping** — the task type is always part of the hash, so `("resize", payload)` and `("compress", payload)` never collide.
+
+### Lifecycle
+
+A key is "occupied" while the task is in the `tasks` table (pending, running, or paused — a task awaiting retry is back in `pending`). When the task moves to `task_history` (completed or failed), the key is freed and the same work can be submitted again.
+
+### Submission behavior
+
+```rust
+// Returns Some(id) if inserted
+let id = scheduler.submit(&submission).await?;  // Ok(Some(42))
+
+// Returns None if a task with the same key already exists
+let id = scheduler.submit(&submission).await?;  // Ok(None)
+```
+
+`submit_batch()` applies the same dedup within a single transaction:
+
+```rust
+let ids = scheduler.submit_batch(&[sub1, sub2, sub3]).await?;
+// ids = [Some(1), None, Some(2)]  — sub2 was a duplicate
+```
+
+### Looking up tasks by dedup key
+
+```rust
+use taskmill::TaskLookup;
+
+let lookup = scheduler.task_lookup("resize", "/photos/img.jpg").await?;
+match lookup {
+    TaskLookup::Active(record) => println!("still running: {:?}", record.status),
+    TaskLookup::History(record) => println!("completed: {:?}", record.completed_at),
+    TaskLookup::NotFound => println!("never submitted"),
+}
+```
+
+## History retention
+
+Without pruning, `task_history` grows without bound. Configure automatic retention:
+
+### By count
+
+Keep the N most recent records:
+
+```rust
+use taskmill::{StoreConfig, RetentionPolicy};
+
+let scheduler = Scheduler::builder()
+    .store_config(StoreConfig {
+        retention_policy: Some(RetentionPolicy::MaxCount(10_000)),
+        ..Default::default()
+    })
+    .build()
+    .await?;
+```
+
+### By age
+
+Keep records from the last N days:
+
+```rust
+let scheduler = Scheduler::builder()
+    .store_config(StoreConfig {
+        retention_policy: Some(RetentionPolicy::MaxAgeDays(90)),
+        ..Default::default()
+    })
+    .build()
+    .await?;
+```
+
+### Pruning frequency
+
+Pruning is amortized — it runs every N task completions (default 100, configurable via `StoreConfig::prune_interval`). Pruning errors are logged but don't affect the completed task.
+
+### Manual pruning
+
+```rust
+let store = scheduler.store();
+let deleted = store.prune_history_by_count(5_000).await?;
+let deleted = store.prune_history_by_age(30).await?;
+```
+
+## WAL mode
+
+The database uses SQLite WAL (Write-Ahead Logging) for concurrent reads with serialized writes. This means multiple readers can query task status while the scheduler is dispatching work.
+
+## Connection pooling
+
+The default pool size is 16 connections. Configure via `StoreConfig::max_connections`:
+
+```rust
+let scheduler = Scheduler::builder()
+    .store_config(StoreConfig {
+        max_connections: 32,
+        ..Default::default()
+    })
+    .build()
+    .await?;
+```
+
+## In-memory store for testing
+
+For tests, use an in-memory database that doesn't touch the filesystem:
+
+```rust
+let store = TaskStore::open_memory().await?;
+```
diff --git a/docs/priorities-and-preemption.md b/docs/priorities-and-preemption.md
new file mode 100644
index 0000000..36b069a
--- /dev/null
+++ b/docs/priorities-and-preemption.md
@@ -0,0 +1,105 @@
+# Priorities & Preemption
+
+## Priority levels
+
+Taskmill uses a 256-level priority scale where lower values mean higher priority. Five named constants are provided:
+
+| Constant     | Value | Behavior |
+|--------------|-------|----------|
+| `REALTIME`   | 0     | Never throttled. Triggers preemption of lower-priority work. |
+| `HIGH`       | 64    | Not throttled by the default policy. |
+| `NORMAL`     | 128   | Standard operations. Throttled at >75% pressure. |
+| `BACKGROUND` | 192   | Deferred under moderate load. Throttled at >50% pressure. |
+| `IDLE`       | 255   | Runs only when the system is otherwise idle. Throttled at >50% pressure. |
+
+Custom values between tiers are supported:
+
+```rust
+use taskmill::Priority;
+
+let custom = Priority::new(100); // between HIGH and NORMAL
+```
+
+## Queue ordering
+
+Tasks are popped from the queue in strict priority order (`ORDER BY priority ASC, id ASC`). Tasks with the same priority value are dispatched in insertion order (FIFO).
+
+A partial index on `(status, priority, id) WHERE status = 'pending'` keeps pop operations fast regardless of history size.
+
+## Preemption
+
+When a task with priority at or above `preempt_priority` (default: `REALTIME`) is submitted, the scheduler preempts lower-priority running work:
+
+1. **Cancel tokens** — the `CancellationToken` of every active task with lower priority (higher numeric value) is triggered.
+2. **Pause in store** — preempted tasks are moved to `paused` status with `started_at` cleared.
+3. **Emit events** — a `SchedulerEvent::Preempted` is emitted for each affected task.
+4. **Resume later** — paused tasks are only re-dispatched when no active preemptors remain, preventing thrashing between competing priority tiers.
+
+### Handling preemption in executors
+
+Executors should check for cancellation at natural yield points:
+
+```rust
+impl TaskExecutor for MyExecutor {
+    async fn execute<'a>(
+        &'a self, ctx: &'a TaskContext,
+    ) -> Result<TaskResult, TaskError> {
+        for (i, chunk) in chunks.into_iter().enumerate() {
+            // Check before each unit of work
+            if ctx.token.is_cancelled() {
+                return Err(TaskError {
+                    message: "preempted".into(),
+                    retryable: true,
+                    actual_read_bytes: bytes_read_so_far,
+                    actual_write_bytes: bytes_written_so_far,
+                });
+            }
+
+            process(chunk).await;
+            ctx.progress.report_fraction(i, total, None);
+        }
+
+        Ok(TaskResult { actual_read_bytes: total_read, actual_write_bytes: total_written })
+    }
+}
+```
+
+Returning a retryable error on preemption is optional — the scheduler handles pausing regardless. But it gives the executor a chance to report partial IO and clean up.
+
+### Configuring preemption threshold
+
+```rust
+let scheduler = Scheduler::builder()
+    .preempt_priority(Priority::HIGH)  // now HIGH and REALTIME both trigger preemption
+    .build()
+    .await?;
+```
+
+## Throttle behavior
+
+Throttling is independent of preemption. It controls whether a pending task is *dispatched*, not whether a running task is *interrupted*.
+
+The default three-tier `ThrottlePolicy`:
+
+| Priority tier | Throttled when pressure exceeds |
+|---------------|-------------------------------|
+| `BACKGROUND` (192+) | 50% |
+| `NORMAL` (128+) | 75% |
+| `HIGH` / `REALTIME` | Never |
+
+Pressure is an aggregate `0.0..=1.0` value from all registered `PressureSource` implementations (see [IO & Backpressure](io-and-backpressure.md)).
+
+### Custom throttle policies
+
+```rust
+use taskmill::{ThrottlePolicy, Priority};
+
+// Custom: throttle IDLE at 30%, BACKGROUND at 60%, NORMAL at 80%
+let policy = ThrottlePolicy::new(vec![
+    (Priority::IDLE, 0.3),
+    (Priority::BACKGROUND, 0.6),
+    (Priority::NORMAL, 0.8),
+]);
+```
+
+Thresholds are evaluated from lowest priority (highest numeric value) first. A task is throttled if its priority is at or below the threshold tier and pressure exceeds the limit.
diff --git a/docs/progress-reporting.md b/docs/progress-reporting.md
new file mode 100644
index 0000000..2c65067
--- /dev/null
+++ b/docs/progress-reporting.md
@@ -0,0 +1,160 @@
+# Progress Reporting
+
+Taskmill provides real-time progress tracking for running tasks, combining executor-reported values with throughput-based extrapolation.
+
+## Reporting from executors
+
+Executors receive a `ProgressReporter` via `ctx.progress`:
+
+```rust
+impl TaskExecutor for MyExecutor {
+    async fn execute<'a>(
+        &'a self, ctx: &'a TaskContext,
+    ) -> Result<TaskResult, TaskError> {
+        let items = get_work_items();
+
+        for (i, item) in items.iter().enumerate() {
+            process(item).await;
+
+            // Percentage-based (0.0 to 1.0)
+            ctx.progress.report(
+                (i + 1) as f32 / items.len() as f32,
+                Some(format!("processed {}/{}", i + 1, items.len())),
+            );
+        }
+
+        Ok(TaskResult { actual_read_bytes: 0, actual_write_bytes: 0 })
+    }
+}
+```
+
+### Fraction-based reporting
+
+For count-based progress:
+
+```rust
+ctx.progress.report_fraction(processed, total, Some("importing".into()));
+// Automatically computes: processed as f32 / total as f32
+```
+
+## Progress events
+
+Every `report()` call emits a `SchedulerEvent::Progress`:
+
+```rust
+SchedulerEvent::Progress {
+    task_id: 42,
+    task_type: "resize".into(),
+    key: "abc123".into(),
+    percent: 0.5,
+    message: Some("resizing".into()),
+}
+```
+
+Subscribe to events for real-time UI updates:
+
+```rust
+let mut events = scheduler.subscribe();
+tokio::spawn(async move {
+    while let Ok(event) = events.recv().await {
+        if let SchedulerEvent::Progress { task_id, percent, message, .. } = event {
+            update_ui(task_id, percent, message);
+        }
+    }
+});
+```
+
+## Throughput-based extrapolation
+
+For tasks that don't report progress (or between reports), the scheduler extrapolates based on historical data:
+
+1. Fetch `history_stats(task_type)` to get the average duration for this task type.
+2. Compute throughput: `1.0 / avg_duration_ms` (completion fraction per millisecond).
+3. Multiply by elapsed time since `started_at` to get an extrapolated percentage.
+4. If the executor has reported partial progress, blend the historical throughput with the current rate.
+5. Cap at 99% — extrapolation never reaches 100% to avoid false "complete" signals.
+
+This means even tasks with no explicit progress reporting show movement in UI dashboards.
+
+## Querying progress
+
+### All running tasks
+
+```rust
+let progress = scheduler.estimated_progress().await;
+for p in &progress {
+    println!("{} ({}): {:.0}%", p.task_type, p.key, p.percent * 100.0);
+    // p.reported_percent  — last executor-reported value (if any)
+    // p.extrapolated_percent — throughput-based estimate (if any)
+    // p.percent — best available: reported if present, else extrapolated
+}
+```
+
+### Via snapshot
+
+The `SchedulerSnapshot` includes progress for all running tasks:
+
+```rust
+let snap = scheduler.snapshot().await?;
+for p in &snap.progress {
+    println!("{}: {:.0}%", p.key, p.percent * 100.0);
+}
+```
+
+## Lifecycle events
+
+All scheduler state changes are broadcast as `SchedulerEvent` variants:
+
+| Event | When |
+|-------|------|
+| `Dispatched { task_id, task_type, key }` | Task popped from queue and executor spawned |
+| `Completed { task_id, task_type, key }` | Task finished successfully |
+| `Failed { task_id, task_type, key, error, will_retry }` | Task failed (includes whether it will be retried) |
+| `Preempted { task_id, task_type, key }` | Task paused for higher-priority work |
+| `Cancelled { task_id, task_type, key }` | Task cancelled via `scheduler.cancel()` |
+| `Progress { task_id, task_type, key, percent, message }` | Progress update from executor |
+| `Paused` | Scheduler globally paused via `pause_all()` |
+| `Resumed` | Scheduler resumed via `resume_all()` |
+
+### Tauri bridge
+
+Bridge events to the frontend in a Tauri app:
+
+```rust
+let mut events = scheduler.subscribe();
+let handle = app_handle.clone();
+tokio::spawn(async move {
+    while let Ok(event) = events.recv().await {
+        handle.emit("taskmill-event", &event).unwrap();
+    }
+});
+```
+
+All events derive `Serialize`, so they can be sent directly over Tauri IPC.
+
+## Dashboard snapshot
+
+For UI dashboards, `Scheduler::snapshot()` gathers all scheduler state in a single call:
+
+```rust
+let snap = scheduler.snapshot().await?;
+// snap.running          — Vec<TaskRecord> of currently executing tasks
+// snap.pending_count    — number of tasks waiting to dispatch
+// snap.paused_count     — number of preempted tasks
+// snap.progress         — Vec<EstimatedProgress> for every running task
+// snap.pressure         — aggregate backpressure (0.0–1.0)
+// snap.pressure_breakdown — per-source diagnostics: Vec<(String, f32)>
+// snap.max_concurrency  — current concurrency limit
+// snap.is_paused        — whether the scheduler is globally paused
+```
+
+Return directly from a Tauri command:
+
+```rust
+#[tauri::command]
+async fn scheduler_status(
+    scheduler: tauri::State<'_, Scheduler>,
+) -> Result<SchedulerSnapshot, StoreError> {
+    scheduler.snapshot().await
+}
+```
diff --git a/docs/query-apis.md b/docs/query-apis.md
new file mode 100644
index 0000000..f0acaee
--- /dev/null
+++ b/docs/query-apis.md
@@ -0,0 +1,103 @@
+# Query APIs
+
+All queries are available on `TaskStore`, accessed via `scheduler.store()`.
+
+## Active task queries
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `running_tasks()` | `Vec<TaskRecord>` | All running tasks, ordered by priority. |
+| `running_count()` | `i64` | Count of running tasks. |
+| `pending_tasks(limit)` | `Vec<TaskRecord>` | Pending tasks, ordered by priority then age. |
+| `pending_count()` | `i64` | Count of pending tasks. |
+| `pending_by_type(task_type)` | `Vec<TaskRecord>` | Pending tasks filtered by type. |
+| `paused_tasks()` | `Vec<TaskRecord>` | All paused (preempted) tasks. |
+| `paused_count()` | `i64` | Count of paused tasks. |
+| `task_by_id(id)` | `Option<TaskRecord>` | Look up an active task by row ID. |
+| `task_by_key(key)` | `Option<TaskRecord>` | Look up an active task by dedup key. |
+| `running_io_totals()` | `(i64, i64)` | Sum of `(expected_read_bytes, expected_write_bytes)` across running tasks. |
+
+## History queries
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `history(limit, offset)` | `Vec<TaskHistoryRecord>` | Paginated history, newest first. |
+| `history_by_type(task_type, limit)` | `Vec<TaskHistoryRecord>` | History filtered by task type. |
+| `history_by_key(key)` | `Vec<TaskHistoryRecord>` | All past runs matching a dedup key. |
+| `failed_tasks(limit)` | `Vec<TaskHistoryRecord>` | Recent failures. |
+
+## Aggregate queries
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `history_stats(task_type)` | `TypeStats` | Aggregate stats: count, avg duration, avg IO, failure rate. |
+| `avg_throughput(task_type, limit)` | `(f64, f64)` | Average `(read_bytes/sec, write_bytes/sec)` from recent completions. |
+
+### TypeStats fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `count` | `i64` | Total completed tasks of this type. |
+| `avg_duration_ms` | `f64` | Average wall-clock duration. |
+| `avg_read_bytes` | `f64` | Average actual read bytes. |
+| `avg_write_bytes` | `f64` | Average actual write bytes. |
+| `failure_rate` | `f64` | Fraction of tasks that failed (0.0–1.0). |
+
+## Unified lookup
+
+Search both active and history tables by dedup key:
+
+```rust
+use taskmill::TaskLookup;
+
+let lookup = scheduler.task_lookup("resize", "/photos/img.jpg").await?;
+match lookup {
+    TaskLookup::Active(record) => {
+        println!("Status: {:?}, priority: {}", record.status, record.priority.value());
+    }
+    TaskLookup::History(record) => {
+        println!("Completed at: {:?}, duration: {}ms", record.completed_at, record.duration_ms);
+    }
+    TaskLookup::NotFound => {
+        println!("No task found with this key");
+    }
+}
+```
+
+Or with typed tasks:
+
+```rust
+let lookup = scheduler.lookup_typed(&ResizeTask {
+    path: "/photos/img.jpg".into(),
+    width: 300,
+}).await?;
+```
+
+## Pruning
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `prune_history_by_count(keep)` | `u64` | Delete all but the N most recent history records. Returns count deleted. |
+| `prune_history_by_age(days)` | `u64` | Delete history records older than N days. Returns count deleted. |
+
+## Usage example
+
+```rust
+let store = scheduler.store();
+
+// Dashboard data
+let running = store.running_count().await?;
+let pending = store.pending_count().await?;
+let (read_io, write_io) = store.running_io_totals().await?;
+
+// Per-type analytics
+let stats = store.history_stats("thumbnail").await?;
+println!(
+    "thumbnail: {} completed, avg {:.0}ms, {:.1}% failure rate",
+    stats.count, stats.avg_duration_ms, stats.failure_rate * 100.0,
+);
+
+// Paginated history for a UI table
+let page = store.history(50, 0).await?;     // first 50
+let page2 = store.history(50, 50).await?;   // next 50
+```
diff --git a/docs/quick-start.md b/docs/quick-start.md
new file mode 100644
index 0000000..bc3b871
--- /dev/null
+++ b/docs/quick-start.md
@@ -0,0 +1,211 @@
+# Quick Start
+
+## Installation
+
+Add taskmill to your `Cargo.toml`:
+
+```toml
+[dependencies]
+taskmill = { path = "crates/taskmill" }
+```
+
+To disable platform resource monitoring (e.g., for mobile targets):
+
+```toml
+[dependencies]
+taskmill = { path = "crates/taskmill", default-features = false }
+```
+
+## Implement an executor
+
+Each task type needs a `TaskExecutor` implementation. The executor receives a `TaskContext` containing:
+
+- `record` — the full `TaskRecord` with payload (up to 1 MiB), priority, retry count, etc.
+- `token` — a `CancellationToken` for preemption support
+- `progress` — a `ProgressReporter` for reporting progress back to the scheduler
+- Shared application state (if registered via `.app_state()` or `register_state()`)
+
+```rust
+use std::sync::Arc;
+use taskmill::{TaskExecutor, TaskContext, TaskResult, TaskError};
+
+struct ImageResizer;
+
+impl TaskExecutor for ImageResizer {
+    async fn execute<'a>(
+        &'a self,
+        ctx: &'a TaskContext,
+    ) -> Result<TaskResult, TaskError> {
+        // Deserialize your payload
+        let data: Option<serde_json::Value> = ctx.record.deserialize_payload()?;
+
+        // Check for preemption at yield points
+        if ctx.token.is_cancelled() {
+            return Err(TaskError {
+                message: "preempted".into(),
+                retryable: true,
+                actual_read_bytes: 0,
+                actual_write_bytes: 0,
+            });
+        }
+
+        // Report progress
+        ctx.progress.report(0.5, Some("resizing".into()));
+
+        // Do work...
+
+        Ok(TaskResult {
+            actual_read_bytes: 4096,
+            actual_write_bytes: 1024,
+        })
+    }
+}
+```
+
+## Build and run the scheduler
+
+```rust
+use std::sync::Arc;
+use std::time::Duration;
+use tokio_util::sync::CancellationToken;
+use taskmill::{Scheduler, Priority, TaskSubmission, ShutdownMode};
+
+#[tokio::main]
+async fn main() {
+    // Build the scheduler — opens the DB, registers executors, starts monitoring.
+    let scheduler = Scheduler::builder()
+        .store_path("tasks.db")
+        .executor("resize", Arc::new(ImageResizer))
+        .max_concurrency(8)
+        .shutdown_mode(ShutdownMode::Graceful(Duration::from_secs(10)))
+        .with_resource_monitoring()
+        .build()
+        .await
+        .unwrap();
+
+    // Scheduler is Clone — share freely across async tasks and Tauri state.
+    let sched = scheduler.clone();
+
+    // Subscribe to lifecycle events for logging or UI updates.
+    let mut events = scheduler.subscribe();
+    tokio::spawn(async move {
+        while let Ok(event) = events.recv().await {
+            println!("Event: {:?}", event);
+        }
+    });
+
+    // Submit a single task with a typed payload.
+    scheduler.submit(&TaskSubmission::with_payload(
+        "resize",
+        Priority::NORMAL,
+        &serde_json::json!({"path": "/photos/image.jpg", "width": 300}),
+        4096,  // expected read bytes
+        1024,  // expected write bytes
+    ).unwrap()).await.unwrap();
+
+    // Submit tasks in bulk (single SQLite transaction).
+    let paths = vec!["/a.jpg", "/b.jpg", "/c.jpg"];
+    let batch: Vec<_> = paths.iter().map(|p| {
+        TaskSubmission::with_payload(
+            "resize",
+            Priority::NORMAL,
+            &serde_json::json!({"path": p}),
+            4096, 1024,
+        ).unwrap()
+    }).collect();
+    let ids = scheduler.submit_batch(&batch).await.unwrap();
+    // ids[i] is Some(row_id) if inserted, None if deduplicated.
+
+    // Run the scheduler loop (blocks until the token is cancelled).
+    let token = CancellationToken::new();
+    scheduler.run(token).await;
+}
+```
+
+## Using typed tasks
+
+For stronger type safety, implement the `TypedTask` trait:
+
+```rust
+use serde::{Serialize, Deserialize};
+use taskmill::{TypedTask, Priority};
+
+#[derive(Serialize, Deserialize)]
+struct ResizeTask {
+    path: String,
+    width: u32,
+}
+
+impl TypedTask for ResizeTask {
+    const TASK_TYPE: &'static str = "resize";
+
+    fn expected_read_bytes(&self) -> i64 { 4096 }
+    fn expected_write_bytes(&self) -> i64 { 1024 }
+    fn priority(&self) -> Priority { Priority::NORMAL }
+}
+
+// Submit:
+scheduler.submit_typed(&ResizeTask {
+    path: "/photos/img.jpg".into(),
+    width: 300,
+}).await?;
+
+// In the executor:
+let task: Option<ResizeTask> = ctx.deserialize_typed()?;
+```
+
+## Manual wiring
+
+For full control over components, use `Scheduler::new()` directly:
+
+```rust
+use std::sync::Arc;
+use taskmill::{
+    CompositePressure, Scheduler, SchedulerConfig,
+    TaskStore, TaskTypeRegistry, ThrottlePolicy,
+};
+
+let store = TaskStore::open("tasks.db").await.unwrap();
+
+let mut registry = TaskTypeRegistry::new();
+registry.register("resize", Arc::new(ImageResizer));
+
+let pressure = CompositePressure::new();
+let policy = ThrottlePolicy::default_three_tier();
+
+let scheduler = Scheduler::new(
+    store,
+    SchedulerConfig::default(),
+    Arc::new(registry),
+    pressure,
+    policy,
+);
+```
+
+## Tauri integration
+
+Taskmill is designed for Tauri. A typical setup:
+
+```rust
+use tauri::Manager;
+use taskmill::{Scheduler, SchedulerSnapshot, StoreError};
+
+// Expose scheduler status to the frontend.
+#[tauri::command]
+async fn scheduler_status(
+    scheduler: tauri::State<'_, Scheduler>,
+) -> Result<SchedulerSnapshot, StoreError> {
+    scheduler.snapshot().await
+}
+
+// Bridge events to the frontend.
+fn setup_events(app: &tauri::App, scheduler: &Scheduler) {
+    let mut events = scheduler.subscribe();
+    let handle = app.handle().clone();
+    tokio::spawn(async move {
+        while let Ok(event) = events.recv().await {
+            handle.emit("taskmill-event", &event).unwrap();
+        }
+    });
+}
+```
diff --git a/migrations/001_tasks.sql b/migrations/001_tasks.sql
new file mode 100644
index 0000000..6e92f71
--- /dev/null
+++ b/migrations/001_tasks.sql
@@ -0,0 +1,64 @@
+-- Active queue: one row per pending, running, or paused task.
+-- UNIQUE(key) provides key-based deduplication: inserting a task whose key
+-- already exists adds no new row (INSERT OR IGNORE).
+-- If the duplicate arrives while the existing task is running or paused,
+-- the requeue flag is set so the task re-runs after the current execution.
+CREATE TABLE IF NOT EXISTS tasks (
+    id                   INTEGER PRIMARY KEY,
+    task_type            TEXT    NOT NULL,
+    key                  TEXT    NOT NULL,  -- dedup key; see UNIQUE(key) below
+    priority             INTEGER NOT NULL,  -- lower value = higher priority
+    status               TEXT    NOT NULL DEFAULT 'pending',
+    payload              BLOB,
+    expected_read_bytes  INTEGER NOT NULL DEFAULT 0,
+    expected_write_bytes INTEGER NOT NULL DEFAULT 0,
+    retry_count          INTEGER NOT NULL DEFAULT 0,
+    last_error           TEXT,
+    created_at           TEXT    NOT NULL DEFAULT (datetime('now')),
+    started_at           TEXT,  -- NULL until the task has been started
+    requeue              INTEGER NOT NULL DEFAULT 0,  -- flag: re-run after current execution
+    requeue_priority     INTEGER,  -- NOTE(review): presumably the priority for the re-run; confirm
+    UNIQUE(key)
+);
+
+-- Scheduler hot path: pop the highest-priority pending task (partial index).
+CREATE INDEX IF NOT EXISTS idx_tasks_pending
+    ON tasks (status, priority ASC, id ASC)
+    WHERE status = 'pending';
+
+-- Completed and failed task history, kept for queries and IO learning.
+CREATE TABLE IF NOT EXISTS task_history (
+    id                   INTEGER PRIMARY KEY,
+    task_type            TEXT    NOT NULL,
+    key                  TEXT    NOT NULL,  -- not unique here: a key may have many past runs
+    priority             INTEGER NOT NULL,
+    status               TEXT    NOT NULL,
+    payload              BLOB,
+    expected_read_bytes  INTEGER NOT NULL DEFAULT 0,
+    expected_write_bytes INTEGER NOT NULL DEFAULT 0,
+    actual_read_bytes    INTEGER,
+    actual_write_bytes   INTEGER,
+    retry_count          INTEGER NOT NULL DEFAULT 0,
+    last_error           TEXT,
+    created_at           TEXT    NOT NULL,
+    started_at           TEXT,
+    completed_at         TEXT    NOT NULL DEFAULT (datetime('now')),
+    duration_ms          INTEGER
+);
+
+-- IO learning: recent successful completions per task type (partial index).
+CREATE INDEX IF NOT EXISTS idx_history_type_completed
+    ON task_history (task_type, completed_at DESC)
+    WHERE status = 'completed';
+
+-- History lookup by key, newest first (used by task dedup and status checks).
+CREATE INDEX IF NOT EXISTS idx_history_key
+    ON task_history (key, completed_at DESC);
+
+-- Paginating and pruning history by completion time.
+CREATE INDEX IF NOT EXISTS idx_history_completed
+    ON task_history (completed_at DESC);
+
+-- Filtering history by status (e.g. listing failed tasks), newest first.
+CREATE INDEX IF NOT EXISTS idx_history_status
+    ON task_history (status, completed_at DESC);
diff --git a/release-plz.toml b/release-plz.toml
new file mode 100644
index 0000000..54b6e80
--- /dev/null
+++ b/release-plz.toml
@@ -0,0 +1,22 @@
+[workspace]
+# Only release when merging the release PR, not on every push
+release_always = false
+
+# Enable changelog generation from conventional commits
+changelog_update = true
+
+# Create GitHub releases with tag
+git_release_enable = true
+git_tag_enable = true
+
+# Auto-detect pre-release versions (e.g. 0.1.0-alpha.1) and mark GitHub releases accordingly
+git_release_type = "auto"
+
+# Check semver compatibility
+semver_check = true
+
+# Label release PRs for easy identification
+pr_labels = ["release"]
+
+# Create release PRs as drafts
+pr_draft = true
diff --git a/src/backpressure.rs b/src/backpressure.rs
new file mode 100644
index 0000000..77dd4a7
--- /dev/null
+++ b/src/backpressure.rs
@@ -0,0 +1,162 @@
+use crate::priority::Priority;
+
+/// An external load signal that the scheduler consults when throttling.
+///
+/// Implement this for anything that should slow dispatch down: API load,
+/// memory pressure, queue depth, and so on.
+pub trait PressureSource: Send + Sync + 'static {
+    /// Short, human-readable label used in diagnostics and tracing.
+    fn name(&self) -> &str;
+
+    /// Present load, between 0.0 (idle) and 1.0 (saturated).
+    fn pressure(&self) -> f32;
+}
+
+/// Decides whether a task should be held back, given its priority and the
+/// current pressure.
+///
+/// The policy is a list of `(priority, limit)` rules. A rule covers every
+/// task whose numeric priority value is >= the rule's priority (i.e. equal
+/// or lower priority) and fires when pressure is strictly above `limit`.
+///
+/// A task is throttled as soon as any rule that covers it fires.
+pub struct ThrottlePolicy {
+    /// Rules as `(priority_floor, pressure_limit)` pairs, conventionally
+    /// listed from lowest priority (highest numeric value) to highest.
+    thresholds: Vec<(Priority, f32)>,
+}
+
+impl ThrottlePolicy {
+    /// Build a policy from caller-supplied rules.
+    ///
+    /// A `(priority, limit)` entry throttles every task whose priority value
+    /// is >= `priority` (equal or lower priority) once pressure > `limit`.
+    ///
+    /// By convention, list rules from lowest priority to highest.
+    pub fn new(thresholds: Vec<(Priority, f32)>) -> Self {
+        ThrottlePolicy { thresholds }
+    }
+
+    /// The stock three-tier policy (Shoebox's original behavior):
+    /// - BACKGROUND (192+): held back above 50% pressure
+    /// - NORMAL (128+): held back above 75% pressure
+    /// - anything more urgent: never held back
+    pub fn default_three_tier() -> Self {
+        Self::new(vec![
+            (Priority::BACKGROUND, 0.50),
+            (Priority::NORMAL, 0.75),
+        ])
+    }
+
+    /// Returns `true` if a task at `priority` must wait at this `pressure`.
+    pub fn should_throttle(&self, priority: Priority, pressure: f32) -> bool {
+        let level = priority.value();
+        // A rule covers the task when the task's value is >= the rule's
+        // floor; it fires when pressure is strictly above the rule's limit.
+        self.thresholds
+            .iter()
+            .any(|&(floor, limit)| level >= floor.value() && pressure > limit)
+    }
+}
+
+/// Folds any number of pressure sources into one aggregate reading.
+///
+/// The aggregate is the maximum over all sources: the system counts as
+/// being as loaded as its most constrained resource.
+pub struct CompositePressure {
+    sources: Vec<Box<dyn PressureSource + 'static>>,
+}
+
+impl CompositePressure {
+    /// Create a composite with no sources registered.
+    pub fn new() -> Self {
+        CompositePressure { sources: Vec::new() }
+    }
+
+    /// Register one more pressure source.
+    pub fn add_source(&mut self, source: Box<dyn PressureSource + 'static>) {
+        self.sources.push(source);
+    }
+
+    /// Aggregate reading: the maximum over all sources, starting from 0.0,
+    /// so a composite with no sources reads 0.0.
+    pub fn pressure(&self) -> f32 {
+        self.sources
+            .iter()
+            .fold(0.0f32, |peak, src| peak.max(src.pressure()))
+    }
+
+    /// Per-source `(name, pressure)` readings, for diagnostics.
+    pub fn breakdown(&self) -> Vec<(&str, f32)> {
+        let mut readings = Vec::with_capacity(self.sources.len());
+        for src in &self.sources {
+            readings.push((src.name(), src.pressure()));
+        }
+        readings
+    }
+}
+
+impl Default for CompositePressure {
+    fn default() -> Self {
+        CompositePressure::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Pressure source that always reports the same fixed reading.
+    struct FixedPressure {
+        value: f32,
+        name: &'static str,
+    }
+
+    impl PressureSource for FixedPressure {
+        fn name(&self) -> &str {
+            self.name
+        }
+        fn pressure(&self) -> f32 {
+            self.value
+        }
+    }
+
+    #[test]
+    fn default_policy_background_throttles() {
+        let tiers = ThrottlePolicy::default_three_tier();
+
+        // BACKGROUND has a 50% limit: 60% is throttled, 40% is not.
+        assert!(tiers.should_throttle(Priority::BACKGROUND, 0.6));
+        assert!(!tiers.should_throttle(Priority::BACKGROUND, 0.4));
+
+        // NORMAL has a 75% limit: 60% is not throttled, 80% is.
+        assert!(!tiers.should_throttle(Priority::NORMAL, 0.6));
+        assert!(tiers.should_throttle(Priority::NORMAL, 0.8));
+
+        // REALTIME and HIGH have no rule, so neither is ever throttled.
+        assert!(!tiers.should_throttle(Priority::REALTIME, 1.0));
+        assert!(!tiers.should_throttle(Priority::HIGH, 0.6));
+    }
+
+    #[test]
+    fn composite_takes_max() {
+        let api = FixedPressure { value: 0.3, name: "api" };
+        let disk = FixedPressure { value: 0.7, name: "disk" };
+
+        let mut composite = CompositePressure::new();
+        composite.add_source(Box::new(api));
+        composite.add_source(Box::new(disk));
+
+        // The larger of the two readings is the aggregate.
+        let delta = composite.pressure() - 0.7;
+        assert!(delta.abs() < f32::EPSILON);
+    }
+
+    #[test]
+    fn empty_composite_is_zero() {
+        // No sources registered, so the aggregate stays at 0.0.
+        let composite = CompositePressure::new();
+        let reading = composite.pressure();
+        assert_eq!(reading, 0.0);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ee3db34
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,49 @@
+//! # Taskmill
+//!
+//! Adaptive priority work scheduler with IO-aware concurrency and SQLite persistence.
+//!
+//! Taskmill provides a generic task scheduling system that:
+//! - Persists tasks to SQLite so the queue survives restarts
+//! - Schedules by priority (0 = highest, 255 = lowest) with named tiers
+//! - Deduplicates tasks by key — submitting an already-queued key is a no-op
+//! - Tracks expected and actual IO bytes per task for budget-based scheduling
+//! - Monitors system CPU and disk throughput to adjust concurrency
+//! - Supports composable backpressure from arbitrary external sources
+//! - Preempts lower-priority work when high-priority tasks arrive
+//! - Retries failed tasks at the same priority level
+//! - Records completed/failed task history for queries and IO learning
+//! - Emits lifecycle events including progress for UI integration (via broadcast channel)
+//! - Supports graceful shutdown with configurable drain timeout
+//!
+//! # Feature flags
+//!
+//! - **`sysinfo-monitor`** (default): Enables the built-in `SysinfoSampler` for
+//!   cross-platform CPU and disk IO monitoring. Disable for mobile targets or
+//!   when providing a custom `ResourceSampler`.
+
+pub mod backpressure;
+pub mod priority;
+pub mod registry;
+pub mod resource;
+pub mod scheduler;
+pub mod store;
+pub mod task;
+
+// Convenience re-exports.
+pub use backpressure::{CompositePressure, PressureSource, ThrottlePolicy};
+pub use priority::Priority;
+pub use registry::{StateMap, TaskContext, TaskExecutor};
+pub use resource::sampler::{SamplerConfig, SmoothedReader};
+pub use resource::{ResourceReader, ResourceSampler, ResourceSnapshot};
+pub use scheduler::{
+    EstimatedProgress, ProgressReporter, Scheduler, SchedulerBuilder, SchedulerConfig,
+    SchedulerEvent, SchedulerSnapshot, ShutdownMode,
+};
+pub use store::{RetentionPolicy, StoreConfig, StoreError, TaskStore};
+pub use task::{
+    generate_dedup_key, HistoryStatus, SubmitOutcome, TaskError, TaskHistoryRecord, TaskLookup,
+    TaskRecord, TaskResult, TaskStatus, TaskSubmission, TypeStats, TypedTask,
+};
+
+#[cfg(feature = "sysinfo-monitor")]
+pub use resource::platform_sampler;
diff --git a/src/priority.rs b/src/priority.rs
new file mode 100644
index 0000000..aa9767d
--- /dev/null
+++ b/src/priority.rs
@@ -0,0 +1,108 @@
+use std::cmp::Ordering;
+use std::fmt;
+
+use serde::{Deserialize, Serialize};
+
+/// Scheduling priority stored as a raw `u8`. Lower values = higher priority.
+///
+/// Named tiers are exposed as associated constants, but any value 0–255 is
+/// accepted for fine-grained control. Serializes as the bare integer.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(transparent)]
+pub struct Priority(u8);
+
+impl Priority {
+    /// Tier for user-interactive work. Never throttled, triggers preemption.
+    pub const REALTIME: Self = Self::new(0);
+    /// Tier for functionality-blocking tasks. Throttled only under extreme load.
+    pub const HIGH: Self = Self::new(64);
+    /// Tier for normal background operations. Yields to interactive work.
+    pub const NORMAL: Self = Self::new(128);
+    /// Low-priority tier. Pauses under significant load.
+    pub const BACKGROUND: Self = Self::new(192);
+    /// Idle-only tier. Runs only when the system is otherwise idle.
+    pub const IDLE: Self = Self::new(255);
+
+    /// Wrap a raw level in a `Priority`. 0 = highest, 255 = lowest.
+    pub const fn new(level: u8) -> Self {
+        Priority(level)
+    }
+
+    /// Return the raw numeric level.
+    pub const fn value(self) -> u8 {
+        self.0
+    }
+}
+
+/// Priorities order inversely to their numeric level: the smaller the number,
+/// the `Greater` the priority, so a max-heap `BinaryHeap` pops urgent work first.
+impl Ord for Priority {
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Compare the raw levels naturally, then flip the outcome.
+        self.0.cmp(&other.0).reverse()
+    }
+}
+
+impl PartialOrd for Priority {
+    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
+        Some(Ord::cmp(self, rhs))
+    }
+}
+
+impl fmt::Debug for Priority {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let label = match *self {
+            Self::REALTIME => "REALTIME",
+            Self::HIGH => "HIGH",
+            Self::NORMAL => "NORMAL",
+            Self::BACKGROUND => "BACKGROUND",
+            Self::IDLE => "IDLE",
+            Self(_) => return write!(f, "Priority({})", self.0), // not a named tier
+        };
+        write!(f, "Priority::{label}")
+    }
+}
+
+impl fmt::Display for Priority {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f) // delegate so width/fill/alignment flags are honored
+    }
+}
+
+impl From<u8> for Priority {
+    fn from(level: u8) -> Self {
+        Priority::new(level)
+    }
+}
+
+impl From<Priority> for u8 {
+    fn from(priority: Priority) -> Self {
+        priority.value()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn realtime_is_highest() {
+        assert!(Priority::HIGH < Priority::REALTIME);
+        assert!(Priority::NORMAL < Priority::HIGH);
+        assert!(Priority::BACKGROUND < Priority::NORMAL);
+        assert!(Priority::IDLE < Priority::BACKGROUND);
+    }
+
+    #[test]
+    fn custom_priorities_between_tiers() {
+        let between = Priority::new(96);
+        assert!(Priority::HIGH > between); // 96 ranks below HIGH (64)
+        assert!(Priority::NORMAL < between); // 96 ranks above NORMAL (128)
+    }
+
+    #[test]
+    fn debug_named_tiers() {
+        assert_eq!(format!("{:?}", Priority::new(0)), "Priority::REALTIME");
+        assert_eq!(format!("{:?}", Priority::new(42)), "Priority(42)");
+    }
+}
diff --git a/src/registry.rs b/src/registry.rs
new file mode 100644
index 0000000..f903c80
--- /dev/null
+++ b/src/registry.rs
@@ -0,0 +1,276 @@
+use std::any::{Any, TypeId};
+use std::collections::HashMap;
+use std::future::Future;
+use std::sync::Arc;
+
+use tokio::sync::RwLock;
+use tokio_util::sync::CancellationToken;
+
+use crate::scheduler::ProgressReporter;
+use crate::task::{TaskError, TaskRecord, TaskResult, TypedTask};
+
+// ── State Map ────────────────────────────────────────────────────────
+
+/// Type-keyed map of shared application state.
+///
+/// Holds at most one value per concrete type, keyed by that type's `TypeId`.
+/// Executors read values through [`TaskContext::state`]. This is the same
+/// pattern used by Axum `Extensions` and Tauri `State`.
+///
+/// Values can still be inserted after the scheduler is built (see
+/// [`Scheduler::register_state`]), so a library embedded in a larger
+/// application can inject its state once the host has created the scheduler.
+#[derive(Default)]
+pub struct StateMap {
+    inner: RwLock<HashMap<TypeId, Arc<dyn Any + Send + Sync>>>,
+}
+
+impl StateMap {
+    /// Create an empty map.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Construct a map pre-populated with already type-erased entries.
+    pub(crate) fn from_entries(entries: Vec<(TypeId, Arc<dyn Any + Send + Sync>)>) -> Self {
+        let inner = RwLock::new(entries.into_iter().collect());
+        Self { inner }
+    }
+
+    /// Store `value` under `T`'s `TypeId`, replacing any earlier value of that type.
+    pub async fn insert<T: Send + Sync + 'static>(&self, value: Arc<T>) {
+        self.inner.write().await.insert(TypeId::of::<T>(), value);
+    }
+}
+
+/// Point-in-time copy of a [`StateMap`], handed to each [`TaskContext`].
+///
+/// Built by cloning the map once under its read lock; lookups afterwards need
+/// no locking. Values inserted into the `StateMap` later are not reflected.
+#[derive(Clone, Default)]
+pub(crate) struct StateSnapshot {
+    entries: HashMap<TypeId, Arc<dyn Any + Send + Sync>>,
+}
+
+impl StateSnapshot {
+    /// Borrow the value stored for type `T`, or `None` if there is none.
+    pub fn get<T: Send + Sync + 'static>(&self) -> Option<&T> {
+        let erased = self.entries.get(&TypeId::of::<T>())?;
+        erased.downcast_ref::<T>()
+    }
+}
+
+impl StateMap {
+    /// Copy the current entries into a [`StateSnapshot`] that can be read
+    /// without locking (the `Arc` values are shared, not deep-cloned).
+    pub(crate) async fn snapshot(&self) -> StateSnapshot {
+        let entries = self.inner.read().await.clone();
+        StateSnapshot { entries }
+    }
+}
+
+// ── Task Context ─────────────────────────────────────────────────────
+
+/// Execution context passed to a [`TaskExecutor`].
+///
+/// Bundles the task record, cancellation token, progress reporter, and a
+/// snapshot of application state into a single value, so the executor
+/// signature stays stable when new contextual data is added in the future.
+pub struct TaskContext {
+    /// The full task record including payload, priority, and IO estimates.
+    pub record: TaskRecord,
+    /// Cancelled when the task is preempted, paused, or the scheduler shuts
+    /// down. Check `token.is_cancelled()` at yield points and return early.
+    pub token: CancellationToken,
+    /// Report progress back to the scheduler (0.0–1.0).
+    pub progress: ProgressReporter,
+    /// Snapshot of shared application state; read it via [`TaskContext::state`].
+    pub(crate) app_state: StateSnapshot,
+}
+
+impl TaskContext {
+    /// Deserialize this task's payload into the typed task `T`.
+    ///
+    /// Delegates to [`TaskRecord::deserialize_payload`]; its `Option` and
+    /// error are passed through unchanged.
+    pub fn deserialize_typed<T: TypedTask>(&self) -> Result<Option<T>, serde_json::Error> {
+        self.record.deserialize_payload()
+    }
+
+    /// Look up shared application state of type `T`.
+    ///
+    /// State is registered via [`SchedulerBuilder::app_state`] or
+    /// [`Scheduler::register_state`]. Returns `None` when no value of type `T`
+    /// is present in this context's snapshot; each type has its own slot.
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// struct MyServices { db: DatabasePool, http: reqwest::Client }
+    ///
+    /// // In the executor:
+    /// let svc = ctx.state::<MyServices>().expect("app state not set");
+    /// svc.db.query("...").await?;
+    /// ```
+    pub fn state<T: Send + Sync + 'static>(&self) -> Option<&T> {
+        self.app_state.get::<T>()
+    }
+}
+
+/// Executes tasks of a registered type.
+///
+/// Each executor is registered under a task type name (e.g. `"scan-l3"`, `"exif"`).
+/// When the scheduler pops a task, it looks up the executor by `task_type` and
+/// calls `execute` with a [`TaskContext`] carrying the persisted record,
+/// a cancellation token, a progress reporter, and shared application state.
+///
+/// Implementors decode the task's `payload` themselves (for example with
+/// [`TaskContext::deserialize_typed`]) — taskmill treats it as opaque bytes.
+///
+/// # Example
+///
+/// ```ignore
+/// use taskmill::{TaskExecutor, TaskContext, TaskResult, TaskError};
+///
+/// struct MyExecutor;
+///
+/// impl TaskExecutor for MyExecutor {
+///     async fn execute<'a>(
+///         &'a self,
+///         ctx: &'a TaskContext,
+///     ) -> Result<TaskResult, TaskError> {
+///         ctx.progress.report(0.5, Some("halfway".into()));
+///         Ok(TaskResult { actual_read_bytes: 0, actual_write_bytes: 0 })
+///     }
+/// }
+/// ```
+pub trait TaskExecutor: Send + Sync + 'static {
+    /// Execute a task.
+    ///
+    /// - `ctx`: Execution context with the task record, cancellation token,
+    ///   and progress reporter. The returned future may borrow it (and `self`).
+    ///
+    /// On success, return the actual IO bytes consumed. On failure, return a
+    /// [`TaskError`] indicating whether a retry is appropriate.
+    fn execute<'a>(
+        &'a self,
+        ctx: &'a TaskContext,
+    ) -> impl Future<Output = Result<TaskResult, TaskError>> + Send + 'a;
+}
+
+/// Registry mapping task type names to their type-erased executors.
+///
+/// Built during application startup before the scheduler begins popping tasks.
+/// After construction, the registry is immutable (shared via `Arc`).
+pub struct TaskTypeRegistry {
+    types: HashMap<String, Arc<dyn ErasedExecutor>>,
+}
+
+/// Object-safe counterpart of [`TaskExecutor`], used for dynamic dispatch in the registry.
+///
+/// `TaskExecutor::execute` returns `impl Future` (RPITIT), which rules out
+/// `dyn TaskExecutor`. This trait returns a boxed, pinned future instead; a
+/// blanket impl covers every `TaskExecutor`, so callers never implement it directly.
+pub(crate) trait ErasedExecutor: Send + Sync + 'static {
+    fn execute_erased<'a>(
+        &'a self,
+        ctx: &'a TaskContext,
+    ) -> std::pin::Pin<Box<dyn Future<Output = Result<TaskResult, TaskError>> + Send + 'a>>;
+}
+
+impl<T: TaskExecutor> ErasedExecutor for T {
+    fn execute_erased<'a>(
+        &'a self,
+        ctx: &'a TaskContext,
+    ) -> std::pin::Pin<Box<dyn Future<Output = Result<TaskResult, TaskError>> + Send + 'a>> {
+        Box::pin(<T as TaskExecutor>::execute(self, ctx))
+    }
+}
+
+impl TaskTypeRegistry {
+    /// Create an empty registry.
+    pub fn new() -> Self {
+        Self {
+            types: HashMap::new(),
+        }
+    }
+
+    /// Register an executor for a named task type.
+    ///
+    /// Panics if the name is already registered (catch configuration errors
+    /// at startup, not at runtime).
+    pub fn register<E: TaskExecutor>(&mut self, name: &str, executor: Arc<E>) {
+        // Erase the concrete type and reuse the single check-and-insert path.
+        self.register_erased(name, executor as Arc<dyn ErasedExecutor>);
+    }
+
+    /// Look up the executor for a task type.
+    pub(crate) fn get(&self, name: &str) -> Option<&Arc<dyn ErasedExecutor>> {
+        self.types.get(name)
+    }
+
+    /// All registered type names, in no particular order.
+    pub fn type_names(&self) -> Vec<&str> {
+        self.types.keys().map(|s| s.as_str()).collect()
+    }
+
+    /// Number of registered types.
+    pub fn len(&self) -> usize {
+        self.types.len()
+    }
+
+    /// `true` when no task types are registered.
+    pub fn is_empty(&self) -> bool {
+        self.types.is_empty()
+    }
+
+    /// Register a pre-erased executor. Used by the builder which already holds
+    /// `Arc<dyn ErasedExecutor>`, and by [`register`](Self::register) after erasure.
+    /// Panics if the name is already registered.
+    pub(crate) fn register_erased(&mut self, name: &str, executor: Arc<dyn ErasedExecutor>) {
+        if self.types.contains_key(name) {
+            panic!("task type '{name}' already registered");
+        }
+        self.types.insert(name.to_string(), executor);
+    }
+}
+
+impl Default for TaskTypeRegistry {
+    fn default() -> Self {
+        TaskTypeRegistry::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct NoopExecutor;
+
+    impl TaskExecutor for NoopExecutor {
+        async fn execute<'a>(&'a self, _ctx: &'a TaskContext) -> Result<TaskResult, TaskError> {
+            Ok(TaskResult {
+                actual_read_bytes: 0,
+                actual_write_bytes: 0,
+            })
+        }
+    }
+
+    #[test]
+    fn register_and_lookup() {
+        let mut registry = TaskTypeRegistry::new();
+        registry.register("test-type", Arc::new(NoopExecutor));
+
+        assert_eq!(registry.len(), 1);
+        assert!(registry.get("unknown").is_none());
+        assert!(registry.get("test-type").is_some());
+    }
+
+    #[test]
+    #[should_panic(expected = "already registered")]
+    fn duplicate_registration_panics() {
+        let mut registry = TaskTypeRegistry::new();
+        registry.register("dup", Arc::new(NoopExecutor));
+        registry.register("dup", Arc::new(NoopExecutor));
+    }
+}
diff --git a/src/resource/mod.rs b/src/resource/mod.rs
new file mode 100644
index 0000000..5b7aae4
--- /dev/null
+++ b/src/resource/mod.rs
@@ -0,0 +1,63 @@
+pub mod sampler;
+
+#[cfg(feature = "sysinfo-monitor")]
+pub mod sysinfo_monitor;
+
+use serde::{Deserialize, Serialize};
+
+/// Point-in-time snapshot of system resource utilization.
+///
+/// The same struct carries raw readings (as returned by
+/// [`ResourceSampler::sample`]) and EWMA-smoothed readings (as returned by
+/// [`ResourceReader::latest`]); which kind a value holds depends on where it
+/// came from.
+///
+/// `Default` is the all-zero snapshot. It is derived rather than hand-written
+/// because every field's own default (`0.0`) is exactly the value wanted, and
+/// a derive cannot fall out of sync when a field is added.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ResourceSnapshot {
+    /// CPU utilization as a fraction from 0.0 to 1.0.
+    pub cpu_usage: f64,
+    /// Disk read throughput in bytes/sec, summed over whatever the sampler
+    /// measures.
+    pub io_read_bytes_per_sec: f64,
+    /// Disk write throughput in bytes/sec.
+    pub io_write_bytes_per_sec: f64,
+}
+
+/// Trait for sampling raw system resources.
+///
+/// Implementations read platform-specific counters and return instantaneous,
+/// unsmoothed readings. The sampler loop applies EWMA smoothing separately.
+///
+/// To override the built-in monitor (e.g. for container cgroup-aware monitoring),
+/// implement this trait and pass it to the scheduler.
+pub trait ResourceSampler: Send + Sync + 'static {
+    /// Take a raw sample. Called periodically by the sampler loop.
+    /// Returns current utilization and bytes/sec rates, not cumulative counters
+    /// and not smoothed values (the sampler loop applies EWMA).
+    fn sample(&mut self) -> ResourceSnapshot;
+}
+
+/// Read-only access to the latest smoothed resource snapshot.
+///
+/// This is the interface consumed by the scheduler for IO budget decisions.
+/// The sampler loop updates it; the scheduler reads it. Keeping it separate
+/// from [`ResourceSampler`] keeps the public API clean — consumers only see
+/// the latest reading, never the sampling mechanics.
+pub trait ResourceReader: Send + Sync + 'static {
+    /// Return a copy of the most recent smoothed snapshot without awaiting.
+    fn latest(&self) -> ResourceSnapshot;
+}
+
+/// Build the default, platform-appropriate sampler.
+///
+/// The returned sampler is backed by `sysinfo`, which provides cross-platform
+/// CPU and disk IO monitoring on Linux, macOS, and Windows.
+///
+/// Only available with the `sysinfo-monitor` feature (enabled by default).
+#[cfg(feature = "sysinfo-monitor")]
+pub fn platform_sampler() -> Box<dyn ResourceSampler> {
+    Box::new(sysinfo_monitor::SysinfoSampler::default())
+}
diff --git a/src/resource/sampler.rs b/src/resource/sampler.rs
new file mode 100644
index 0000000..160e686
--- /dev/null
+++ b/src/resource/sampler.rs
@@ -0,0 +1,153 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use tokio::sync::RwLock;
+use tokio_util::sync::CancellationToken;
+
+use super::{ResourceReader, ResourceSampler, ResourceSnapshot};
+
+/// Settings for [`run_sampler`], the background resource sampling loop.
+pub struct SamplerConfig {
+    /// Delay between consecutive samples. Default: 1 second.
+    pub interval: Duration,
+    /// EWMA smoothing factor (alpha): the weight of each new sample. Default: 0.3.
+    /// Higher = more responsive to changes, lower = smoother.
+    pub ewma_alpha: f64,
+}
+
+impl Default for SamplerConfig {
+    fn default() -> Self {
+        Self {
+            ewma_alpha: 0.3,
+            interval: Duration::from_millis(1_000),
+        }
+    }
+}
+
+/// Blend `raw` into `old` with weight `alpha`: `alpha * raw + (1 - alpha) * old`.
+fn ewma(old: f64, raw: f64, alpha: f64) -> f64 {
+    // A zero `old` is treated as "no history yet": adopt the raw sample as-is.
+    if old != 0.0 {
+        return alpha * raw + (1.0 - alpha) * old;
+    }
+    raw
+}
+
+/// Shared, lock-protected holder of the latest smoothed snapshot.
+///
+/// The sampler loop writes to it and the scheduler reads from it; clones share
+/// the same snapshot. An `RwLock` is used so concurrent readers don't block.
+#[derive(Clone)]
+pub struct SmoothedReader {
+    inner: Arc<RwLock<ResourceSnapshot>>,
+}
+
+impl SmoothedReader {
+    /// Create a reader whose initial snapshot is all zeros.
+    pub fn new() -> Self {
+        let inner = Arc::new(RwLock::new(ResourceSnapshot::default()));
+        Self { inner }
+    }
+
+    async fn update(&self, snapshot: ResourceSnapshot) {
+        *self.inner.write().await = snapshot;
+    }
+}
+
+impl Default for SmoothedReader {
+    fn default() -> Self {
+        SmoothedReader::new()
+    }
+}
+
+impl ResourceReader for SmoothedReader {
+    fn latest(&self) -> ResourceSnapshot {
+        // `try_read` keeps this method synchronous. When the sampler holds the
+        // write lock the call fails, and we fall back to the zeroed default
+        // snapshot, which makes the scheduler skip IO budgeting for that cycle.
+        match self.inner.try_read() {
+            Ok(current) => current.clone(),
+            Err(_) => ResourceSnapshot::default(),
+        }
+    }
+}
+
+/// Drive the resource sampling loop until `token` is cancelled.
+///
+/// Every `config.interval`, takes a raw reading from `sampler`, folds each field
+/// into its running EWMA, and publishes the result through `reader`, where the
+/// scheduler picks it up via `reader.latest()`.
+pub async fn run_sampler(
+    sampler: Arc<tokio::sync::Mutex<Box<dyn ResourceSampler>>>,
+    reader: SmoothedReader,
+    config: SamplerConfig,
+    token: CancellationToken,
+) {
+    let alpha = config.ewma_alpha;
+    tracing::debug!(
+        interval_ms = config.interval.as_millis() as u64,
+        alpha = config.ewma_alpha,
+        "resource sampler started"
+    );
+
+    let mut smoothed = ResourceSnapshot::default();
+
+    loop {
+        // Wait for whichever comes first: cancellation or the next tick.
+        tokio::select! {
+            _ = token.cancelled() => break,
+            _ = tokio::time::sleep(config.interval) => {}
+        }
+
+        let raw = sampler.lock().await.sample();
+        smoothed = ResourceSnapshot {
+            cpu_usage: ewma(smoothed.cpu_usage, raw.cpu_usage, alpha),
+            io_read_bytes_per_sec: ewma(
+                smoothed.io_read_bytes_per_sec,
+                raw.io_read_bytes_per_sec,
+                alpha,
+            ),
+            io_write_bytes_per_sec: ewma(
+                smoothed.io_write_bytes_per_sec,
+                raw.io_write_bytes_per_sec,
+                alpha,
+            ),
+        };
+        reader.update(smoothed.clone()).await;
+
+        tracing::trace!(
+            cpu = format!("{:.1}%", smoothed.cpu_usage * 100.0),
+            read_mbps = format!("{:.1}", smoothed.io_read_bytes_per_sec / 1_048_576.0),
+            write_mbps = format!("{:.1}", smoothed.io_write_bytes_per_sec / 1_048_576.0),
+            "resource sample"
+        );
+    }
+    tracing::debug!("resource sampler shutting down");
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn ewma_first_sample_is_raw() {
+        // With no history (`old == 0.0`) the raw value is returned untouched.
+        let first = ewma(0.0, 42.0, 0.3);
+        assert_eq!(first, 42.0);
+    }
+
+    #[test]
+    fn ewma_blends_with_history() {
+        // 0.3 * 0 + 0.7 * 100 = 70
+        let blended = ewma(100.0, 0.0, 0.3);
+        assert!((blended - 70.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn ewma_converges() {
+        // Feed a constant raw value of 100 fifty times, starting with no history.
+        let settled = (0..50).fold(0.0, |acc, _| ewma(acc, 100.0, 0.3));
+        // Should converge to ~100
+        assert!((settled - 100.0).abs() < 1.0);
+    }
+}
diff --git a/src/resource/sysinfo_monitor.rs b/src/resource/sysinfo_monitor.rs
new file mode 100644
index 0000000..a2e7592
--- /dev/null
+++ b/src/resource/sysinfo_monitor.rs
@@ -0,0 +1,94 @@
+use std::time::Instant;
+
+use sysinfo::{Disks, System};
+
+use crate::resource::{ResourceSampler, ResourceSnapshot};
+
+/// Cross-platform [`ResourceSampler`] built on the `sysinfo` crate.
+///
+/// Works on Linux, macOS, and Windows. Reports global CPU utilization and
+/// disk IO throughput aggregated over every disk `sysinfo` lists.
+pub struct SysinfoSampler {
+    sys: System,
+    disks: Disks,
+    prev_read_bytes: u64,
+    prev_write_bytes: u64,
+    prev_sample: Option<Instant>,
+}
+
+impl SysinfoSampler {
+    /// Create a sampler and record baseline CPU and disk readings.
+    pub fn new() -> Self {
+        let mut sys = System::new();
+        sys.refresh_cpu_usage();
+
+        // Baseline disk counters, so the first `sample()` has something to diff.
+        let disks = Disks::new_with_refreshed_list();
+        let (prev_read_bytes, prev_write_bytes) = disk_totals(&disks);
+        let prev_sample = Some(Instant::now());
+        Self {
+            sys,
+            disks,
+            prev_read_bytes,
+            prev_write_bytes,
+            prev_sample,
+        }
+    }
+}
+
+impl Default for SysinfoSampler {
+    fn default() -> Self {
+        SysinfoSampler::new()
+    }
+}
+
+impl ResourceSampler for SysinfoSampler {
+    fn sample(&mut self) -> ResourceSnapshot {
+        // CPU: usage is computed from the change since the previous refresh.
+        self.sys.refresh_cpu_usage();
+        let cpu_usage = self.sys.global_cpu_usage() as f64 / 100.0;
+
+        // Disk IO: refresh the counters and timestamp this reading.
+        self.disks.refresh(true);
+        let (read_bytes, write_bytes) = disk_totals(&self.disks);
+        let now = Instant::now();
+
+        let elapsed = match self.prev_sample {
+            Some(prev_ts) => now.duration_since(prev_ts).as_secs_f64(),
+            None => 0.0,
+        };
+        let (read_bps, write_bps) = if elapsed > 0.0 {
+            let read_delta = read_bytes.saturating_sub(self.prev_read_bytes);
+            let write_delta = write_bytes.saturating_sub(self.prev_write_bytes);
+            (read_delta as f64 / elapsed, write_delta as f64 / elapsed)
+        } else {
+            (0.0, 0.0) // no previous sample, or no time has passed
+        };
+
+        // This reading becomes the baseline for the next call.
+        self.prev_read_bytes = read_bytes;
+        self.prev_write_bytes = write_bytes;
+        self.prev_sample = Some(now);
+
+        ResourceSnapshot {
+            cpu_usage,
+            io_read_bytes_per_sec: read_bps,
+            io_write_bytes_per_sec: write_bps,
+        }
+    }
+}
+
+/// Sum the cumulative read/write byte counters across all disks.
+///
+/// Uses the `total_*` fields: `read_bytes`/`written_bytes` only cover the span
+/// since the previous refresh, so differencing them would not yield a rate.
+fn disk_totals(disks: &Disks) -> (u64, u64) {
+    let mut total_read = 0u64;
+    let mut total_write = 0u64;
+    for disk in disks.list() {
+        let usage = disk.usage();
+        total_read += usage.total_read_bytes;
+        total_write += usage.total_written_bytes;
+    }
+    (total_read, total_write)
+}
diff --git a/src/scheduler/dispatch.rs b/src/scheduler/dispatch.rs
new file mode 100644
index 0000000..e55b173
--- /dev/null
+++ b/src/scheduler/dispatch.rs
@@ -0,0 +1,310 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use tokio::sync::Mutex;
+use tokio_util::sync::CancellationToken;
+
+use crate::priority::Priority;
+use crate::registry::TaskContext;
+use crate::store::TaskStore;
+use crate::task::TaskRecord;
+
+use super::progress::ProgressReporter;
+use super::SchedulerEvent;
+
+// ── Active Task ────────────────────────────────────────────────────
+
+/// Bookkeeping for one running task, used for preemption and progress tracking.
+pub(crate) struct ActiveTask {
+    pub record: TaskRecord,
+    pub token: CancellationToken,
+    /// Last reported progress from the executor (0.0 to 1.0); `None` until reported.
+    pub reported_progress: Option<f32>,
+    /// When the last progress report was received (UTC); `None` until reported.
+    pub reported_at: Option<chrono::DateTime<chrono::Utc>>,
+}
+
+// ── Active Task Map ────────────────────────────────────────────────
+
+/// Shared, async-mutex-protected map of currently running tasks, keyed by task ID.
+///
+/// Clones are cheap and refer to the same underlying map. Keeping this
+/// bookkeeping separate from `Scheduler` makes preemption and progress
+/// queries independently testable.
+#[derive(Clone)]
+pub(crate) struct ActiveTaskMap {
+    inner: Arc<Mutex<HashMap<i64, ActiveTask>>>,
+}
+
+impl ActiveTaskMap {
+    /// Create an empty map.
+    pub fn new() -> Self {
+        Self {
+            inner: Arc::new(Mutex::new(HashMap::new())),
+        }
+    }
+
+    /// Number of tasks currently tracked as running.
+    pub async fn count(&self) -> usize {
+        self.inner.lock().await.len()
+    }
+
+    /// Start tracking `task` under `id`, replacing any existing entry.
+    pub async fn insert(&self, id: i64, task: ActiveTask) {
+        self.inner.lock().await.insert(id, task);
+    }
+
+    /// Stop tracking `id`, returning its entry if it was present.
+    pub async fn remove(&self, id: i64) -> Option<ActiveTask> {
+        self.inner.lock().await.remove(&id)
+    }
+
+    /// Snapshot of all active task records.
+    pub async fn records(&self) -> Vec<TaskRecord> {
+        let active = self.inner.lock().await;
+        active.values().map(|at| at.record.clone()).collect()
+    }
+
+    /// Snapshot of progress data for all active tasks.
+    pub async fn progress_snapshots(
+        &self,
+    ) -> Vec<(
+        TaskRecord,
+        Option<f32>,
+        Option<chrono::DateTime<chrono::Utc>>,
+    )> {
+        let active = self.inner.lock().await;
+        active
+            .values()
+            .map(|at| (at.record.clone(), at.reported_progress, at.reported_at))
+            .collect()
+    }
+
+    /// Update reported progress for a specific task.
+    pub async fn update_progress(&self, task_id: i64, percent: f32) {
+        let mut map = self.inner.lock().await;
+        if let Some(at) = map.get_mut(&task_id) {
+            at.reported_progress = Some(percent);
+            at.reported_at = Some(chrono::Utc::now());
+        }
+    }
+
+    /// Preempt active tasks with priority lower than the incoming priority.
+    ///
+    /// Cancels their tokens, pauses them in the store (a failed pause is logged,
+    /// not propagated), emits `SchedulerEvent::Preempted`, and returns their IDs.
+    pub async fn preempt_below(
+        &self,
+        incoming_priority: Priority,
+        store: &TaskStore,
+        event_tx: &tokio::sync::broadcast::Sender<SchedulerEvent>,
+    ) -> Vec<i64> {
+        let mut active = self.inner.lock().await;
+        let to_preempt: Vec<i64> = active
+            .iter()
+            .filter(|(_, at)| at.record.priority.value() > incoming_priority.value())
+            .map(|(id, _)| *id)
+            .collect();
+
+        let mut preempted = Vec::new();
+        for id in to_preempt {
+            if let Some(at) = active.remove(&id) {
+                tracing::info!(
+                    task_id = id,
+                    task_type = at.record.task_type,
+                    "preempting task for higher-priority work"
+                );
+                at.token.cancel();
+                if let Err(e) = store.pause(id).await {
+                    tracing::error!(task_id = id, error = %e, "failed to pause preempted task");
+                }
+                let _ = event_tx.send(SchedulerEvent::Preempted {
+                    task_id: id,
+                    task_type: at.record.task_type.clone(),
+                    key: at.record.key.clone(),
+                });
+                preempted.push(id);
+            }
+        }
+        preempted
+    }
+
+    /// Check whether any active task would preempt work at the given priority.
+    pub async fn has_preemptors_for(
+        &self,
+        priority: Priority,
+        preempt_threshold: Priority,
+    ) -> bool {
+        let active = self.inner.lock().await;
+        active.values().any(|at| {
+            at.record.priority.value() <= preempt_threshold.value()
+                && at.record.priority.value() < priority.value()
+        })
+    }
+
+    /// Cancel all active tasks (for shutdown).
+    pub async fn cancel_all(&self) {
+        let mut active = self.inner.lock().await;
+        active.drain().for_each(|(_, at)| at.token.cancel());
+    }
+
+    /// Pause all active tasks: cancel their tokens and pause them in the store
+    /// (failures are logged). Returns the number of tasks that were active.
+    pub async fn pause_all(
+        &self,
+        store: &TaskStore,
+        event_tx: &tokio::sync::broadcast::Sender<SchedulerEvent>,
+    ) -> usize {
+        let mut active = self.inner.lock().await;
+        let count = active.len();
+        for (id, at) in active.drain() {
+            at.token.cancel();
+            if let Err(e) = store.pause(id).await {
+                tracing::error!(task_id = id, error = %e, "failed to pause active task");
+            }
+            let _ = event_tx.send(SchedulerEvent::Preempted {
+                task_id: id,
+                task_type: at.record.task_type.clone(),
+                key: at.record.key.clone(),
+            });
+        }
+        count
+    }
+}
+
+// ── Spawn ──────────────────────────────────────────────────────────
+
+/// Spawn a task executor and wire up completion/failure handling.
+///
+/// Inserts the task into the active map, starts a progress listener that is
+/// stopped as soon as the executor finishes, and spawns the executor.
+pub(crate) async fn spawn_task(
+    task: TaskRecord,
+    executor: Arc<dyn crate::registry::ErasedExecutor>,
+    store: TaskStore,
+    active: ActiveTaskMap,
+    event_tx: tokio::sync::broadcast::Sender<SchedulerEvent>,
+    max_retries: i32,
+    app_state: crate::registry::StateSnapshot,
+) {
+    let child_token = CancellationToken::new();
+
+    // Insert into active map before spawning to avoid races.
+    active
+        .insert(
+            task.id,
+            ActiveTask {
+                record: task.clone(),
+                token: child_token.clone(),
+                reported_progress: None,
+                reported_at: None,
+            },
+        )
+        .await;
+
+    // Build execution context.
+    let ctx = TaskContext {
+        record: task.clone(),
+        token: child_token.clone(),
+        progress: ProgressReporter::new(
+            task.id,
+            task.task_type.clone(),
+            task.key.clone(),
+            event_tx.clone(),
+        ),
+        app_state,
+    };
+
+    let _ = event_tx.send(SchedulerEvent::Dispatched {
+        task_id: task.id,
+        task_type: task.task_type.clone(),
+        key: task.key.clone(),
+    });
+
+    // Spawn progress listener — bridges broadcast events into the active map.
+    let active_for_progress = active.clone();
+    let mut progress_rx = event_tx.subscribe();
+    let progress_task_id = task.id;
+    let progress_listener = tokio::spawn(async move {
+        loop {
+            match progress_rx.recv().await {
+                Ok(SchedulerEvent::Progress {
+                    task_id, percent, ..
+                }) if task_id == progress_task_id => {
+                    active_for_progress.update_progress(task_id, percent).await;
+                    if percent >= 1.0 {
+                        break;
+                    }
+                }
+                // Unrelated event, or we lagged and missed some: keep listening.
+                Ok(_) | Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {}
+                Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
+            }
+        }
+    });
+
+    // Spawn executor.
+    tokio::spawn(async move {
+        let task_id = task.id;
+        let result = executor.execute_erased(&ctx).await;
+
+        // Executor is done: drop the context (and its progress reporter) and stop
+        // the listener, which would otherwise outlive tasks that never report 100%.
+        drop(ctx);
+        progress_listener.abort();
+
+        match result {
+            Ok(tr) => {
+                if let Err(e) = store.complete(task_id, &tr).await {
+                    tracing::error!(task_id, error = %e, "failed to record task completion");
+                }
+                // Remove from active tracking only AFTER the store write: the occupied slot
+                // stops new dispatches from starting concurrent SQLite writes (SQLITE_BUSY).
+                active.remove(task_id).await;
+                let _ = event_tx.send(SchedulerEvent::Completed {
+                    task_id,
+                    task_type: task.task_type.clone(),
+                    key: task.key.clone(),
+                });
+            }
+            Err(te) => {
+                // If cancelled (preempted), the scheduler already paused it.
+                if child_token.is_cancelled() {
+                    active.remove(task_id).await;
+                    return;
+                }
+                let will_retry = te.retryable && task.retry_count < max_retries;
+                tracing::warn!(
+                    task_id,
+                    task_type = task.task_type,
+                    error = %te.message,
+                    retryable = te.retryable,
+                    will_retry,
+                    "task failed"
+                );
+                if let Err(e) = store
+                    .fail(
+                        task_id,
+                        &te.message,
+                        te.retryable,
+                        max_retries,
+                        te.actual_read_bytes,
+                        te.actual_write_bytes,
+                    )
+                    .await
+                {
+                    tracing::error!(task_id, error = %e, "failed to record task failure");
+                }
+                // Remove from active tracking AFTER the store write completes.
+                active.remove(task_id).await;
+                let _ = event_tx.send(SchedulerEvent::Failed {
+                    task_id,
+                    task_type: task.task_type.clone(),
+                    key: task.key.clone(),
+                    error: te.message,
+                    will_retry,
+                });
+            }
+        }
+    });
+}
diff --git a/src/scheduler/gate.rs b/src/scheduler/gate.rs
new file mode 100644
index 0000000..af04120
--- /dev/null
+++ b/src/scheduler/gate.rs
@@ -0,0 +1,181 @@
+use std::future::Future;
+use std::pin::Pin;
+use std::sync::Arc;
+
+use crate::backpressure::{CompositePressure, ThrottlePolicy};
+use crate::resource::ResourceReader;
+use crate::store::{StoreError, TaskStore};
+use crate::task::TaskRecord;
+
+/// Boxed future returned by [`DispatchGate`] methods.
+///
+/// A pinned, heap-allocated, `Send` future that may borrow for `'a`. Boxing
+/// the future lets the trait's methods be awaited through a
+/// `dyn DispatchGate` trait object.
+type BoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + Send + 'a>>;
+
+// ── Gate Context ───────────────────────────────────────────────────
+
+/// Context provided to a [`DispatchGate`] for admission decisions.
+///
+/// Built by the scheduler each dispatch cycle so gate implementations
+/// can query store state and resource snapshots without owning them.
+/// Both fields are borrows tied to `'a`, so a context cannot outlive the
+/// store or reader it was built from.
+pub struct GateContext<'a> {
+    /// The task store — available for queries like running IO totals.
+    pub store: &'a TaskStore,
+    /// The current resource reader, if monitoring is enabled.
+    ///
+    /// `None` when no reader is available; [`has_io_headroom`] treats that
+    /// as "no IO limit" and admits the task.
+    pub resource_reader: Option<&'a Arc<dyn ResourceReader>>,
+}
+
+// ── Dispatch Gate ──────────────────────────────────────────────────
+
+/// Decides whether the next candidate task is dispatched now or left in
+/// the queue for a later cycle.
+///
+/// The scheduler calls [`admit`](DispatchGate::admit) on the candidate it
+/// peeked from the store, while that task is still pending and before it
+/// is claimed or an executor is spawned. Returning `Ok(false)` leaves the
+/// task untouched in the queue, so it is considered again on a later
+/// dispatch attempt.
+///
+/// The default [`DefaultDispatchGate`] applies backpressure throttling
+/// and IO-budget checks. Custom implementations can add per-type rate
+/// limiting, cost-model gating, feature flags, etc.
+///
+/// # Example
+///
+/// ```ignore
+/// use taskmill::scheduler::gate::{DispatchGate, GateContext};
+/// use taskmill::store::StoreError;
+/// use taskmill::task::TaskRecord;
+///
+/// struct AlwaysAdmit;
+///
+/// impl DispatchGate for AlwaysAdmit {
+///     fn admit<'a>(
+///         &'a self,
+///         _task: &'a TaskRecord,
+///         _ctx: &'a GateContext<'a>,
+///     ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<bool, StoreError>> + Send + 'a>> {
+///         Box::pin(async { Ok(true) })
+///     }
+/// }
+/// ```
+pub trait DispatchGate: Send + Sync + 'static {
+    /// Check whether `task` should be dispatched given the current context.
+    ///
+    /// Return `Ok(true)` to dispatch, `Ok(false)` to leave the task pending
+    /// for a later attempt. An `Err` aborts the current dispatch attempt and
+    /// is returned to its caller.
+    fn admit<'a>(
+        &'a self,
+        task: &'a TaskRecord,
+        ctx: &'a GateContext<'a>,
+    ) -> BoxFuture<'a, Result<bool, StoreError>>;
+
+    /// Current aggregate pressure (0.0–1.0). Returns 0.0 by default.
+    ///
+    /// The scheduler reads this when building a status snapshot.
+    fn pressure<'a>(&'a self) -> BoxFuture<'a, f32> {
+        Box::pin(async { 0.0 })
+    }
+
+    /// Per-source pressure breakdown for diagnostics. Empty by default.
+    ///
+    /// Each entry is a `(source name, pressure)` pair.
+    fn pressure_breakdown<'a>(&'a self) -> BoxFuture<'a, Vec<(String, f32)>> {
+        Box::pin(async { Vec::new() })
+    }
+}
+
+// ── Default Gate ───────────────────────────────────────────────────
+
+/// Default gate: backpressure throttling + IO budget.
+///
+/// This is what the scheduler uses unless you provide a custom gate via
+/// [`SchedulerBuilder::dispatch_gate`](super::SchedulerBuilder::dispatch_gate).
+pub struct DefaultDispatchGate {
+    /// Composite pressure source, behind an async mutex that is locked for
+    /// each pressure or breakdown read.
+    pub(crate) pressure: tokio::sync::Mutex<CompositePressure>,
+    /// Policy asked, per task priority and current pressure, whether the
+    /// task should be throttled.
+    pub(crate) policy: ThrottlePolicy,
+}
+
+impl DefaultDispatchGate {
+    /// Build the default gate from a pressure source and a throttle policy.
+    ///
+    /// The pressure source is wrapped in an async mutex; the policy is
+    /// stored unchanged.
+    pub fn new(pressure: CompositePressure, policy: ThrottlePolicy) -> Self {
+        let pressure = tokio::sync::Mutex::new(pressure);
+        Self { pressure, policy }
+    }
+}
+
+impl DispatchGate for DefaultDispatchGate {
+    /// Admit `task` only if it passes two checks, in order: backpressure
+    /// throttling, then the IO budget. The IO check is skipped entirely
+    /// when the task is throttled.
+    fn admit<'a>(
+        &'a self,
+        task: &'a TaskRecord,
+        ctx: &'a GateContext<'a>,
+    ) -> BoxFuture<'a, Result<bool, StoreError>> {
+        Box::pin(async move {
+            // 1. Backpressure: the lock is only held for this one read.
+            let level = self.pressure.lock().await.pressure();
+            let throttled = self.policy.should_throttle(task.priority, level);
+            if throttled {
+                tracing::trace!(
+                    priority = task.priority.value(),
+                    pressure = level,
+                    "task throttled by backpressure — requeuing"
+                );
+                return Ok(false);
+            }
+
+            // 2. IO budget: store errors from the check propagate to the caller.
+            let headroom = has_io_headroom(task, ctx).await?;
+            if headroom {
+                return Ok(true);
+            }
+
+            tracing::trace!(
+                task_type = task.task_type,
+                expected_read = task.expected_read_bytes,
+                expected_write = task.expected_write_bytes,
+                "task deferred — IO budget exhausted — requeuing"
+            );
+            Ok(false)
+        })
+    }
+
+    /// Aggregate pressure as reported by the composite pressure source.
+    fn pressure<'a>(&'a self) -> BoxFuture<'a, f32> {
+        Box::pin(async move { self.pressure.lock().await.pressure() })
+    }
+
+    /// Per-source pressure readings with owned source names.
+    fn pressure_breakdown<'a>(&'a self) -> BoxFuture<'a, Vec<(String, f32)>> {
+        Box::pin(async move {
+            let mut entries = Vec::new();
+            // The lock guard lives for the duration of the loop, exactly as
+            // long as the readings are being copied out.
+            for (name, val) in self.pressure.lock().await.breakdown() {
+                entries.push((name.to_owned(), val));
+            }
+            entries
+        })
+    }
+}
+
+// ── IO Budget ──────────────────────────────────────────────────────
+
+/// Check if there is IO headroom for a task given current running IO
+/// and system capacity.
+///
+/// This is a utility function that custom [`DispatchGate`] implementations
+/// can reuse if they want IO-budget awareness alongside their own logic.
+///
+/// The task is admitted (`Ok(true)`) when any of these hold:
+/// - no resource reader is configured in `ctx`;
+/// - the latest snapshot reports zero read *and* zero write throughput
+///   (treated as "no data yet");
+/// - for each direction with non-zero observed throughput, the expected
+///   bytes of running tasks plus this task fit within 80% of the observed
+///   throughput over a 2-second window.
+///
+/// # Errors
+///
+/// Returns the [`StoreError`] from querying the running IO totals.
+pub async fn has_io_headroom(task: &TaskRecord, ctx: &GateContext<'_>) -> Result<bool, StoreError> {
+    // Budget window, in seconds, over which observed throughput is projected.
+    const BUDGET_WINDOW_SECS: f64 = 2.0;
+    // Fraction of the projected capacity that running + new work may use.
+    const MAX_UTILISATION: f64 = 0.8;
+
+    let Some(reader) = ctx.resource_reader else {
+        // No monitor configured — always allow.
+        return Ok(true);
+    };
+
+    let snapshot = reader.latest();
+    // If we have no IO data yet, allow the task.
+    if snapshot.io_read_bytes_per_sec == 0.0 && snapshot.io_write_bytes_per_sec == 0.0 {
+        return Ok(true);
+    }
+
+    let (running_read, running_write) = ctx.store.running_io_totals().await?;
+
+    let read_capacity = snapshot.io_read_bytes_per_sec * BUDGET_WINDOW_SECS;
+    let write_capacity = snapshot.io_write_bytes_per_sec * BUDGET_WINDOW_SECS;
+
+    // Convert to f64 *before* adding: summing the integer byte counts first
+    // could overflow (a panic in debug builds, a wrapped value in release).
+    let read_demand = running_read as f64 + task.expected_read_bytes as f64;
+    let write_demand = running_write as f64 + task.expected_write_bytes as f64;
+
+    // A direction with zero observed throughput has no usable capacity
+    // estimate, so it never blocks admission.
+    let read_ok = read_capacity == 0.0 || read_demand <= read_capacity * MAX_UTILISATION;
+    let write_ok = write_capacity == 0.0 || write_demand <= write_capacity * MAX_UTILISATION;
+
+    Ok(read_ok && write_ok)
+}
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs
new file mode 100644
index 0000000..84ce287
--- /dev/null
+++ b/src/scheduler/mod.rs
@@ -0,0 +1,1526 @@
+pub(crate) mod dispatch;
+pub(crate) mod gate;
+pub mod progress;
+
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering};
+use std::sync::Arc;
+
+use serde::{Deserialize, Serialize};
+use tokio::sync::{Mutex, Notify};
+use tokio::time::Duration;
+use tokio_util::sync::CancellationToken;
+
+use crate::backpressure::{CompositePressure, ThrottlePolicy};
+use crate::priority::Priority;
+use crate::registry::{TaskExecutor, TaskTypeRegistry};
+use crate::resource::sampler::{SamplerConfig, SmoothedReader};
+use crate::resource::{ResourceReader, ResourceSampler};
+use crate::store::{StoreConfig, StoreError, TaskStore};
+use crate::task::{generate_dedup_key, SubmitOutcome, TaskLookup, TaskSubmission, TypedTask};
+
+use dispatch::ActiveTaskMap;
+use gate::{DefaultDispatchGate, GateContext};
+
+pub use progress::{EstimatedProgress, ProgressReporter};
+
+// ── Snapshot ────────────────────────────────────────────────────────
+
+/// Single-call status snapshot for dashboard UIs.
+///
+/// Captures queue depths, running tasks, progress, and backpressure in
+/// one serializable struct — ideal for returning from a Tauri command.
+/// The fields are gathered one after another rather than atomically, so
+/// they may reflect slightly different instants.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SchedulerSnapshot {
+    /// Tasks currently executing (from the in-memory active map).
+    pub running: Vec<crate::task::TaskRecord>,
+    /// Number of tasks waiting to be dispatched.
+    pub pending_count: i64,
+    /// Number of tasks paused (preempted).
+    pub paused_count: i64,
+    /// Progress estimates for every running task.
+    pub progress: Vec<EstimatedProgress>,
+    /// Aggregate backpressure (0.0–1.0), as reported by the dispatch gate.
+    pub pressure: f32,
+    /// Per-source pressure breakdown for diagnostics, as `(source, pressure)` pairs.
+    pub pressure_breakdown: Vec<(String, f32)>,
+    /// Current maximum concurrency setting.
+    pub max_concurrency: usize,
+    /// Whether the scheduler is globally paused.
+    pub is_paused: bool,
+}
+
+// ── Events ──────────────────────────────────────────────────────────
+
+/// Events emitted by the scheduler for UI integration and observability.
+///
+/// Subscribe via the `tokio::sync::broadcast::Receiver` returned by
+/// [`Scheduler::subscribe`] or passed through the builder.
+///
+/// Serialized with serde's adjacently tagged representation: the variant
+/// name is stored under `type` and the variant's fields under `data`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type", content = "data")]
+pub enum SchedulerEvent {
+    /// A task was dispatched and is now running.
+    Dispatched {
+        task_id: i64,
+        task_type: String,
+        key: String,
+    },
+    /// A task completed successfully.
+    Completed {
+        task_id: i64,
+        task_type: String,
+        key: String,
+    },
+    /// A task failed (may be retried or permanently failed).
+    Failed {
+        task_id: i64,
+        task_type: String,
+        key: String,
+        /// The failure message.
+        error: String,
+        /// `true` when the failure is retryable and the retry limit has not
+        /// been reached; `false` means the failure is permanent.
+        will_retry: bool,
+    },
+    /// A task was preempted by higher-priority work.
+    Preempted {
+        task_id: i64,
+        task_type: String,
+        key: String,
+    },
+    /// A task was cancelled by the application.
+    Cancelled {
+        task_id: i64,
+        task_type: String,
+        key: String,
+    },
+    /// Progress update from a running task.
+    Progress {
+        task_id: i64,
+        task_type: String,
+        key: String,
+        /// Progress percentage (0.0 to 1.0).
+        percent: f32,
+        /// Optional human-readable message from the executor.
+        message: Option<String>,
+    },
+    /// The scheduler was globally paused via [`Scheduler::pause_all`].
+    Paused,
+    /// The scheduler was resumed via [`Scheduler::resume_all`].
+    Resumed,
+}
+
+// ── Config ──────────────────────────────────────────────────────────
+
+/// How the scheduler behaves during shutdown.
+///
+/// Applied when the run loop's cancellation token fires. In both modes the
+/// resource sampler is stopped first and the store is closed last.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ShutdownMode {
+    /// Cancel all running tasks immediately (default).
+    Hard,
+    /// Stop accepting new dispatches, wait for running tasks to complete
+    /// (up to the given timeout), then cancel any remaining.
+    Graceful(Duration),
+}
+
+/// Scheduler configuration.
+///
+/// See the [`Default`] implementation for the default of each field.
+pub struct SchedulerConfig {
+    /// Maximum concurrent running tasks. Adjusted dynamically via
+    /// [`Scheduler::set_max_concurrency`]. Default: 4.
+    pub max_concurrency: usize,
+    /// Maximum retries before permanent failure. Default: 3.
+    pub max_retries: i32,
+    /// Priority threshold: tasks at or above this priority (lower numeric value)
+    /// trigger preemption of lower-priority running tasks.
+    /// Default: [`Priority::REALTIME`].
+    pub preempt_priority: Priority,
+    /// Interval between scheduler polls when idle. Default: 500ms.
+    pub poll_interval: Duration,
+    /// How many recent tasks to consider for IO throughput estimation.
+    /// Default: 20.
+    pub throughput_sample_size: i32,
+    /// Shutdown behavior. Default: Hard.
+    pub shutdown_mode: ShutdownMode,
+}
+
+impl Default for SchedulerConfig {
+    /// Defaults: 4 concurrent tasks, 3 retries, preemption at
+    /// `Priority::REALTIME`, a 500ms poll interval, a throughput sample of
+    /// 20 tasks, and hard shutdown.
+    fn default() -> Self {
+        let preempt_priority = Priority::REALTIME;
+        let poll_interval = Duration::from_millis(500);
+        let shutdown_mode = ShutdownMode::Hard;
+
+        Self {
+            max_concurrency: 4,
+            max_retries: 3,
+            preempt_priority,
+            poll_interval,
+            throughput_sample_size: 20,
+            shutdown_mode,
+        }
+    }
+}
+
+// ── Scheduler ───────────────────────────────────────────────────────
+
+/// Shared inner state behind `Arc` so `Scheduler` can be `Clone`.
+#[allow(dead_code)]
+struct SchedulerInner {
+    /// Persistent task queue behind every submit, dispatch, and lookup.
+    store: TaskStore,
+    /// Concurrency ceiling; atomic so it can be changed through `&self`.
+    max_concurrency: AtomicUsize,
+    /// Retry limit handed to each spawned task's failure handling.
+    max_retries: i32,
+    /// Submissions whose priority value is at or below this threshold's
+    /// value trigger preemption of running tasks.
+    preempt_priority: Priority,
+    /// Sleep between run-loop wake-ups when no notification arrives.
+    poll_interval: Duration,
+    /// Copied from [`SchedulerConfig::throughput_sample_size`].
+    throughput_sample_size: i32,
+    /// What the run loop does with still-running tasks on shutdown.
+    shutdown_mode: ShutdownMode,
+    /// Executors keyed by task type, consulted on each dispatch.
+    registry: Arc<TaskTypeRegistry>,
+    /// Admission gate consulted before a candidate task is claimed.
+    gate: Box<dyn gate::DispatchGate>,
+    /// Optional resource reader handed to the gate; starts as `None`.
+    resource_reader: Mutex<Option<Arc<dyn ResourceReader>>>,
+    /// In-memory tracking of active tasks and their cancellation tokens.
+    active: ActiveTaskMap,
+    /// Broadcast channel for lifecycle events.
+    event_tx: tokio::sync::broadcast::Sender<SchedulerEvent>,
+    /// Token to cancel the background resource sampler (if started).
+    sampler_token: CancellationToken,
+    /// Type-keyed application state passed to every executor via [`TaskContext::state`].
+    app_state: Arc<crate::registry::StateMap>,
+    /// Global pause flag — when `true`, the run loop skips dispatching.
+    paused: AtomicBool,
+    /// Wakes the run loop when new work is submitted or the scheduler is resumed.
+    work_notify: Notify,
+}
+
+/// IO-aware priority scheduler.
+///
+/// Coordinates task execution by:
+/// 1. Popping highest-priority pending tasks from the SQLite store
+/// 2. Checking IO budget against running task estimates and system capacity
+/// 3. Applying backpressure throttling based on external pressure sources
+/// 4. Preempting lower-priority tasks when high-priority work arrives
+/// 5. Managing retries and failure recording
+/// 6. Emitting lifecycle events for UI integration
+///
+/// Steps 2 and 3 are delegated to the configured dispatch gate, which is
+/// consulted before a candidate task is claimed.
+///
+/// `Scheduler` is `Clone` — each clone shares the same underlying state.
+/// This makes it easy to hold in `tauri::State<Scheduler>` or share across
+/// async tasks.
+#[derive(Clone)]
+pub struct Scheduler {
+    /// Shared state; cloning the scheduler only clones this `Arc`.
+    inner: Arc<SchedulerInner>,
+}
+
+impl Scheduler {
+    /// Create a scheduler that uses a [`DefaultDispatchGate`] built from
+    /// `pressure` and `policy`, and starts with an empty application-state map.
+    pub fn new(
+        store: TaskStore,
+        config: SchedulerConfig,
+        registry: Arc<TaskTypeRegistry>,
+        pressure: CompositePressure,
+        policy: ThrottlePolicy,
+    ) -> Self {
+        let default_gate: Box<dyn gate::DispatchGate> =
+            Box::new(DefaultDispatchGate::new(pressure, policy));
+        let empty_state = Arc::new(crate::registry::StateMap::new());
+        Self::with_gate(store, config, registry, default_gate, empty_state)
+    }
+
+    /// Create a scheduler with a custom dispatch gate and application state.
+    ///
+    /// The scheduler starts unpaused, with no resource reader, an empty
+    /// active-task map, and an event channel with capacity 256.
+    fn with_gate(
+        store: TaskStore,
+        config: SchedulerConfig,
+        registry: Arc<TaskTypeRegistry>,
+        gate: Box<dyn gate::DispatchGate>,
+        app_state: Arc<crate::registry::StateMap>,
+    ) -> Self {
+        let SchedulerConfig {
+            max_concurrency,
+            max_retries,
+            preempt_priority,
+            poll_interval,
+            throughput_sample_size,
+            shutdown_mode,
+        } = config;
+
+        // The initial receiver is dropped; subscribers attach later.
+        let (event_tx, _initial_rx) = tokio::sync::broadcast::channel(256);
+
+        let inner = SchedulerInner {
+            store,
+            max_concurrency: AtomicUsize::new(max_concurrency),
+            max_retries,
+            preempt_priority,
+            poll_interval,
+            throughput_sample_size,
+            shutdown_mode,
+            registry,
+            gate,
+            resource_reader: Mutex::new(None),
+            active: ActiveTaskMap::new(),
+            event_tx,
+            sampler_token: CancellationToken::new(),
+            app_state,
+            paused: AtomicBool::new(false),
+            work_notify: Notify::new(),
+        };
+
+        Self {
+            inner: Arc::new(inner),
+        }
+    }
+
+    /// Create a [`SchedulerBuilder`] for ergonomic construction.
+    ///
+    /// Equivalent to calling `SchedulerBuilder::new()`.
+    pub fn builder() -> SchedulerBuilder {
+        SchedulerBuilder::new()
+    }
+
+    /// Subscribe to scheduler lifecycle events.
+    ///
+    /// Returns a broadcast receiver. Events are emitted on task dispatch,
+    /// completion, failure, preemption, cancellation, and progress, and when
+    /// the scheduler is paused or resumed. Useful for bridging to a Tauri
+    /// frontend or updating UI state.
+    ///
+    /// Each call creates a new, independent receiver on the shared channel.
+    pub fn subscribe(&self) -> tokio::sync::broadcast::Receiver<SchedulerEvent> {
+        self.inner.event_tx.subscribe()
+    }
+
+    /// Set the resource reader for IO-aware scheduling.
+    ///
+    /// Replaces any previously set reader. The reader is handed to the
+    /// dispatch gate on each dispatch attempt.
+    pub async fn set_resource_reader(&self, reader: Arc<dyn ResourceReader>) {
+        *self.inner.resource_reader.lock().await = Some(reader);
+    }
+
+    /// Get a reference to the underlying store for direct queries.
+    ///
+    /// This is the same store instance the scheduler dispatches from.
+    pub fn store(&self) -> &TaskStore {
+        &self.inner.store
+    }
+
+    /// Register shared application state after the scheduler has been built.
+    ///
+    /// This is useful when library code (e.g. shoebox) needs to inject its
+    /// own state into a scheduler that was constructed by a parent
+    /// application. Multiple types can coexist — each is keyed by `TypeId`.
+    ///
+    /// A snapshot of the state map is taken each time a task is spawned.
+    // NOTE(review): presumably tasks already running do not see state
+    // registered afterwards — confirm against `StateMap::snapshot`.
+    pub async fn register_state<T: Send + Sync + 'static>(&self, state: Arc<T>) {
+        self.inner.app_state.insert(state).await;
+    }
+
+    /// Submit a single task to the store.
+    ///
+    /// A `Duplicate` outcome is returned as-is with no further action.
+    /// For any other outcome:
+    /// - if the submission's priority value is at or below the preemption
+    ///   threshold's value, lower-priority running tasks are preempted;
+    /// - the run loop is woken so it can pick up the new or upgraded task.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`StoreError`] from the underlying store submission.
+    pub async fn submit(&self, sub: &TaskSubmission) -> Result<SubmitOutcome, StoreError> {
+        let inner = &self.inner;
+        let outcome = inner.store.submit(sub).await?;
+
+        // Nothing changed in the queue — no preemption, no wake-up.
+        if matches!(outcome, SubmitOutcome::Duplicate) {
+            return Ok(outcome);
+        }
+
+        let meets_threshold = sub.priority.value() <= inner.preempt_priority.value();
+        if meets_threshold {
+            inner
+                .active
+                .preempt_below(sub.priority, &inner.store, &inner.event_tx)
+                .await;
+        }
+
+        inner.work_notify.notify_one();
+        Ok(outcome)
+    }
+
+    /// Submit several tasks through the store's batch submission.
+    ///
+    /// After the batch is stored, preemption runs at most once, using the
+    /// highest priority (lowest numeric value) among submissions whose
+    /// outcome was not `Duplicate`, and only if that priority meets the
+    /// preemption threshold. The run loop is woken if any outcome was not
+    /// `Duplicate`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`StoreError`] from the underlying batch submission.
+    pub async fn submit_batch(
+        &self,
+        submissions: &[TaskSubmission],
+    ) -> Result<Vec<SubmitOutcome>, StoreError> {
+        let inner = &self.inner;
+        let results = inner.store.submit_batch(submissions).await?;
+
+        let any_changed = results
+            .iter()
+            .any(|outcome| !matches!(outcome, SubmitOutcome::Duplicate));
+
+        // Track the best priority among non-duplicate submissions; on ties
+        // the earliest submission is kept.
+        let mut best_priority: Option<Priority> = None;
+        for (sub, outcome) in submissions.iter().zip(results.iter()) {
+            if matches!(outcome, SubmitOutcome::Duplicate) {
+                continue;
+            }
+            let improves = match best_priority {
+                Some(current) => sub.priority.value() < current.value(),
+                None => true,
+            };
+            if improves {
+                best_priority = Some(sub.priority);
+            }
+        }
+
+        match best_priority {
+            Some(priority) if priority.value() <= inner.preempt_priority.value() => {
+                inner
+                    .active
+                    .preempt_below(priority, &inner.store, &inner.event_tx)
+                    .await;
+            }
+            _ => {}
+        }
+
+        if any_changed {
+            inner.work_notify.notify_one();
+        }
+
+        Ok(results)
+    }
+
+    /// Submit a [`TypedTask`], handling serialization automatically.
+    ///
+    /// Uses the priority from [`TypedTask::priority()`].
+    ///
+    /// Builds a [`TaskSubmission`] from the task and forwards it to
+    /// [`submit`](Self::submit), so preemption and wake-up behave the same.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the submission cannot be built from the task or if the
+    /// store rejects it.
+    pub async fn submit_typed<T: TypedTask>(&self, task: &T) -> Result<SubmitOutcome, StoreError> {
+        let sub = TaskSubmission::from_typed(task)?;
+        self.submit(&sub).await
+    }
+
+    /// Submit a [`TypedTask`] with an explicit priority override.
+    ///
+    /// The provided `priority` replaces whatever [`TypedTask::priority()`]
+    /// would return, keeping priority out of the serialized payload.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the submission cannot be built from the task or if the
+    /// store rejects it.
+    pub async fn submit_typed_at<T: TypedTask>(
+        &self,
+        task: &T,
+        priority: Priority,
+    ) -> Result<SubmitOutcome, StoreError> {
+        let mut sub = TaskSubmission::from_typed(task)?;
+        // Override after construction so the rest of the submission is untouched.
+        sub.priority = priority;
+        self.submit(&sub).await
+    }
+
+    /// Look up a task by the same inputs used during submission.
+    ///
+    /// Computes the dedup key from `task_type` and `dedup_input` (the
+    /// explicit key string or payload bytes — whichever was used when
+    /// submitting), then checks the active queue and history in one call.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// // Using an explicit key (same as TaskSubmission.key = Some("my-file.jpg"))
+    /// let result = scheduler.task_lookup("thumbnail", Some(b"my-file.jpg")).await?;
+    ///
+    /// // Using payload-based dedup (same as TaskSubmission.key = None, payload = ...)
+    /// let result = scheduler.task_lookup("ingest", Some(&payload_bytes)).await?;
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`StoreError`] from the store lookup.
+    pub async fn task_lookup(
+        &self,
+        task_type: &str,
+        dedup_input: Option<&[u8]>,
+    ) -> Result<TaskLookup, StoreError> {
+        let key = generate_dedup_key(task_type, dedup_input);
+        self.inner.store.task_lookup(&key).await
+    }
+
+    /// Look up a [`TypedTask`] by value, using its serialized form as the
+    /// dedup input.
+    ///
+    /// This mirrors [`submit_typed`](Self::submit_typed) — pass the same
+    /// struct you would submit and get back its current status.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the task cannot be serialized to JSON or if the store
+    /// lookup fails.
+    // NOTE(review): the key here is always derived from the JSON payload.
+    // If `TaskSubmission::from_typed` can use an explicit key instead, this
+    // lookup would compute a different key than submission — confirm.
+    pub async fn lookup_typed<T: TypedTask>(&self, task: &T) -> Result<TaskLookup, StoreError> {
+        let payload = serde_json::to_vec(task)?;
+        let key = generate_dedup_key(T::TASK_TYPE, Some(&payload));
+        self.inner.store.task_lookup(&key).await
+    }
+
+    /// Cancel a task by id.
+    ///
+    /// - Running task: it is removed from the active map, its cancellation
+    ///   token is triggered, its row is deleted from the store, and a
+    ///   `Cancelled` event is emitted. Returns `Ok(true)`.
+    /// - Otherwise: the task is deleted from the store, and the store's
+    ///   "was anything deleted" result is returned. No event is emitted on
+    ///   this path.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`StoreError`] from the store deletion.
+    pub async fn cancel(&self, task_id: i64) -> Result<bool, StoreError> {
+        let inner = &self.inner;
+
+        let Some(running) = inner.active.remove(task_id).await else {
+            // Not active — try to delete from the queue (pending/paused).
+            return Ok(inner.store.delete(task_id).await?);
+        };
+
+        running.token.cancel();
+        inner.store.delete(task_id).await?;
+
+        let cancelled = SchedulerEvent::Cancelled {
+            task_id,
+            task_type: running.record.task_type.clone(),
+            key: running.record.key.clone(),
+        };
+        // A send error only means there are no subscribers.
+        let _ = inner.event_tx.send(cancelled);
+        Ok(true)
+    }
+
+    /// Try to claim and start the next task.
+    ///
+    /// Steps: check the concurrency limit, peek the next candidate, ask the
+    /// dispatch gate to admit it while it is still pending, claim it by id,
+    /// then hand it to its executor.
+    ///
+    /// Returns `Ok(true)` if a task was consumed from the queue — either
+    /// spawned, or failed permanently because no executor is registered for
+    /// its type — and `Ok(false)` if nothing could be dispatched (empty
+    /// queue, concurrency limit reached, gate rejection, or the candidate
+    /// was claimed or removed between peek and claim).
+    ///
+    /// # Errors
+    ///
+    /// Propagates any [`StoreError`] from the store or the gate.
+    pub async fn try_dispatch(&self) -> Result<bool, StoreError> {
+        // Check concurrency limit.
+        let active_count = self.inner.active.count().await;
+        let max = self.inner.max_concurrency.load(AtomicOrdering::Relaxed);
+        if active_count >= max {
+            return Ok(false);
+        }
+
+        // Peek at the next candidate without changing its status.
+        let Some(candidate) = self.inner.store.peek_next().await? else {
+            return Ok(false);
+        };
+
+        // Clone the reader handle and release the mutex immediately, so the
+        // lock is not held while the gate awaits (it may query the store).
+        let reader = self.inner.resource_reader.lock().await.clone();
+        let gate_ctx = GateContext {
+            store: &self.inner.store,
+            resource_reader: reader.as_ref(),
+        };
+
+        // Admission check while the task is still pending — no running
+        // window if the gate rejects.
+        if !self.inner.gate.admit(&candidate, &gate_ctx).await? {
+            return Ok(false);
+        }
+
+        // Atomically claim the task. Returns None if another dispatcher
+        // claimed it (or it was cancelled) between peek and now.
+        let Some(task) = self.inner.store.pop_by_id(candidate.id).await? else {
+            return Ok(false);
+        };
+
+        // Look up executor.
+        let Some(executor) = self.inner.registry.get(&task.task_type) else {
+            tracing::error!(
+                task_type = task.task_type,
+                "no executor registered — failing task"
+            );
+            let message = format!("no executor registered for type '{}'", task.task_type);
+            // Non-retryable failure with no retry budget and no IO performed.
+            self.inner
+                .store
+                .fail(task.id, &message, false, 0, 0, 0)
+                .await?;
+            // Tell subscribers about the permanent failure, as the executor
+            // failure path does; a send error only means nobody is listening.
+            let _ = self.inner.event_tx.send(SchedulerEvent::Failed {
+                task_id: task.id,
+                task_type: task.task_type.clone(),
+                key: task.key.clone(),
+                error: message,
+                will_retry: false,
+            });
+            // The queue advanced, so report progress to the dispatch loop.
+            return Ok(true);
+        };
+        let executor = Arc::clone(executor);
+
+        // Spawn the task — this inserts into the active map, builds the
+        // context, emits Dispatched, and wires up completion handling.
+        dispatch::spawn_task(
+            task,
+            executor,
+            self.inner.store.clone(),
+            self.inner.active.clone(),
+            self.inner.event_tx.clone(),
+            self.inner.max_retries,
+            self.inner.app_state.snapshot().await,
+        )
+        .await;
+
+        Ok(true)
+    }
+
+    /// Run the scheduler loop until the cancellation token is triggered.
+    ///
+    /// This is the main entry point. The loop wakes on three conditions:
+    /// 1. Cancellation — triggers shutdown.
+    /// 2. Notification — a task was submitted or the scheduler was resumed.
+    /// 3. Poll interval — periodic housekeeping (e.g. resuming paused tasks).
+    ///
+    /// On mobile targets (iOS/Android), the notify-based wake avoids the
+    /// constant 500ms polling that would otherwise prevent the CPU from sleeping.
+    /// Note that the poll-interval timer still fires while idle, so the
+    /// idle wake-up rate is governed by the configured `poll_interval`.
+    ///
+    /// The function returns only after shutdown has finished.
+    pub async fn run(&self, token: CancellationToken) {
+        tracing::info!(
+            max_concurrency = self.inner.max_concurrency.load(AtomicOrdering::Relaxed),
+            "taskmill scheduler started"
+        );
+
+        loop {
+            // A fresh sleep is created on every iteration, so the poll
+            // timer restarts after each wake-up.
+            tokio::select! {
+                _ = token.cancelled() => {
+                    tracing::info!("taskmill scheduler shutting down");
+                    self.shutdown().await;
+                    break;
+                }
+                _ = self.inner.work_notify.notified() => {
+                    self.poll_and_dispatch().await;
+                }
+                _ = tokio::time::sleep(self.inner.poll_interval) => {
+                    self.poll_and_dispatch().await;
+                }
+            }
+        }
+    }
+
+    /// One scheduling pass: resume eligible paused tasks, then dispatch
+    /// until nothing more can be started.
+    ///
+    /// Does nothing while the scheduler is globally paused. Store errors
+    /// while listing or resuming paused tasks are ignored; a dispatch error
+    /// is logged and ends the pass.
+    async fn poll_and_dispatch(&self) {
+        if self.is_paused() {
+            return;
+        }
+        let inner = &self.inner;
+
+        // A paused task is resumed only when no active task still counts
+        // as a preemptor for its priority.
+        if let Ok(paused) = inner.store.paused_tasks().await {
+            for task in paused {
+                let still_preempted = inner
+                    .active
+                    .has_preemptors_for(task.priority, inner.preempt_priority)
+                    .await;
+                if still_preempted {
+                    continue;
+                }
+                let _ = inner.store.resume(task.id).await;
+            }
+        }
+
+        // Keep dispatching while each attempt consumes a task.
+        loop {
+            let dispatched = match self.try_dispatch().await {
+                Ok(dispatched) => dispatched,
+                Err(e) => {
+                    tracing::error!(error = %e, "scheduler dispatch error");
+                    false
+                }
+            };
+            if !dispatched {
+                break;
+            }
+        }
+    }
+
+    /// Shut down according to the configured `ShutdownMode`.
+    ///
+    /// Order of operations: stop the resource sampler, deal with running
+    /// tasks (cancel at once in `Hard` mode; in `Graceful` mode poll every
+    /// 50ms until none remain or the timeout passes, then cancel the rest),
+    /// and finally close the store.
+    async fn shutdown(&self) {
+        let inner = &self.inner;
+
+        // Stop the resource sampler.
+        inner.sampler_token.cancel();
+
+        match inner.shutdown_mode {
+            ShutdownMode::Graceful(timeout) => {
+                tracing::info!(
+                    timeout_ms = timeout.as_millis() as u64,
+                    "graceful shutdown — waiting for running tasks"
+                );
+
+                let deadline = tokio::time::Instant::now() + timeout;
+                loop {
+                    let remaining = inner.active.count().await;
+                    if remaining == 0 {
+                        tracing::info!("all tasks completed during graceful shutdown");
+                        break;
+                    }
+                    if tokio::time::Instant::now() < deadline {
+                        // Still within budget — check again shortly.
+                        tokio::time::sleep(Duration::from_millis(50)).await;
+                        continue;
+                    }
+                    tracing::warn!(
+                        remaining,
+                        "graceful shutdown timeout — cancelling remaining tasks"
+                    );
+                    inner.active.cancel_all().await;
+                    break;
+                }
+            }
+            ShutdownMode::Hard => {
+                inner.active.cancel_all().await;
+            }
+        }
+
+        // Flush WAL and close the database.
+        inner.store.close().await;
+    }
+
+    /// Snapshot of currently active (in-memory) tasks.
+    ///
+    /// Returns the records held in the active-task map at the time of the
+    /// call; the store is not queried.
+    pub async fn active_tasks(&self) -> Vec<crate::task::TaskRecord> {
+        self.inner.active.records().await
+    }
+
+    /// Estimate progress for every running task.
+    ///
+    /// Takes the progress snapshots of the active tasks and extrapolates
+    /// each one in turn against the store, returning one estimate per
+    /// snapshot in the same order.
+    pub async fn estimated_progress(&self) -> Vec<EstimatedProgress> {
+        let store = &self.inner.store;
+        let snapshots = self.inner.active.progress_snapshots().await;
+
+        let mut estimates = Vec::with_capacity(snapshots.len());
+        for (record, reported, reported_at) in snapshots {
+            let estimate = progress::extrapolate(&record, reported, reported_at, store).await;
+            estimates.push(estimate);
+        }
+        estimates
+    }
+
+    /// Capture a status snapshot for dashboard UIs.
+    ///
+    /// Collects, in this order: running tasks, pending and paused counts,
+    /// progress estimates, gate pressure and its breakdown, the current
+    /// concurrency limit, and the global pause flag.
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`StoreError`] from the pending or paused count queries.
+    pub async fn snapshot(&self) -> Result<SchedulerSnapshot, StoreError> {
+        let inner = &self.inner;
+
+        // Field initializers run top to bottom, so the gathering order is
+        // exactly the order written here.
+        Ok(SchedulerSnapshot {
+            running: inner.active.records().await,
+            pending_count: inner.store.pending_count().await?,
+            paused_count: inner.store.paused_count().await?,
+            progress: self.estimated_progress().await,
+            pressure: inner.gate.pressure().await,
+            pressure_breakdown: inner.gate.pressure_breakdown().await,
+            max_concurrency: self.max_concurrency(),
+            is_paused: self.is_paused(),
+        })
+    }
+
+    /// Update max concurrency at runtime (e.g., from adaptive controller or
+    /// in response to battery/thermal state).
+    ///
+    /// Lowering the limit does not stop tasks that are already running; it
+    /// only blocks new dispatches until the active count drops below it.
+    /// Raising the limit wakes the run loop so the extra slots are used
+    /// immediately instead of at the next poll tick.
+    pub fn set_max_concurrency(&self, limit: usize) {
+        let previous = self
+            .inner
+            .max_concurrency
+            .swap(limit, AtomicOrdering::Relaxed);
+        tracing::info!(new_limit = limit, "concurrency limit updated");
+
+        // Only an increase can make new dispatches possible.
+        if limit > previous {
+            self.inner.work_notify.notify_one();
+        }
+    }
+
+    /// Read current max concurrency setting.
+    ///
+    /// This is the configured limit, not the number of tasks running now.
+    pub fn max_concurrency(&self) -> usize {
+        self.inner.max_concurrency.load(AtomicOrdering::Relaxed)
+    }
+
+    /// Pause the entire scheduler.
+    ///
+    /// Sets the global pause flag first, so the run loop stops dispatching,
+    /// then pauses every active task and emits a `Paused` event.
+    ///
+    /// Useful when the app is backgrounded, the laptop goes to sleep,
+    /// or the user clicks "pause all" in the UI.
+    // NOTE(review): presumably pausing an active task cancels its token and
+    // marks it `paused` in the store so it is re-dispatched on resume —
+    // confirm against `ActiveTaskMap::pause_all`.
+    pub async fn pause_all(&self) {
+        let inner = &self.inner;
+        inner.paused.store(true, AtomicOrdering::Release);
+
+        let paused_tasks = inner.active.pause_all(&inner.store, &inner.event_tx).await;
+
+        // A send error only means there are no subscribers.
+        let _ = inner.event_tx.send(SchedulerEvent::Paused);
+        tracing::info!(paused_tasks, "scheduler paused");
+    }
+
+    /// Resume the scheduler after a [`pause_all`](Self::pause_all).
+    ///
+    /// Clears the pause flag, signals `work_notify` once (presumably waking the
+    /// run loop without waiting for a poll tick — confirm in the run loop), and
+    /// broadcasts [`SchedulerEvent::Resumed`]. Tasks paused in the store are picked up again.
+    pub async fn resume_all(&self) {
+        self.inner.paused.store(false, AtomicOrdering::Release);
+        self.inner.work_notify.notify_one();
+        let _ = self.inner.event_tx.send(SchedulerEvent::Resumed); // send result deliberately ignored
+        tracing::info!("scheduler resumed");
+    }
+
+    /// Returns `true` while the global pause flag is set (acquire load of the flag).
+    pub fn is_paused(&self) -> bool {
+        self.inner.paused.load(AtomicOrdering::Acquire)
+    }
+}
+
+// ── Builder ─────────────────────────────────────────────────────────
+
+/// Ergonomic builder for constructing a [`Scheduler`] with all its dependencies.
+///
+/// Setters only record choices; `build` wires the registry, gate, app state and sampler.
+///
+/// # Example
+///
+/// ```no_run
+/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
+/// use std::sync::Arc;
+/// use taskmill::{Scheduler, Priority};
+///
+/// let scheduler = Scheduler::builder()
+///     .store_path("tasks.db")
+///     // .executor("scan", Arc::new(my_scan_executor))
+///     .max_concurrency(8)
+///     .with_resource_monitoring()
+///     .build()
+///     .await?;
+/// # Ok(())
+/// # }
+/// ```
+pub struct SchedulerBuilder {
+    store_path: Option<String>, // database path; used by `build` only when `store` is `None`
+    store_config: StoreConfig, // pool settings handed to `TaskStore::open_with_config`
+    store: Option<TaskStore>, // pre-opened store; preferred over `store_path`
+    executors: Vec<(String, Arc<dyn crate::registry::ErasedExecutor>)>, // (task type name, executor)
+    config: SchedulerConfig, // concurrency, retries, preemption, poll interval, shutdown mode
+    pressure_sources: Vec<Box<dyn crate::backpressure::PressureSource + 'static>>,
+    policy: Option<ThrottlePolicy>, // `None` => `ThrottlePolicy::default_three_tier()`
+    enable_resource_monitoring: bool, // when set, `build` spawns the sampler loop
+    custom_sampler: Option<Box<dyn ResourceSampler>>, // replaces the platform sampler
+    sampler_config: SamplerConfig, // settings for the sampler loop
+    app_state_entries: Vec<(std::any::TypeId, Arc<dyn std::any::Any + Send + Sync>)>, // shared state by type
+}
+
+impl SchedulerBuilder {
+    pub fn new() -> Self { // empty builder: default configs, nothing registered, monitoring off
+        Self {
+            store_path: None,
+            store_config: StoreConfig::default(),
+            store: None,
+            executors: Vec::new(),
+            config: SchedulerConfig::default(),
+            pressure_sources: Vec::new(),
+            policy: None, // resolved to the three-tier default in `build`
+            enable_resource_monitoring: false,
+            custom_sampler: None,
+            sampler_config: SamplerConfig::default(),
+            app_state_entries: Vec::new(),
+        }
+    }
+
+    /// Set the SQLite database path. Either this or [`store`](Self::store) must be called.
+    pub fn store_path(mut self, path: &str) -> Self {
+        self.store_path = Some(path.to_string());
+        self
+    }
+
+    /// Configure the SQLite connection pool; only used when `build` opens the store from a path.
+    pub fn store_config(mut self, config: StoreConfig) -> Self {
+        self.store_config = config;
+        self
+    }
+
+    /// Use a pre-opened [`TaskStore`]; `build` prefers it over any path given to `store_path`.
+    pub fn store(mut self, store: TaskStore) -> Self {
+        self.store = Some(store);
+        self
+    }
+
+    /// Register a task executor for a named type; registering a name twice makes `build` panic.
+    pub fn executor<E: TaskExecutor>(mut self, name: &str, executor: Arc<E>) -> Self {
+        self.executors.push((
+            name.to_string(),
+            executor as Arc<dyn crate::registry::ErasedExecutor>, // erase the concrete executor type
+        ));
+        self
+    }
+
+    /// Register an executor under the task type name declared by a [`TypedTask`].
+    ///
+    /// Equivalent to `.executor(T::TASK_TYPE, executor)`.
+    pub fn typed_executor<T: TypedTask, E: TaskExecutor>(self, executor: Arc<E>) -> Self {
+        self.executor(T::TASK_TYPE, executor)
+    }
+
+    /// Set the initial maximum number of concurrent tasks. Default: 4.
+    pub fn max_concurrency(mut self, limit: usize) -> Self {
+        self.config.max_concurrency = limit;
+        self
+    }
+
+    /// Set how many retries a task gets before it fails permanently. Default: 3.
+    pub fn max_retries(mut self, retries: i32) -> Self {
+        self.config.max_retries = retries;
+        self
+    }
+
+    /// Set the priority threshold used for preemption. Default: REALTIME.
+    pub fn preempt_priority(mut self, priority: Priority) -> Self {
+        self.config.preempt_priority = priority;
+        self
+    }
+
+    /// Set the scheduler's poll interval. Default: 500ms.
+    pub fn poll_interval(mut self, interval: Duration) -> Self {
+        self.config.poll_interval = interval;
+        self
+    }
+
+    /// Set the [`ShutdownMode`] stored in the scheduler config. Default: Hard.
+    pub fn shutdown_mode(mut self, mode: ShutdownMode) -> Self {
+        self.config.shutdown_mode = mode;
+        self
+    }
+
+    /// Add a backpressure source; `build` feeds each one into the default gate's `CompositePressure`.
+    pub fn pressure_source(
+        mut self,
+        source: Box<dyn crate::backpressure::PressureSource + 'static>,
+    ) -> Self {
+        self.pressure_sources.push(source);
+        self
+    }
+
+    /// Set the default gate's throttle policy. Default: `ThrottlePolicy::default_three_tier()`.
+    pub fn throttle_policy(mut self, policy: ThrottlePolicy) -> Self {
+        self.policy = Some(policy);
+        self
+    }
+
+    /// Enable platform resource monitoring (CPU, disk IO) using `sysinfo`.
+    ///
+    /// `build` then spawns a background sampler task and gives the scheduler a
+    /// smoothed resource reader; the task stops when the scheduler's sampler
+    /// token is cancelled. Needs the `sysinfo-monitor` feature or a custom sampler.
+    pub fn with_resource_monitoring(mut self) -> Self {
+        self.enable_resource_monitoring = true;
+        self
+    }
+
+    /// Provide a custom [`ResourceSampler`] instead of the platform one; also enables monitoring.
+    pub fn resource_sampler(mut self, sampler: Box<dyn ResourceSampler>) -> Self {
+        self.custom_sampler = Some(sampler);
+        self.enable_resource_monitoring = true; // no separate `with_resource_monitoring()` call needed
+        self
+    }
+
+    /// Configure the resource sampler loop (handed to `run_sampler` by `build`).
+    pub fn sampler_config(mut self, config: SamplerConfig) -> Self {
+        self.sampler_config = config;
+        self
+    }
+
+    /// Register shared application state accessible from every executor via
+    /// [`TaskContext::state`].
+    ///
+    /// Multiple types can be registered — each is keyed by its concrete
+    /// `TypeId`. Registering the same `T` twice is meant to overwrite the
+    /// earlier value (decided by `StateMap::from_entries` — TODO confirm).
+    ///
+    /// The value is wrapped in a new `Arc<T>` here, so it is shared (not
+    /// cloned) across all running tasks. This mirrors how Axum, Actix, and
+    /// Tauri handle shared application state.
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// struct AppServices { http: reqwest::Client, db: DatabasePool }
+    ///
+    /// let services = AppServices { /* ... */ };
+    /// Scheduler::builder()
+    ///     .app_state(services)
+    ///     .build()
+    ///     .await?;
+    /// ```
+    pub fn app_state<T: Send + Sync + 'static>(self, state: T) -> Self {
+        self.app_state_arc(Arc::new(state))
+    }
+
+    /// Register shared application state from a pre-existing `Arc`.
+    ///
+    /// Use this instead of [`app_state`](Self::app_state) when you already
+    /// have an `Arc<T>` and need to retain a handle for use outside the
+    /// scheduler (e.g. to populate `OnceLock` fields after build). Avoids
+    /// double-wrapping (`Arc<Arc<T>>`), which would cause
+    /// [`TaskContext::state`] downcasts to fail.
+    ///
+    /// The entry is appended under `TypeId::of::<T>()`; the state map itself
+    /// is only assembled later, in [`build`](Self::build).
+    pub fn app_state_arc<T: Send + Sync + 'static>(mut self, state: Arc<T>) -> Self {
+        self.app_state_entries
+            .push((std::any::TypeId::of::<T>(), state));
+        self
+    }
+
+    /// Build the scheduler: open (or adopt) the store and wire all components.
+    ///
+    /// Returns `StoreError::Database` when neither `store()` nor `store_path()`
+    /// was called. Panics on a duplicate executor name, and when monitoring is
+    /// enabled without a sampler while the `sysinfo-monitor` feature is off.
+    pub async fn build(self) -> Result<Scheduler, StoreError> {
+        // Open or use provided store (a provided store wins over a path).
+        let store = if let Some(store) = self.store {
+            store
+        } else if let Some(path) = &self.store_path {
+            TaskStore::open_with_config(path, self.store_config).await?
+        } else {
+            return Err(StoreError::Database(
+                "SchedulerBuilder requires either store_path() or store()".into(),
+            ));
+        };
+
+        // Build registry; a name registered twice is treated as a programming error.
+        let mut registry = TaskTypeRegistry::new();
+        for (name, executor) in self.executors {
+            if registry.get(&name).is_some() {
+                panic!("task type '{name}' already registered");
+            }
+            registry.register_erased(&name, executor);
+        }
+
+        // Build gate from pressure sources + policy (three-tier policy if none was set).
+        let mut pressure = CompositePressure::new();
+        for source in self.pressure_sources {
+            pressure.add_source(source);
+        }
+        let policy = self
+            .policy
+            .unwrap_or_else(ThrottlePolicy::default_three_tier);
+        let gate = Box::new(DefaultDispatchGate::new(pressure, policy));
+
+        let app_state = Arc::new(crate::registry::StateMap::from_entries(
+            self.app_state_entries,
+        ));
+
+        let scheduler =
+            Scheduler::with_gate(store, self.config, Arc::new(registry), gate, app_state);
+
+        // Set up resource monitoring (exactly one of the two `sampler` bindings is compiled in).
+        if self.enable_resource_monitoring {
+            #[cfg(feature = "sysinfo-monitor")]
+            let sampler: Box<dyn ResourceSampler> = self
+                .custom_sampler
+                .unwrap_or_else(|| crate::resource::platform_sampler());
+
+            #[cfg(not(feature = "sysinfo-monitor"))]
+            let sampler: Box<dyn ResourceSampler> = self
+                .custom_sampler
+                .expect("resource monitoring enabled but no custom sampler provided and sysinfo-monitor feature is disabled");
+
+            let reader = SmoothedReader::new();
+            scheduler
+                .set_resource_reader(Arc::new(reader.clone()))
+                .await;
+
+            // Spawn sampler loop — it will stop when the scheduler's sampler_token is cancelled.
+            let sampler_arc = Arc::new(tokio::sync::Mutex::new(sampler));
+            let sampler_config = self.sampler_config;
+            let sampler_token = scheduler.inner.sampler_token.clone();
+            tokio::spawn(crate::resource::sampler::run_sampler(
+                sampler_arc,
+                reader,
+                sampler_config,
+                sampler_token,
+            ));
+        }
+
+        Ok(scheduler)
+    }
+}
+
+impl Default for SchedulerBuilder {
+    fn default() -> Self {
+        Self::new() // `default()` is just `new()`
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::registry::{TaskContext, TaskExecutor};
+    use crate::task::{TaskError, TaskResult};
+
+    struct InstantExecutor; // test executor: succeeds at once with fixed byte counts
+
+    impl TaskExecutor for InstantExecutor {
+        async fn execute<'a>(&'a self, _ctx: &'a TaskContext) -> Result<TaskResult, TaskError> {
+            Ok(TaskResult {
+                actual_read_bytes: 100,
+                actual_write_bytes: 50,
+            })
+        }
+    }
+
+    struct SlowExecutor; // test executor: sleeps 60s unless its token is cancelled first
+
+    impl TaskExecutor for SlowExecutor {
+        async fn execute<'a>(&'a self, ctx: &'a TaskContext) -> Result<TaskResult, TaskError> {
+            tokio::select! {
+                _ = ctx.token.cancelled() => {
+                    Err(TaskError {
+                        message: "cancelled".into(),
+                        retryable: false, // a cancelled run reports a non-retryable error
+                        actual_read_bytes: 0,
+                        actual_write_bytes: 0,
+                    })
+                }
+                _ = tokio::time::sleep(Duration::from_secs(60)) => {
+                    Ok(TaskResult {
+                        actual_read_bytes: 100,
+                        actual_write_bytes: 50,
+                    })
+                }
+            }
+        }
+    }
+
+    #[allow(dead_code)]
+    struct FailingExecutor; // always fails with a retryable "boom"; no test in this module constructs it
+
+    impl TaskExecutor for FailingExecutor {
+        async fn execute<'a>(&'a self, _ctx: &'a TaskContext) -> Result<TaskResult, TaskError> {
+            Err(TaskError {
+                message: "boom".into(),
+                retryable: true,
+                actual_read_bytes: 0,
+                actual_write_bytes: 0,
+            })
+        }
+    }
+
+    async fn setup(executor: Arc<dyn crate::registry::ErasedExecutor>) -> Scheduler {
+        let store = TaskStore::open_memory().await.unwrap(); // fresh in-memory store per test
+        let mut registry = TaskTypeRegistry::new();
+        registry.register_erased("test", executor); // the tests submit with task_type "test"
+
+        Scheduler::new(
+            store,
+            SchedulerConfig::default(),
+            Arc::new(registry),
+            CompositePressure::new(), // no pressure sources
+            ThrottlePolicy::default_three_tier(),
+        )
+    }
+
+    fn arc_erased<E: TaskExecutor>(e: E) -> Arc<dyn crate::registry::ErasedExecutor> {
+        Arc::new(e) as Arc<dyn crate::registry::ErasedExecutor> // type-erase so `setup` can take any executor
+    }
+
+    #[tokio::test]
+    async fn dispatch_executes_task() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("k1".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        let dispatched = sched.try_dispatch().await.unwrap();
+        assert!(dispatched);
+
+        // Give the spawned task time to finish (fixed 50 ms wait, so timing-dependent).
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        // The task should be gone from the active lookup and have exactly one history row.
+        let k1 = crate::task::generate_dedup_key("test", Some(b"k1"));
+        assert!(sched.store().task_by_key(&k1).await.unwrap().is_none());
+        let hist = sched.store().history_by_key(&k1).await.unwrap();
+        assert_eq!(hist.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn dispatch_returns_false_when_empty() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        let dispatched = sched.try_dispatch().await.unwrap(); // nothing was submitted
+        assert!(!dispatched);
+    }
+
+    #[tokio::test]
+    async fn unregistered_type_fails_task() {
+        let store = TaskStore::open_memory().await.unwrap();
+        let registry = TaskTypeRegistry::new(); // deliberately empty: no executor for any type
+
+        let sched = Scheduler::new(
+            store,
+            SchedulerConfig::default(),
+            Arc::new(registry),
+            CompositePressure::new(),
+            ThrottlePolicy::default_three_tier(),
+        );
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "unknown".into(),
+                key: Some("k".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await; // fixed wait for the failure to be recorded
+
+        let failed = sched.store().failed_tasks(10).await.unwrap();
+        assert_eq!(failed.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn dedup_via_scheduler() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        let sub = TaskSubmission {
+            task_type: "test".into(),
+            key: Some("dup".into()),
+            priority: Priority::NORMAL,
+            payload: None,
+            expected_read_bytes: 0,
+            expected_write_bytes: 0,
+        };
+
+        let first = sched.submit(&sub).await.unwrap();
+        let second = sched.submit(&sub).await.unwrap(); // same type and key, submitted again
+        assert!(first.is_inserted());
+        assert_eq!(second, SubmitOutcome::Duplicate);
+    }
+
+    #[tokio::test]
+    async fn set_max_concurrency_works() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        assert_eq!(sched.max_concurrency(), 4); // limit from `SchedulerConfig::default()` used by `setup`
+        sched.set_max_concurrency(8);
+        assert_eq!(sched.max_concurrency(), 8);
+    }
+
+    #[tokio::test]
+    async fn cancel_pending_task() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        let id = sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("cancel-me".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap()
+            .id()
+            .unwrap();
+
+        let cancelled = sched.cancel(id).await.unwrap(); // never dispatched, so still pending
+        assert!(cancelled);
+
+        // The task should no longer be found under its dedup key.
+        let cancel_key = crate::task::generate_dedup_key("test", Some(b"cancel-me"));
+        assert!(sched
+            .store()
+            .task_by_key(&cancel_key)
+            .await
+            .unwrap()
+            .is_none());
+    }
+
+    #[tokio::test]
+    async fn cancel_running_task() {
+        let sched = setup(arc_erased(SlowExecutor)).await;
+
+        let id = sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("cancel-running".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap()
+            .id()
+            .unwrap();
+
+        // Dispatch it and wait briefly so the slow executor is running.
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(10)).await;
+
+        let cancelled = sched.cancel(id).await.unwrap();
+        assert!(cancelled); // only the return value is checked, not the state afterwards
+    }
+
+    #[tokio::test]
+    async fn event_emitted_on_complete() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        let mut rx = sched.subscribe(); // subscribe before submitting so no event is missed
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("evt".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        sched.try_dispatch().await.unwrap();
+
+        // The first event should be Dispatched.
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, SchedulerEvent::Dispatched { .. }));
+
+        // Wait for completion (`recv` below also awaits, so this sleep is only slack).
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, SchedulerEvent::Completed { .. }));
+    }
+
+    #[tokio::test]
+    async fn scheduler_is_clone() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        let sched2 = sched.clone();
+
+        // Submit through the original handle only.
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("shared".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        // The clone must see the task, i.e. both handles share one store.
+        let shared_key = crate::task::generate_dedup_key("test", Some(b"shared"));
+        let task = sched2.store().task_by_key(&shared_key).await.unwrap();
+        assert!(task.is_some());
+    }
+
+    #[tokio::test]
+    async fn submit_typed_enqueues_task() {
+        use serde::{Deserialize as De, Serialize as Ser};
+
+        #[derive(Ser, De, Debug, PartialEq)]
+        struct Thumb {
+            path: String,
+        }
+
+        impl crate::task::TypedTask for Thumb {
+            const TASK_TYPE: &'static str = "test"; // matches the name `setup` registers
+
+            fn expected_read_bytes(&self) -> i64 {
+                4096
+            }
+
+            fn expected_write_bytes(&self) -> i64 {
+                512
+            }
+        }
+
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        let task = Thumb {
+            path: "/a.jpg".into(),
+        };
+        let outcome = sched.submit_typed(&task).await.unwrap();
+        assert!(outcome.is_inserted());
+
+        // The stored record must carry the type name and IO estimates from the trait impl.
+        let record = sched
+            .store()
+            .task_by_id(outcome.id().unwrap())
+            .await
+            .unwrap()
+            .expect("task should exist");
+        assert_eq!(record.task_type, "test");
+        assert_eq!(record.expected_read_bytes, 4096);
+        assert_eq!(record.expected_write_bytes, 512);
+
+        // The payload must deserialize back into an equal `Thumb`.
+        let recovered: Thumb = record.deserialize_payload().unwrap().unwrap();
+        assert_eq!(recovered, task);
+    }
+
+    #[tokio::test]
+    async fn snapshot_returns_dashboard_state() {
+        let sched = setup(arc_erased(SlowExecutor)).await;
+
+        // Submit two tasks so one can run while the other stays pending.
+        for key in &["snap-a", "snap-b"] {
+            sched
+                .submit(&TaskSubmission {
+                    task_type: "test".into(),
+                    key: Some(key.to_string()),
+                    priority: Priority::NORMAL,
+                    payload: None,
+                    expected_read_bytes: 0,
+                    expected_write_bytes: 0,
+                })
+                .await
+                .unwrap();
+        }
+
+        // Dispatch exactly one; the slow executor keeps it running.
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(10)).await;
+
+        let snap = sched.snapshot().await.unwrap();
+
+        assert_eq!(snap.running.len(), 1);
+        assert_eq!(snap.pending_count, 1);
+        assert_eq!(snap.paused_count, 0);
+        assert_eq!(snap.progress.len(), 1);
+        assert_eq!(snap.pressure, 0.0); // `setup` registers no pressure sources
+        assert!(snap.pressure_breakdown.is_empty());
+        assert_eq!(snap.max_concurrency, 4);
+    }
+
+    #[tokio::test]
+    async fn pause_all_stops_dispatching() {
+        let sched = setup(arc_erased(SlowExecutor)).await;
+
+        // Submit two tasks.
+        for key in &["pa-1", "pa-2"] {
+            sched
+                .submit(&TaskSubmission {
+                    task_type: "test".into(),
+                    key: Some(key.to_string()),
+                    priority: Priority::NORMAL,
+                    payload: None,
+                    expected_read_bytes: 0,
+                    expected_write_bytes: 0,
+                })
+                .await
+                .unwrap();
+        }
+
+        // Dispatch one and wait briefly so it is running.
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(10)).await;
+        assert_eq!(sched.active_tasks().await.len(), 1);
+
+        // Pause — the running task should be cancelled and no longer listed as active.
+        sched.pause_all().await;
+        assert!(sched.is_paused());
+        assert_eq!(sched.active_tasks().await.len(), 0);
+
+        // try_dispatch does not check the pause flag itself (the run loop
+        // does), so dispatch is not exercised here; instead verify that the
+        // snapshot reports is_paused.
+        let snap = sched.snapshot().await.unwrap();
+        assert!(snap.is_paused);
+
+        // Resume — the flag should clear, both on the handle and in the snapshot.
+        sched.resume_all().await;
+        assert!(!sched.is_paused());
+        let snap = sched.snapshot().await.unwrap();
+        assert!(!snap.is_paused);
+    }
+
+    #[tokio::test]
+    async fn pause_resume_events_emitted() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        let mut rx = sched.subscribe();
+
+        sched.pause_all().await; // nothing is running, so `Paused` is the first event
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, SchedulerEvent::Paused));
+
+        sched.resume_all().await;
+        let evt = rx.recv().await.unwrap();
+        assert!(matches!(evt, SchedulerEvent::Resumed));
+    }
+
+    #[tokio::test]
+    async fn app_state_accessible_from_executor() {
+        use std::sync::atomic::{AtomicBool, Ordering};
+
+        struct MyState {
+            flag: Arc<AtomicBool>, // shared with the test body so it can observe the executor
+        }
+
+        struct StateCheckExecutor;
+
+        impl TaskExecutor for StateCheckExecutor {
+            async fn execute<'a>(&'a self, ctx: &'a TaskContext) -> Result<TaskResult, TaskError> {
+                let state = ctx.state::<MyState>().expect("state should be set");
+                state.flag.store(true, Ordering::SeqCst); // proof that the state was reachable
+                Ok(TaskResult {
+                    actual_read_bytes: 0,
+                    actual_write_bytes: 0,
+                })
+            }
+        }
+
+        let flag = Arc::new(AtomicBool::new(false));
+
+        let sched = Scheduler::builder()
+            .store(TaskStore::open_memory().await.unwrap())
+            .executor("test", Arc::new(StateCheckExecutor))
+            .app_state(MyState { flag: flag.clone() })
+            .build()
+            .await
+            .unwrap();
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("state-test".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await; // fixed wait for the executor to run
+
+        assert!(flag.load(Ordering::SeqCst));
+    }
+
+    #[tokio::test]
+    async fn task_lookup_pending() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("lookup-1".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        let result = sched.task_lookup("test", Some(b"lookup-1")).await.unwrap(); // not dispatched yet
+        assert!(matches!(
+            result,
+            crate::task::TaskLookup::Active(ref r) if r.status == crate::task::TaskStatus::Pending
+        ));
+    }
+
+    #[tokio::test]
+    async fn task_lookup_completed() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        sched
+            .submit(&TaskSubmission {
+                task_type: "test".into(),
+                key: Some("lookup-done".into()),
+                priority: Priority::NORMAL,
+                payload: None,
+                expected_read_bytes: 0,
+                expected_write_bytes: 0,
+            })
+            .await
+            .unwrap();
+
+        sched.try_dispatch().await.unwrap();
+        tokio::time::sleep(Duration::from_millis(50)).await; // fixed wait for the task to finish
+
+        let result = sched
+            .task_lookup("test", Some(b"lookup-done"))
+            .await
+            .unwrap();
+        assert!(matches!(result, crate::task::TaskLookup::History(_)));
+    }
+
+    #[tokio::test]
+    async fn task_lookup_not_found() {
+        let sched = setup(arc_erased(InstantExecutor)).await;
+        let result = sched
+            .task_lookup("test", Some(b"does-not-exist")) // never submitted
+            .await
+            .unwrap();
+        assert!(matches!(result, crate::task::TaskLookup::NotFound));
+    }
+
+    #[tokio::test]
+    async fn lookup_typed_works() {
+        use serde::{Deserialize as De, Serialize as Ser};
+
+        #[derive(Ser, De, Debug, PartialEq)]
+        struct Thumb {
+            path: String,
+        }
+
+        impl crate::task::TypedTask for Thumb {
+            const TASK_TYPE: &'static str = "test"; // matches the name `setup` registers
+        }
+
+        let sched = setup(arc_erased(InstantExecutor)).await;
+
+        let task = Thumb {
+            path: "/a.jpg".into(),
+        };
+        sched.submit_typed(&task).await.unwrap();
+
+        let result = sched.lookup_typed(&task).await.unwrap(); // looked up with the same value that was submitted
+        assert!(matches!(result, crate::task::TaskLookup::Active(_)));
+    }
+}
diff --git a/src/scheduler/progress.rs b/src/scheduler/progress.rs
new file mode 100644
index 0000000..bee4b13
--- /dev/null
+++ b/src/scheduler/progress.rs
@@ -0,0 +1,132 @@
+use serde::{Deserialize, Serialize};
+
+use crate::store::TaskStore;
+use crate::task::TaskRecord;
+
+use super::SchedulerEvent;
+
+// ── Progress Reporter ──────────────────────────────────────────────
+
+/// Handle passed to executors for reporting progress back to the scheduler.
+///
+/// Each report is sent as a `SchedulerEvent::Progress` on the broadcast
+/// channel held in `event_tx`, tagged with this task's id, type and key.
+#[derive(Clone)]
+pub struct ProgressReporter {
+    task_id: i64, // copied into every progress event
+    task_type: String, // cloned into every progress event
+    key: String, // cloned into every progress event
+    event_tx: tokio::sync::broadcast::Sender<SchedulerEvent>, // channel the events are sent on
+}
+
+impl ProgressReporter {
+    /// Create a reporter bound to one task and to the given event channel.
+    pub(crate) fn new(
+        task_id: i64,
+        task_type: String,
+        key: String,
+        event_tx: tokio::sync::broadcast::Sender<SchedulerEvent>,
+    ) -> Self {
+        Self {
+            task_id,
+            task_type,
+            key,
+            event_tx,
+        }
+    }
+
+    /// Report progress as a fraction in `0.0..=1.0` with an optional message.
+    ///
+    /// Out-of-range values are clamped into the range and `NaN` is reported
+    /// as `0.0`, so the emitted `percent` is always a finite value in
+    /// `0.0..=1.0`. The send result is ignored, so reporting never fails
+    /// the caller.
+    pub fn report(&self, percent: f32, message: Option<String>) {
+        // `f32::clamp` returns NaN for a NaN input, so handle it explicitly.
+        let percent = if percent.is_nan() {
+            0.0
+        } else {
+            percent.clamp(0.0, 1.0)
+        };
+        let _ = self.event_tx.send(SchedulerEvent::Progress {
+            task_id: self.task_id,
+            task_type: self.task_type.clone(),
+            key: self.key.clone(),
+            percent,
+            message,
+        });
+    }
+
+    /// Report progress as `completed / total` with an optional message.
+    ///
+    /// A `total` of zero is reported as finished (`1.0`). A `completed`
+    /// larger than `total` is clamped to `1.0` by [`report`](Self::report).
+    pub fn report_fraction(&self, completed: u64, total: u64, message: Option<String>) {
+        let percent = if total == 0 {
+            1.0
+        } else {
+            // Divide in f64 and narrow once: converting each large `u64`
+            // count to f32 first would round both operands before dividing.
+            (completed as f64 / total as f64) as f32
+        };
+        self.report(percent, message);
+    }
+}
+
+// ── Estimated Progress ─────────────────────────────────────────────
+
+/// Estimated progress for a running task, pairing executor-reported progress
+/// with a throughput-based extrapolation (see `extrapolate` in this module).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EstimatedProgress {
+    pub task_id: i64,
+    pub task_type: String,
+    pub key: String,
+    /// Executor-reported progress (0.0 to 1.0), if available.
+    pub reported_percent: Option<f32>,
+    /// Throughput-extrapolated progress (capped at 0.99), if history data exists.
+    pub extrapolated_percent: Option<f32>,
+    /// Best available estimate: reported if present, else extrapolated, else 0.0.
+    pub percent: f32,
+}
+
+/// Estimate progress for a single active task from reports and history.
+///
+/// `extrapolated_percent` is derived from the task type's average historical
+/// duration (`store.history_stats()`):
+///
+/// - with both a reported value and its timestamp, the historical rate is
+///   averaged with the rate observed up to that report and projected forward
+///   from the report;
+/// - otherwise the time elapsed since `started_at` is scaled by the
+///   historical rate.
+///
+/// It is `None` when the record has no `started_at`, the stats query fails,
+/// or the average duration is not positive. It is capped at `0.99`, so an
+/// estimate alone never claims completion. `percent` prefers the reported
+/// value, then the extrapolation, then `0.0`.
+///
+/// This is a pure query — no side effects.
+pub(crate) async fn extrapolate(
+    record: &TaskRecord,
+    reported_progress: Option<f32>,
+    reported_at: Option<chrono::DateTime<chrono::Utc>>,
+    store: &TaskStore,
+) -> EstimatedProgress {
+    let reported = reported_progress;
+
+    let extrapolated = if let Some(started) = record.started_at {
+        let now = chrono::Utc::now();
+        match store.history_stats(&record.task_type).await {
+            Ok(stats) if stats.avg_duration_ms > 0.0 => {
+                // Historical throughput: fraction of work completed per ms.
+                let hist_throughput = 1.0 / stats.avg_duration_ms;
+
+                let estimate = match (reported, reported_at) {
+                    // We have a progress anchor — blend throughputs and
+                    // extrapolate from the last report.
+                    (Some(rp), Some(rat)) => {
+                        let elapsed_to_report = (rat - started).num_milliseconds().max(1) as f64;
+                        let current_throughput = rp as f64 / elapsed_to_report;
+                        let blended = (hist_throughput + current_throughput) / 2.0;
+                        let since_report = (now - rat).num_milliseconds().max(0) as f64;
+                        rp as f64 + blended * since_report
+                    }
+                    // No report yet — pure time-based extrapolation.
+                    _ => {
+                        // Clamp at zero, as the anchored branch does: a
+                        // `started_at` ahead of the local clock would
+                        // otherwise yield a negative progress value.
+                        let elapsed_ms = (now - started).num_milliseconds().max(0) as f64;
+                        elapsed_ms * hist_throughput
+                    }
+                };
+                Some(estimate.min(0.99) as f32)
+            }
+            // Stats unavailable or no usable average — nothing to extrapolate from.
+            _ => None,
+        }
+    } else {
+        None
+    };
+
+    // Best estimate: prefer reported, fall back to extrapolated, then 0.
+    let percent = reported.or(extrapolated).unwrap_or(0.0);
+
+    EstimatedProgress {
+        task_id: record.id,
+        task_type: record.task_type.clone(),
+        key: record.key.clone(),
+        reported_percent: reported,
+        extrapolated_percent: extrapolated,
+        percent,
+    }
+}
diff --git a/src/store.rs b/src/store.rs
new file mode 100644
index 0000000..12dd188
--- /dev/null
+++ b/src/store.rs
@@ -0,0 +1,1732 @@
+use std::sync::atomic::{AtomicU64, Ordering};
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions, SqliteSynchronous};
+use sqlx::{Row, SqlitePool};
+
+use crate::priority::Priority;
+use crate::task::{
+    HistoryStatus, SubmitOutcome, TaskHistoryRecord, TaskLookup, TaskRecord, TaskResult,
+    TaskStatus, TaskSubmission, TypeStats, MAX_PAYLOAD_BYTES,
+};
+
+/// Serde-friendly error type for Tauri IPC and API boundaries.
+///
+/// Wraps the internal `sqlx::Error` into a serializable form so that
+/// callers do not need manual conversion at every call site. Every variant
+/// carries either nothing or a plain `String`, which is what lets the enum
+/// derive `Clone`, `Serialize` and `Deserialize`.
+#[derive(Debug, Clone, Serialize, Deserialize, thiserror::Error)]
+pub enum StoreError {
+    /// A submitted payload was longer than `MAX_PAYLOAD_BYTES`.
+    #[error("payload exceeds maximum size of {MAX_PAYLOAD_BYTES} bytes")]
+    PayloadTooLarge,
+    /// A `serde_json::Error`, kept as its display text.
+    #[error("serialization error: {0}")]
+    Serialization(String),
+    /// A `sqlx::Error`, kept as its display text.
+    #[error("database error: {0}")]
+    Database(String),
+}
+
+impl From<sqlx::Error> for StoreError {
+    /// Flatten a driver error into its display text so the result stays
+    /// serializable.
+    fn from(source: sqlx::Error) -> Self {
+        let message = source.to_string();
+        Self::Database(message)
+    }
+}
+
+impl From<serde_json::Error> for StoreError {
+    /// Flatten a JSON error into its display text so the result stays
+    /// serializable.
+    fn from(source: serde_json::Error) -> Self {
+        let message = source.to_string();
+        Self::Serialization(message)
+    }
+}
+
+/// History retention policy for automatic pruning of old records.
+///
+/// Applied during `complete()` and `fail()` to keep the `task_history`
+/// table bounded. The policy is stored as an `Option<RetentionPolicy>` on
+/// [`StoreConfig`]; leaving that option as `None` disables auto-pruning.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum RetentionPolicy {
+    /// Keep at most this many history records (oldest pruned first).
+    MaxCount(i64),
+    /// Keep records from the last N days.
+    MaxAgeDays(i64),
+}
+
+/// Configuration for the SQLite connection pool and history pruning.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StoreConfig {
+    /// Maximum number of connections in the pool.
+    ///
+    /// Higher values reduce contention when multiple Tauri commands and
+    /// background tasks access the store concurrently. Setting this too
+    /// high on a single SQLite file provides diminishing returns since
+    /// SQLite serializes writes.
+    ///
+    /// Default: 16.
+    pub max_connections: u32,
+
+    /// Optional retention policy for automatic history pruning.
+    ///
+    /// When set, completed/failed tasks are pruned during `complete()` and
+    /// `fail()` to keep the history table bounded. `None` (the default)
+    /// disables automatic pruning.
+    pub retention_policy: Option<RetentionPolicy>,
+
+    /// How many completions between automatic prune runs.
+    ///
+    /// Pruning runs once every `prune_interval` calls to `complete()` or
+    /// `fail()` instead of on every call. Default: 100.
+    ///
+    /// Should be at least 1: the prune check takes the completion counter
+    /// modulo this value.
+    pub prune_interval: u64,
+}
+
+impl Default for StoreConfig {
+    /// Defaults: a 16-connection pool, no retention policy, and a prune
+    /// check every 100 completions.
+    fn default() -> Self {
+        StoreConfig {
+            prune_interval: 100,
+            retention_policy: None,
+            max_connections: 16,
+        }
+    }
+}
+
+/// SQLite-backed persistence layer for the task queue and history.
+///
+/// The type is `Clone`; the completion counter sits behind an `Arc`, so
+/// clones share one counter.
+#[derive(Clone)]
+pub struct TaskStore {
+    // Connection pool every query in this store runs against.
+    pool: SqlitePool,
+    // Retention policy for history pruning; `None` means no auto-pruning.
+    retention_policy: Option<RetentionPolicy>,
+    // Number of completions between automatic prune runs.
+    prune_interval: u64,
+    // Running count of completions, used to decide when a prune is due.
+    completion_count: std::sync::Arc<AtomicU64>,
+}
+
+impl TaskStore {
+    /// Open (or create) a taskmill database at `path`, using
+    /// [`StoreConfig::default`] for the pool and pruning settings.
+    pub async fn open(path: &str) -> Result<Self, StoreError> {
+        let defaults = StoreConfig::default();
+        Self::open_with_config(path, defaults).await
+    }
+
+    /// Open (or create) a taskmill database at the given path with custom config.
+    ///
+    /// The connection uses WAL journaling, `synchronous = NORMAL` and a
+    /// 5-second busy timeout. After connecting, the schema migration runs and
+    /// any task left in `running` is reset to `pending`.
+    ///
+    /// A `prune_interval` of 0 is treated as 1: the interval is later used as
+    /// a modulus when deciding whether to prune, and a zero modulus would
+    /// panic.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`StoreError::Database`] if connecting, migrating or the
+    /// recovery update fails.
+    pub async fn open_with_config(path: &str, config: StoreConfig) -> Result<Self, StoreError> {
+        let opts = SqliteConnectOptions::new()
+            .filename(path)
+            .create_if_missing(true)
+            .journal_mode(SqliteJournalMode::Wal)
+            .synchronous(SqliteSynchronous::Normal)
+            .busy_timeout(std::time::Duration::from_secs(5));
+
+        let pool = SqlitePoolOptions::new()
+            .max_connections(config.max_connections)
+            .connect_with(opts)
+            .await?;
+
+        let store = Self {
+            pool,
+            retention_policy: config.retention_policy,
+            // Guard against a zero interval, which would make the
+            // `count % prune_interval` check divide by zero.
+            prune_interval: config.prune_interval.max(1),
+            completion_count: std::sync::Arc::new(AtomicU64::new(0)),
+        };
+        store.migrate().await?;
+        store.recover_running().await?;
+        Ok(store)
+    }
+
+    /// Open an in-memory database (for testing).
+    ///
+    /// The pool is limited to one connection, and that connection is never
+    /// reaped: a `:memory:` database lives only inside the connection that
+    /// created it, so closing it would discard the schema and all rows.
+    /// No retention policy is configured, and the restart-recovery step is
+    /// not run.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`StoreError::Database`] if connecting or migrating fails.
+    pub async fn open_memory() -> Result<Self, StoreError> {
+        let opts = SqliteConnectOptions::new()
+            .filename(":memory:")
+            .journal_mode(SqliteJournalMode::Wal)
+            .synchronous(SqliteSynchronous::Normal)
+            .busy_timeout(std::time::Duration::from_secs(5));
+
+        let pool = SqlitePoolOptions::new()
+            .max_connections(1)
+            // Keep the single connection alive for the pool's whole life;
+            // the default idle/lifetime limits would otherwise close it and
+            // take the in-memory database with it.
+            .idle_timeout(None::<std::time::Duration>)
+            .max_lifetime(None::<std::time::Duration>)
+            .connect_with(opts)
+            .await?;
+
+        let store = Self {
+            pool,
+            retention_policy: None,
+            prune_interval: 100,
+            completion_count: std::sync::Arc::new(AtomicU64::new(0)),
+        };
+        store.migrate().await?;
+        Ok(store)
+    }
+
+    /// Execute the bundled `001_tasks.sql` migration script against the pool.
+    async fn migrate(&self) -> Result<(), StoreError> {
+        let schema = include_str!("../migrations/001_tasks.sql");
+        sqlx::raw_sql(schema).execute(&self.pool).await?;
+        Ok(())
+    }
+
+    /// Restart recovery: move every task still marked `running` back to
+    /// `pending` and clear its `started_at`. Logs how many were reset when
+    /// that number is non-zero.
+    async fn recover_running(&self) -> Result<(), StoreError> {
+        let recovered = sqlx::query(
+            "UPDATE tasks SET status = 'pending', started_at = NULL WHERE status = 'running'",
+        )
+        .execute(&self.pool)
+        .await?
+        .rows_affected();
+
+        if recovered == 0 {
+            return Ok(());
+        }
+        tracing::info!(count = recovered, "recovered interrupted tasks back to pending");
+        Ok(())
+    }
+
+    /// Get a reference to the underlying connection pool.
+    ///
+    /// The reference borrows from `self`; no connection is acquired here.
+    pub fn pool(&self) -> &SqlitePool {
+        &self.pool
+    }
+
+    /// Acquire a pooled connection and start an IMMEDIATE transaction on it.
+    ///
+    /// `BEGIN IMMEDIATE` takes the write lock up front, whereas a deferred
+    /// transaction only takes it at the first write. Taking it early avoids
+    /// the read-then-write upgrade conflict between concurrent transactions,
+    /// so contention is handled through the busy timeout instead of an
+    /// immediate SQLITE_BUSY.
+    ///
+    /// NOTE(review): the transaction is started with a raw statement, not
+    /// through sqlx's transaction API, so callers should end it with an
+    /// explicit `COMMIT` or `ROLLBACK` — confirm whether the pool rolls back
+    /// such a transaction before relying on drop to do it.
+    async fn begin_write(&self) -> Result<sqlx::pool::PoolConnection<sqlx::Sqlite>, StoreError> {
+        let mut write_conn = self.pool.acquire().await?;
+        sqlx::query("BEGIN IMMEDIATE")
+            .execute(&mut *write_conn)
+            .await?;
+        Ok(write_conn)
+    }
+
+    // ── Submit ──────────────────────────────────────────────────────
+
+    /// Submit a new task.
+    ///
+    /// Outcomes, tried in this order:
+    /// - [`SubmitOutcome::Inserted`] — no task with this key existed, so a
+    ///   new row was enqueued.
+    /// - [`SubmitOutcome::Upgraded`] — a pending/paused duplicate existed
+    ///   with a numerically larger (less important) priority and was raised
+    ///   to the submitted one.
+    /// - [`SubmitOutcome::Requeued`] — a running/paused duplicate was flagged
+    ///   to run again after its current execution.
+    /// - [`SubmitOutcome::Duplicate`] — none of the above applied.
+    ///
+    /// The dedup key comes from `sub.effective_key()`. The statements run
+    /// one by one against the pool, not inside a single transaction.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::PayloadTooLarge`] if the payload is longer than
+    /// `MAX_PAYLOAD_BYTES`; [`StoreError::Database`] on query failure.
+    pub async fn submit(&self, sub: &TaskSubmission) -> Result<SubmitOutcome, StoreError> {
+        let oversized = sub
+            .payload
+            .as_ref()
+            .map_or(false, |bytes| bytes.len() > MAX_PAYLOAD_BYTES);
+        if oversized {
+            return Err(StoreError::PayloadTooLarge);
+        }
+
+        let dedup_key = sub.effective_key();
+        let new_priority = sub.priority.value() as i32;
+
+        tracing::debug!(task_type = %sub.task_type, "store.submit: INSERT start");
+        let inserted = sqlx::query(
+            "INSERT OR IGNORE INTO tasks (task_type, key, priority, payload, expected_read_bytes, expected_write_bytes)
+             VALUES (?, ?, ?, ?, ?, ?)",
+        )
+        .bind(&sub.task_type)
+        .bind(&dedup_key)
+        .bind(new_priority)
+        .bind(&sub.payload)
+        .bind(sub.expected_read_bytes)
+        .bind(sub.expected_write_bytes)
+        .execute(&self.pool)
+        .await?;
+        tracing::debug!(task_type = %sub.task_type, "store.submit: INSERT end");
+
+        if inserted.rows_affected() > 0 {
+            return Ok(SubmitOutcome::Inserted(inserted.last_insert_rowid()));
+        }
+
+        // The key already exists. A smaller number means a more important
+        // task, so `priority > ?` selects an existing row that ranks below
+        // the new submission.
+        let upgraded = sqlx::query(
+            "UPDATE tasks SET priority = ?
+             WHERE key = ? AND status IN ('pending', 'paused') AND priority > ?
+             RETURNING id",
+        )
+        .bind(new_priority)
+        .bind(&dedup_key)
+        .bind(new_priority)
+        .fetch_optional(&self.pool)
+        .await?;
+
+        if let Some(hit) = upgraded {
+            return Ok(SubmitOutcome::Upgraded(hit.get("id")));
+        }
+
+        // No upgrade happened. If the duplicate is running or paused, flag
+        // it so it is queued again once the current run finishes.
+        let requeued = sqlx::query(
+            "UPDATE tasks SET requeue = 1, requeue_priority = ?
+             WHERE key = ? AND status IN ('running', 'paused')
+               AND (requeue = 0 OR requeue_priority > ?)
+             RETURNING id",
+        )
+        .bind(new_priority)
+        .bind(&dedup_key)
+        .bind(new_priority)
+        .fetch_optional(&self.pool)
+        .await?;
+
+        let outcome = if let Some(hit) = requeued {
+            SubmitOutcome::Requeued(hit.get("id"))
+        } else {
+            SubmitOutcome::Duplicate
+        };
+        Ok(outcome)
+    }
+
+    /// Submit multiple tasks in a single transaction. Returns a `Vec` with one
+    /// [`SubmitOutcome`] per input, in input order.
+    ///
+    /// This is significantly faster than calling [`submit`](Self::submit) in a
+    /// loop because all inserts share a single SQLite transaction (one
+    /// `BEGIN`/`COMMIT` pair instead of N implicit transactions).
+    ///
+    /// All payload sizes are checked before the transaction starts. An empty
+    /// slice returns an empty `Vec` without touching the database. If any
+    /// statement (or the final `COMMIT`) fails, the transaction is rolled
+    /// back before the error is returned, so the pooled connection is never
+    /// handed back with a transaction still open.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::PayloadTooLarge`] if any payload is longer than
+    /// `MAX_PAYLOAD_BYTES`; [`StoreError::Database`] on query failure.
+    pub async fn submit_batch(
+        &self,
+        submissions: &[TaskSubmission],
+    ) -> Result<Vec<SubmitOutcome>, StoreError> {
+        // Pre-validate all payloads before starting the transaction
+        // to avoid partial inserts on validation errors.
+        for sub in submissions {
+            if let Some(ref p) = sub.payload {
+                if p.len() > MAX_PAYLOAD_BYTES {
+                    return Err(StoreError::PayloadTooLarge);
+                }
+            }
+        }
+
+        // Nothing to write: skip taking the write lock altogether.
+        if submissions.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let mut conn = self.begin_write().await?;
+
+        let outcome = Self::submit_batch_in_tx(&mut *conn, submissions).await;
+        if outcome.is_err() {
+            // Best effort: close the transaction opened by `begin_write` so
+            // the connection does not keep holding the write lock. The
+            // original error is what the caller needs, so a rollback failure
+            // is deliberately ignored.
+            let _ = sqlx::query("ROLLBACK").execute(&mut *conn).await;
+        }
+        outcome
+    }
+
+    /// Run the per-submission statements and the final `COMMIT` on a
+    /// connection that already has a write transaction open.
+    async fn submit_batch_in_tx(
+        conn: &mut sqlx::SqliteConnection,
+        submissions: &[TaskSubmission],
+    ) -> Result<Vec<SubmitOutcome>, StoreError> {
+        let mut results = Vec::with_capacity(submissions.len());
+
+        for sub in submissions {
+            let key = sub.effective_key();
+            let priority = sub.priority.value() as i32;
+            let result = sqlx::query(
+                "INSERT OR IGNORE INTO tasks (task_type, key, priority, payload, expected_read_bytes, expected_write_bytes)
+                 VALUES (?, ?, ?, ?, ?, ?)",
+            )
+            .bind(&sub.task_type)
+            .bind(&key)
+            .bind(priority)
+            .bind(&sub.payload)
+            .bind(sub.expected_read_bytes)
+            .bind(sub.expected_write_bytes)
+            .execute(&mut *conn)
+            .await?;
+
+            if result.rows_affected() > 0 {
+                results.push(SubmitOutcome::Inserted(result.last_insert_rowid()));
+            } else {
+                // Dedup hit — try to upgrade priority on pending/paused tasks.
+                let row = sqlx::query(
+                    "UPDATE tasks SET priority = ?
+                     WHERE key = ? AND status IN ('pending', 'paused') AND priority > ?
+                     RETURNING id",
+                )
+                .bind(priority)
+                .bind(&key)
+                .bind(priority)
+                .fetch_optional(&mut *conn)
+                .await?;
+
+                if let Some(r) = row {
+                    results.push(SubmitOutcome::Upgraded(r.get("id")));
+                } else {
+                    // Try requeue on running/paused tasks.
+                    let row = sqlx::query(
+                        "UPDATE tasks SET requeue = 1, requeue_priority = ?
+                         WHERE key = ? AND status IN ('running', 'paused')
+                           AND (requeue = 0 OR requeue_priority > ?)
+                         RETURNING id",
+                    )
+                    .bind(priority)
+                    .bind(&key)
+                    .bind(priority)
+                    .fetch_optional(&mut *conn)
+                    .await?;
+
+                    match row {
+                        Some(r) => results.push(SubmitOutcome::Requeued(r.get("id"))),
+                        None => results.push(SubmitOutcome::Duplicate),
+                    }
+                }
+            }
+        }
+
+        sqlx::query("COMMIT").execute(&mut *conn).await?;
+        Ok(results)
+    }
+
+    // ── Pop / lifecycle ─────────────────────────────────────────────
+
+    /// Read, without modifying it, the pending task that sorts first by
+    /// `priority ASC, id ASC`. Returns `None` when nothing is pending.
+    pub async fn peek_next(&self) -> Result<Option<TaskRecord>, StoreError> {
+        let candidate = sqlx::query(
+            "SELECT * FROM tasks
+             WHERE status = 'pending'
+             ORDER BY priority ASC, id ASC
+             LIMIT 1",
+        )
+        .fetch_optional(&self.pool)
+        .await?;
+
+        match candidate {
+            Some(ref found) => Ok(Some(row_to_task_record(found))),
+            None => Ok(None),
+        }
+    }
+
+    /// Claim one specific task: in a single UPDATE, flip it from `pending`
+    /// to `running` and stamp `started_at`.
+    ///
+    /// Returns `None` when no row matched, i.e. the id is unknown or the
+    /// task is no longer pending (for example because another dispatcher
+    /// claimed it or it was cancelled).
+    pub async fn pop_by_id(&self, id: i64) -> Result<Option<TaskRecord>, StoreError> {
+        tracing::debug!(task_id = id, "store.pop_by_id: UPDATE start");
+        let claimed = sqlx::query(
+            "UPDATE tasks SET status = 'running', started_at = datetime('now')
+             WHERE id = ? AND status = 'pending'
+             RETURNING *",
+        )
+        .bind(id)
+        .fetch_optional(&self.pool)
+        .await?;
+        tracing::debug!(task_id = id, "store.pop_by_id: UPDATE end");
+
+        let record = match claimed {
+            Some(ref updated) => Some(row_to_task_record(updated)),
+            None => None,
+        };
+        Ok(record)
+    }
+
+    /// Claim the pending task that sorts first by `priority ASC, id ASC`,
+    /// marking it `running` and stamping `started_at`.
+    /// Returns `None` when nothing is pending.
+    pub async fn pop_next(&self) -> Result<Option<TaskRecord>, StoreError> {
+        // Selection, update and read-back happen in one statement.
+        let claimed = sqlx::query(
+            "UPDATE tasks SET status = 'running', started_at = datetime('now')
+             WHERE id = (
+                 SELECT id FROM tasks
+                 WHERE status = 'pending'
+                 ORDER BY priority ASC, id ASC
+                 LIMIT 1
+             )
+             RETURNING *",
+        )
+        .fetch_optional(&self.pool)
+        .await?;
+
+        Ok(claimed.as_ref().map(row_to_task_record))
+    }
+
+    /// Put a running task back to `pending` and clear its `started_at`, in
+    /// one UPDATE.
+    ///
+    /// Meant for a task that was popped and then turned away by backpressure
+    /// or IO budget checks. Because it is a single statement, queries never
+    /// observe an intermediate state (unlike pause followed by resume).
+    /// A task that is not `running` is left untouched.
+    pub async fn requeue(&self, id: i64) -> Result<(), StoreError> {
+        let sql =
+            "UPDATE tasks SET status = 'pending', started_at = NULL WHERE id = ? AND status = 'running'";
+        sqlx::query(sql).bind(id).execute(&self.pool).await?;
+        Ok(())
+    }
+
+    /// Mark a task as completed and move it to history.
+    ///
+    /// Inside one IMMEDIATE transaction the task row is copied into
+    /// `task_history` with status `completed`, then either deleted from
+    /// `tasks` or — when its `requeue` flag is set — reset to `pending` with
+    /// the requested requeue priority. A prune check runs after the commit.
+    ///
+    /// If no task with `id` exists this is a no-op returning `Ok(())`.
+    /// On that path, and on any error, the transaction is rolled back
+    /// explicitly so the pooled connection is not returned with a
+    /// transaction still open.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::Database`] if any statement fails.
+    pub async fn complete(&self, id: i64, result: &TaskResult) -> Result<(), StoreError> {
+        tracing::debug!(task_id = id, "store.complete: BEGIN tx");
+        let mut conn = self.begin_write().await?;
+
+        let committed = match Self::complete_in_tx(&mut *conn, id, result).await {
+            Ok(committed) => committed,
+            Err(e) => {
+                // Best effort; the original error is the one worth reporting.
+                let _ = sqlx::query("ROLLBACK").execute(&mut *conn).await;
+                return Err(e);
+            }
+        };
+        if !committed {
+            // Task not found: nothing was written, just close the transaction.
+            let _ = sqlx::query("ROLLBACK").execute(&mut *conn).await;
+            return Ok(());
+        }
+        tracing::debug!(task_id = id, "store.complete: COMMIT ok");
+
+        self.maybe_prune().await;
+
+        Ok(())
+    }
+
+    /// Transaction body of [`complete`](Self::complete). Returns `Ok(true)`
+    /// after a successful `COMMIT`, or `Ok(false)` when the task does not
+    /// exist (the transaction is then still open).
+    async fn complete_in_tx(
+        conn: &mut sqlx::SqliteConnection,
+        id: i64,
+        result: &TaskResult,
+    ) -> Result<bool, StoreError> {
+        // Fetch the task to move.
+        let row = sqlx::query("SELECT * FROM tasks WHERE id = ?")
+            .bind(id)
+            .fetch_optional(&mut *conn)
+            .await?;
+
+        let Some(row) = row else { return Ok(false) };
+        let task = row_to_task_record(&row);
+
+        // Text form of the start time, shared by the duration query and the
+        // history insert.
+        let started_at_text: Option<String> = task
+            .started_at
+            .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string());
+
+        // Compute duration.
+        let duration_ms: Option<i64> = if started_at_text.is_some() {
+            sqlx::query_scalar(
+                "SELECT CAST((julianday('now') - julianday(?)) * 86400000 AS INTEGER)",
+            )
+            .bind(&started_at_text)
+            .fetch_one(&mut *conn)
+            .await?
+        } else {
+            None
+        };
+
+        // Insert into history.
+        sqlx::query(
+            "INSERT INTO task_history (task_type, key, priority, status, payload,
+                expected_read_bytes, expected_write_bytes, actual_read_bytes, actual_write_bytes,
+                retry_count, last_error, created_at, started_at, duration_ms)
+             VALUES (?, ?, ?, 'completed', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+        )
+        .bind(&task.task_type)
+        .bind(&task.key)
+        .bind(task.priority.value() as i32)
+        .bind(&task.payload)
+        .bind(task.expected_read_bytes)
+        .bind(task.expected_write_bytes)
+        .bind(result.actual_read_bytes)
+        .bind(result.actual_write_bytes)
+        .bind(task.retry_count)
+        .bind(&task.last_error)
+        .bind(task.created_at.format("%Y-%m-%d %H:%M:%S").to_string())
+        .bind(&started_at_text)
+        .bind(duration_ms)
+        .execute(&mut *conn)
+        .await?;
+
+        if task.requeue {
+            // Requeue flag set — reset to pending with requeue_priority
+            // instead of removing from the active queue.
+            let requeue_priority = task
+                .requeue_priority
+                .map(|p| p.value() as i32)
+                .unwrap_or(task.priority.value() as i32);
+            sqlx::query(
+                "UPDATE tasks SET status = 'pending', priority = ?,
+                    started_at = NULL, retry_count = 0, last_error = NULL,
+                    requeue = 0, requeue_priority = NULL
+                 WHERE id = ?",
+            )
+            .bind(requeue_priority)
+            .bind(id)
+            .execute(&mut *conn)
+            .await?;
+        } else {
+            // Remove from active queue.
+            sqlx::query("DELETE FROM tasks WHERE id = ?")
+                .bind(id)
+                .execute(&mut *conn)
+                .await?;
+        }
+
+        sqlx::query("COMMIT").execute(&mut *conn).await?;
+        Ok(true)
+    }
+
+    /// Mark a task as failed. If `retryable` and under max retries, requeue
+    /// it as pending with the same priority. Otherwise move to history as failed.
+    ///
+    /// - `error` is stored as the task's `last_error`.
+    /// - `max_retries` is compared against the task's current `retry_count`.
+    /// - `actual_read_bytes` / `actual_write_bytes` are recorded only on the
+    ///   permanent-failure path.
+    ///
+    /// A prune check runs after the commit on both paths. If no task with
+    /// `id` exists this is a no-op returning `Ok(())`. On that path, and on
+    /// any error, the transaction is rolled back explicitly so the pooled
+    /// connection is not returned with a transaction still open.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::Database`] if any statement fails.
+    pub async fn fail(
+        &self,
+        id: i64,
+        error: &str,
+        retryable: bool,
+        max_retries: i32,
+        actual_read_bytes: i64,
+        actual_write_bytes: i64,
+    ) -> Result<(), StoreError> {
+        tracing::debug!(task_id = id, "store.fail: BEGIN tx");
+        let mut conn = self.begin_write().await?;
+        tracing::debug!(task_id = id, "store.fail: BEGIN acquired");
+
+        let outcome = Self::fail_in_tx(
+            &mut *conn,
+            id,
+            error,
+            retryable,
+            max_retries,
+            actual_read_bytes,
+            actual_write_bytes,
+        )
+        .await;
+
+        let committed = match outcome {
+            Ok(committed) => committed,
+            Err(e) => {
+                // Best effort; the original error is the one worth reporting.
+                let _ = sqlx::query("ROLLBACK").execute(&mut *conn).await;
+                return Err(e);
+            }
+        };
+        if !committed {
+            // Task not found: nothing was written, just close the transaction.
+            let _ = sqlx::query("ROLLBACK").execute(&mut *conn).await;
+            return Ok(());
+        }
+        tracing::debug!(task_id = id, "store.fail: COMMIT ok");
+
+        self.maybe_prune().await;
+
+        Ok(())
+    }
+
+    /// Transaction body of [`fail`](Self::fail). Returns `Ok(true)` after a
+    /// successful `COMMIT`, or `Ok(false)` when the task does not exist (the
+    /// transaction is then still open).
+    async fn fail_in_tx(
+        conn: &mut sqlx::SqliteConnection,
+        id: i64,
+        error: &str,
+        retryable: bool,
+        max_retries: i32,
+        actual_read_bytes: i64,
+        actual_write_bytes: i64,
+    ) -> Result<bool, StoreError> {
+        let row = sqlx::query("SELECT * FROM tasks WHERE id = ?")
+            .bind(id)
+            .fetch_optional(&mut *conn)
+            .await?;
+
+        let Some(row) = row else { return Ok(false) };
+        let task = row_to_task_record(&row);
+
+        if retryable && task.retry_count < max_retries {
+            // Requeue with incremented retry count, same priority.
+            sqlx::query(
+                "UPDATE tasks SET status = 'pending', started_at = NULL,
+                    retry_count = retry_count + 1, last_error = ?
+                 WHERE id = ?",
+            )
+            .bind(error)
+            .bind(id)
+            .execute(&mut *conn)
+            .await?;
+        } else {
+            // Permanent failure — move to history.
+            let started_at_text: Option<String> = task
+                .started_at
+                .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string());
+
+            let duration_ms: Option<i64> = if started_at_text.is_some() {
+                sqlx::query_scalar(
+                    "SELECT CAST((julianday('now') - julianday(?)) * 86400000 AS INTEGER)",
+                )
+                .bind(&started_at_text)
+                .fetch_one(&mut *conn)
+                .await?
+            } else {
+                None
+            };
+
+            sqlx::query(
+                "INSERT INTO task_history (task_type, key, priority, status, payload,
+                    expected_read_bytes, expected_write_bytes, actual_read_bytes, actual_write_bytes,
+                    retry_count, last_error, created_at, started_at, duration_ms)
+                 VALUES (?, ?, ?, 'failed', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            )
+            .bind(&task.task_type)
+            .bind(&task.key)
+            .bind(task.priority.value() as i32)
+            .bind(&task.payload)
+            .bind(task.expected_read_bytes)
+            .bind(task.expected_write_bytes)
+            .bind(actual_read_bytes)
+            .bind(actual_write_bytes)
+            // Count the attempt that just failed.
+            .bind(task.retry_count + 1)
+            .bind(error)
+            .bind(task.created_at.format("%Y-%m-%d %H:%M:%S").to_string())
+            .bind(&started_at_text)
+            .bind(duration_ms)
+            .execute(&mut *conn)
+            .await?;
+
+            sqlx::query("DELETE FROM tasks WHERE id = ?")
+                .bind(id)
+                .execute(&mut *conn)
+                .await?;
+        }
+
+        sqlx::query("COMMIT").execute(&mut *conn).await?;
+        Ok(true)
+    }
+
+    /// Pause a task (used for preemption): set its status to `paused` and
+    /// clear `started_at`.
+    ///
+    /// NOTE(review): the UPDATE matches on id only, so it applies whatever
+    /// the task's current status is — confirm callers only pass running
+    /// tasks.
+    pub async fn pause(&self, id: i64) -> Result<(), StoreError> {
+        let sql = "UPDATE tasks SET status = 'paused', started_at = NULL WHERE id = ?";
+        sqlx::query(sql).bind(id).execute(&self.pool).await?;
+        Ok(())
+    }
+
+    /// Move a paused task back to `pending`. A task that is not currently
+    /// `paused` is left untouched.
+    pub async fn resume(&self, id: i64) -> Result<(), StoreError> {
+        let sql = "UPDATE tasks SET status = 'pending' WHERE id = ? AND status = 'paused'";
+        sqlx::query(sql).bind(id).execute(&self.pool).await?;
+        Ok(())
+    }
+
+    // ── Query: active queue ─────────────────────────────────────────
+
+    /// Every task in the `running` state, ordered by `priority ASC, id ASC`.
+    pub async fn running_tasks(&self) -> Result<Vec<TaskRecord>, StoreError> {
+        let sql = "SELECT * FROM tasks WHERE status = 'running' ORDER BY priority ASC, id ASC";
+        let running = sqlx::query(sql).fetch_all(&self.pool).await?;
+
+        let mut records = Vec::with_capacity(running.len());
+        for row in &running {
+            records.push(row_to_task_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Number of tasks in the `running` state.
+    pub async fn running_count(&self) -> Result<i64, StoreError> {
+        let (running,): (i64,) =
+            sqlx::query_as("SELECT COUNT(*) FROM tasks WHERE status = 'running'")
+                .fetch_one(&self.pool)
+                .await?;
+        Ok(running)
+    }
+
+    /// Up to `limit` pending tasks, ordered by `priority ASC, id ASC`.
+    pub async fn pending_tasks(&self, limit: i32) -> Result<Vec<TaskRecord>, StoreError> {
+        let sql =
+            "SELECT * FROM tasks WHERE status = 'pending' ORDER BY priority ASC, id ASC LIMIT ?";
+        let page = sqlx::query(sql).bind(limit).fetch_all(&self.pool).await?;
+
+        let mut records = Vec::with_capacity(page.len());
+        for row in &page {
+            records.push(row_to_task_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Number of tasks in the `pending` state.
+    pub async fn pending_count(&self) -> Result<i64, StoreError> {
+        let (pending,): (i64,) =
+            sqlx::query_as("SELECT COUNT(*) FROM tasks WHERE status = 'pending'")
+                .fetch_one(&self.pool)
+                .await?;
+        Ok(pending)
+    }
+
+    /// All pending tasks of the given `task_type`, ordered by
+    /// `priority ASC, id ASC`.
+    pub async fn pending_by_type(&self, task_type: &str) -> Result<Vec<TaskRecord>, StoreError> {
+        let sql =
+            "SELECT * FROM tasks WHERE status = 'pending' AND task_type = ? ORDER BY priority ASC, id ASC";
+        let matching = sqlx::query(sql)
+            .bind(task_type)
+            .fetch_all(&self.pool)
+            .await?;
+
+        let mut records = Vec::with_capacity(matching.len());
+        for row in &matching {
+            records.push(row_to_task_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Number of tasks in the `paused` state.
+    pub async fn paused_count(&self) -> Result<i64, StoreError> {
+        let (paused,): (i64,) =
+            sqlx::query_as("SELECT COUNT(*) FROM tasks WHERE status = 'paused'")
+                .fetch_one(&self.pool)
+                .await?;
+        Ok(paused)
+    }
+
+    /// Every task in the `paused` state, ordered by `priority ASC, id ASC`.
+    pub async fn paused_tasks(&self) -> Result<Vec<TaskRecord>, StoreError> {
+        let sql = "SELECT * FROM tasks WHERE status = 'paused' ORDER BY priority ASC, id ASC";
+        let paused = sqlx::query(sql).fetch_all(&self.pool).await?;
+
+        let mut records = Vec::with_capacity(paused.len());
+        for row in &paused {
+            records.push(row_to_task_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Fetch the row in `tasks` (the active queue) with the given id.
+    /// Returns `None` when there is no such row.
+    pub async fn task_by_id(&self, id: i64) -> Result<Option<TaskRecord>, StoreError> {
+        let found = sqlx::query("SELECT * FROM tasks WHERE id = ?")
+            .bind(id)
+            .fetch_optional(&self.pool)
+            .await?;
+
+        match found {
+            Some(ref row) => Ok(Some(row_to_task_record(row))),
+            None => Ok(None),
+        }
+    }
+
+    /// Fetch the row in `tasks` (the active queue) with the given dedup key.
+    /// Returns `None` when there is no such row.
+    pub async fn task_by_key(&self, key: &str) -> Result<Option<TaskRecord>, StoreError> {
+        let found = sqlx::query("SELECT * FROM tasks WHERE key = ?")
+            .bind(key)
+            .fetch_optional(&self.pool)
+            .await?;
+
+        match found {
+            Some(ref row) => Ok(Some(row_to_task_record(row))),
+            None => Ok(None),
+        }
+    }
+
+    /// Totals of `expected_read_bytes` and `expected_write_bytes` over all
+    /// running tasks, returned as `(read, write)`. Both are 0 when nothing
+    /// is running.
+    pub async fn running_io_totals(&self) -> Result<(i64, i64), StoreError> {
+        let (read_total, write_total): (i64, i64) = sqlx::query_as(
+            "SELECT COALESCE(SUM(expected_read_bytes), 0), COALESCE(SUM(expected_write_bytes), 0)
+             FROM tasks WHERE status = 'running'",
+        )
+        .fetch_one(&self.pool)
+        .await?;
+        Ok((read_total, write_total))
+    }
+
+    // ── Query: history ──────────────────────────────────────────────
+
+    /// Fetch the `task_history` row with the given id.
+    /// Returns `None` when there is no such row.
+    pub async fn history_by_id(&self, id: i64) -> Result<Option<TaskHistoryRecord>, StoreError> {
+        let found = sqlx::query("SELECT * FROM task_history WHERE id = ?")
+            .bind(id)
+            .fetch_optional(&self.pool)
+            .await?;
+
+        match found {
+            Some(ref row) => Ok(Some(row_to_history_record(row))),
+            None => Ok(None),
+        }
+    }
+
+    /// Recent history entries, newest first.
+    ///
+    /// `limit` is the page size and `offset` the number of rows to skip.
+    /// Rows are ordered by `completed_at DESC` with `id DESC` as a
+    /// tie-breaker, so entries sharing a timestamp keep a fixed relative
+    /// order and paging with `offset` neither repeats nor skips them.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::Database`] if the query fails.
+    pub async fn history(
+        &self,
+        limit: i32,
+        offset: i32,
+    ) -> Result<Vec<TaskHistoryRecord>, StoreError> {
+        let rows = sqlx::query(
+            "SELECT * FROM task_history ORDER BY completed_at DESC, id DESC LIMIT ? OFFSET ?",
+        )
+        .bind(limit)
+        .bind(offset)
+        .fetch_all(&self.pool)
+        .await?;
+        Ok(rows.iter().map(row_to_history_record).collect())
+    }
+
+    /// Up to `limit` history entries of the given `task_type`, newest
+    /// `completed_at` first.
+    pub async fn history_by_type(
+        &self,
+        task_type: &str,
+        limit: i32,
+    ) -> Result<Vec<TaskHistoryRecord>, StoreError> {
+        let sql =
+            "SELECT * FROM task_history WHERE task_type = ? ORDER BY completed_at DESC LIMIT ?";
+        let matching = sqlx::query(sql)
+            .bind(task_type)
+            .bind(limit)
+            .fetch_all(&self.pool)
+            .await?;
+
+        let mut records = Vec::with_capacity(matching.len());
+        for row in &matching {
+            records.push(row_to_history_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Every history entry recorded under the given dedup key (all past runs
+    /// of that key), newest `completed_at` first.
+    pub async fn history_by_key(&self, key: &str) -> Result<Vec<TaskHistoryRecord>, StoreError> {
+        let sql = "SELECT * FROM task_history WHERE key = ? ORDER BY completed_at DESC";
+        let past_runs = sqlx::query(sql).bind(key).fetch_all(&self.pool).await?;
+
+        let mut records = Vec::with_capacity(past_runs.len());
+        for row in &past_runs {
+            records.push(row_to_history_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Up to `limit` history entries with status `failed`, newest
+    /// `completed_at` first.
+    pub async fn failed_tasks(&self, limit: i32) -> Result<Vec<TaskHistoryRecord>, StoreError> {
+        let sql =
+            "SELECT * FROM task_history WHERE status = 'failed' ORDER BY completed_at DESC LIMIT ?";
+        let failures = sqlx::query(sql).bind(limit).fetch_all(&self.pool).await?;
+
+        let mut records = Vec::with_capacity(failures.len());
+        for row in &failures {
+            records.push(row_to_history_record(row));
+        }
+        Ok(records)
+    }
+
+    /// Aggregate stats for a task type from its history.
+    ///
+    /// - `count` covers every history row of the type (completed and failed).
+    /// - The duration and byte averages are taken over `completed` rows only.
+    /// - `failure_rate` is failed rows divided by all rows.
+    ///
+    /// Every figure is 0 when the type has no history. Each aggregate is
+    /// wrapped in `COALESCE` so the query never yields SQL NULL and the
+    /// result does not depend on how the driver decodes NULL into `f64`.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::Database`] if the query fails.
+    pub async fn history_stats(&self, task_type: &str) -> Result<TypeStats, StoreError> {
+        let row = sqlx::query(
+            "SELECT
+                COUNT(*) as total,
+                COALESCE(AVG(CASE WHEN status = 'completed' THEN duration_ms END), 0.0) as avg_dur,
+                COALESCE(AVG(CASE WHEN status = 'completed' THEN actual_read_bytes END), 0.0) as avg_read,
+                COALESCE(AVG(CASE WHEN status = 'completed' THEN actual_write_bytes END), 0.0) as avg_write,
+                COALESCE(CAST(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) AS REAL) / MAX(COUNT(*), 1), 0.0) as fail_rate
+             FROM task_history WHERE task_type = ?",
+        )
+        .bind(task_type)
+        .fetch_one(&self.pool)
+        .await?;
+
+        Ok(TypeStats {
+            count: row.get::<i64, _>("total"),
+            avg_duration_ms: row.get::<f64, _>("avg_dur"),
+            avg_read_bytes: row.get::<f64, _>("avg_read"),
+            avg_write_bytes: row.get::<f64, _>("avg_write"),
+            failure_rate: row.get::<f64, _>("fail_rate"),
+        })
+    }
+
+    /// Average IO throughput (bytes/sec) for recently completed tasks of a type.
+    /// Used by the scheduler for IO budget estimation.
+    ///
+    /// Only the `recent_limit` most recently completed rows with a positive
+    /// `duration_ms` are considered. Returns `(read, write)`; both are `0.0`
+    /// when no such rows exist. The fallback is written as the REAL literal
+    /// `0.0` (as in `history_stats`) so the column type always matches the
+    /// `f64` it is decoded into.
+    ///
+    /// # Errors
+    ///
+    /// [`StoreError::Database`] if the query fails.
+    pub async fn avg_throughput(
+        &self,
+        task_type: &str,
+        recent_limit: i32,
+    ) -> Result<(f64, f64), StoreError> {
+        let row: (f64, f64) = sqlx::query_as(
+            "SELECT
+                COALESCE(AVG(CASE WHEN duration_ms > 0 THEN actual_read_bytes * 1000.0 / duration_ms END), 0.0),
+                COALESCE(AVG(CASE WHEN duration_ms > 0 THEN actual_write_bytes * 1000.0 / duration_ms END), 0.0)
+             FROM (
+                 SELECT actual_read_bytes, actual_write_bytes, duration_ms
+                 FROM task_history
+                 WHERE task_type = ? AND status = 'completed' AND duration_ms > 0
+                 ORDER BY completed_at DESC
+                 LIMIT ?
+             )",
+        )
+        .bind(task_type)
+        .bind(recent_limit)
+        .fetch_one(&self.pool)
+        .await?;
+        Ok(row)
+    }
+
+    // ── Unified lookup ──────────────────────────────────────────────
+
+    /// Resolve a dedup key to a task: the active queue is consulted first,
+    /// then history.
+    ///
+    /// This is the low-level building block for [`Scheduler::task_lookup`].
+    /// `key` is the already-computed dedup key, as produced by
+    /// [`generate_dedup_key`](crate::task::generate_dedup_key) or
+    /// [`TaskSubmission::effective_key`].
+    ///
+    /// Returns [`TaskLookup::Active`] for a row in `tasks`, otherwise
+    /// [`TaskLookup::History`] with the entry that has the latest
+    /// `completed_at`, otherwise [`TaskLookup::NotFound`].
+    pub async fn task_lookup(&self, key: &str) -> Result<TaskLookup, StoreError> {
+        // An active row wins over anything in history.
+        match self.task_by_key(key).await? {
+            Some(active) => return Ok(TaskLookup::Active(active)),
+            None => {}
+        }
+
+        // Otherwise take the latest history entry for this key, if any.
+        let latest = sqlx::query(
+            "SELECT * FROM task_history WHERE key = ? ORDER BY completed_at DESC LIMIT 1",
+        )
+        .bind(key)
+        .fetch_optional(&self.pool)
+        .await?;
+
+        let lookup = if let Some(past) = latest {
+            TaskLookup::History(row_to_history_record(&past))
+        } else {
+            TaskLookup::NotFound
+        };
+        Ok(lookup)
+    }
+
+    // ── Pruning ─────────────────────────────────────────────────────
+
+    /// Delete every history record whose `completed_at` is more than
+    /// `max_age_days` days before the database's current time.
+    ///
+    /// Returns how many rows were removed.
+    pub async fn prune_history_by_age(&self, max_age_days: i64) -> Result<u64, StoreError> {
+        // SQLite date modifier such as "-30 days", bound as a parameter.
+        let age_modifier = format!("-{max_age_days} days");
+        let outcome =
+            sqlx::query("DELETE FROM task_history WHERE completed_at < datetime('now', ?)")
+                .bind(age_modifier)
+                .execute(&self.pool)
+                .await?;
+        let removed = outcome.rows_affected();
+        Ok(removed)
+    }
+
+    /// Prune history so that at most `keep_latest` records remain, keeping
+    /// the ones with the newest `completed_at`.
+    ///
+    /// Rows that share a `completed_at` value are ordered by `id` (highest
+    /// first) so the surviving set is deterministic rather than depending on
+    /// whatever order SQLite happens to return tied rows in.
+    ///
+    /// Returns the number of records deleted.
+    pub async fn prune_history_by_count(&self, keep_latest: i64) -> Result<u64, StoreError> {
+        let result = sqlx::query(
+            "DELETE FROM task_history WHERE id NOT IN (
+                 SELECT id FROM task_history ORDER BY completed_at DESC, id DESC LIMIT ?
+             )",
+        )
+        .bind(keep_latest)
+        .execute(&self.pool)
+        .await?;
+        Ok(result.rows_affected())
+    }
+
+    /// Bump the completion counter and run the retention policy whenever the
+    /// counter's previous value is a multiple of `prune_interval`.
+    ///
+    /// Does nothing (and does not touch the counter) when no retention policy
+    /// is configured. Prune errors are logged rather than propagated since the
+    /// task itself already committed.
+    async fn maybe_prune(&self) {
+        if self.retention_policy.is_some() {
+            // `fetch_add` yields the value *before* the increment, so the very
+            // first completion (previous value 0) also triggers a prune.
+            // NOTE(review): a zero `prune_interval` would panic on `%` — confirm
+            // it is validated where the store is configured.
+            let previous = self.completion_count.fetch_add(1, Ordering::Relaxed);
+            if previous % self.prune_interval == 0 {
+                if let Err(e) = self.auto_prune().await {
+                    tracing::warn!("history prune failed: {e}");
+                }
+            }
+        }
+    }
+
+    /// Run whichever prune matches the configured retention policy; a store
+    /// without a policy is left untouched.
+    async fn auto_prune(&self) -> Result<(), StoreError> {
+        if let Some(policy) = &self.retention_policy {
+            // The deleted-row counts are not needed here, only the error.
+            match policy {
+                RetentionPolicy::MaxCount(n) => {
+                    self.prune_history_by_count(*n).await?;
+                }
+                RetentionPolicy::MaxAgeDays(days) => {
+                    self.prune_history_by_age(*days).await?;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Checkpoint the WAL and then close the connection pool.
+    ///
+    /// A failed checkpoint is only logged; the pool is closed regardless.
+    pub async fn close(&self) {
+        // Fold the WAL file back into the main database before shutting down.
+        let checkpoint = sqlx::raw_sql("PRAGMA wal_checkpoint(TRUNCATE)")
+            .execute(&self.pool)
+            .await;
+        if let Err(e) = checkpoint {
+            tracing::warn!(error = %e, "WAL checkpoint failed during close");
+        }
+        self.pool.close().await;
+    }
+
+    /// Remove the task with the given `id` from the active queue.
+    ///
+    /// Returns `true` when a row was deleted and `false` when no task had
+    /// that id.
+    pub async fn delete(&self, id: i64) -> Result<bool, StoreError> {
+        let outcome = sqlx::query("DELETE FROM tasks WHERE id = ?")
+            .bind(id)
+            .execute(&self.pool)
+            .await?;
+        let removed_any = outcome.rows_affected() != 0;
+        Ok(removed_any)
+    }
+}
+
+// ── Row mapping helpers ─────────────────────────────────────────────
+
+/// Parse a SQLite `YYYY-MM-DD HH:MM:SS` timestamp string and interpret it as
+/// UTC.
+///
+/// Row mapping is infallible, so input that does not match the format still
+/// falls back to chrono's default `DateTime<Utc>`. The fallback is no longer
+/// silent: a warning is logged so corrupt or unexpected timestamps can be
+/// noticed instead of quietly turning into the default value.
+fn parse_datetime(s: &str) -> DateTime<Utc> {
+    match chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
+        Ok(naive) => naive.and_utc(),
+        Err(err) => {
+            tracing::warn!(
+                value = s,
+                error = %err,
+                "unparseable timestamp in task row; using default"
+            );
+            DateTime::<Utc>::default()
+        }
+    }
+}
+
+/// Build a [`TaskRecord`] from a row of the active-queue table.
+///
+/// An unrecognized `status` string maps to [`TaskStatus::Pending`], and the
+/// integer priority columns are narrowed with `as u8`.
+/// NOTE(review): `as u8` truncates silently — presumably stored priorities
+/// always fit in 0..=255; confirm against the schema.
+fn row_to_task_record(row: &sqlx::sqlite::SqliteRow) -> TaskRecord {
+    let started: Option<String> = row.get("started_at");
+    let requeue_prio: Option<i32> = row.get("requeue_priority");
+
+    TaskRecord {
+        id: row.get("id"),
+        task_type: row.get("task_type"),
+        key: row.get("key"),
+        priority: Priority::new(row.get::<i32, _>("priority") as u8),
+        status: row
+            .get::<String, _>("status")
+            .parse()
+            .unwrap_or(TaskStatus::Pending),
+        payload: row.get("payload"),
+        expected_read_bytes: row.get("expected_read_bytes"),
+        expected_write_bytes: row.get("expected_write_bytes"),
+        retry_count: row.get("retry_count"),
+        last_error: row.get("last_error"),
+        created_at: parse_datetime(&row.get::<String, _>("created_at")),
+        started_at: started.as_deref().map(parse_datetime),
+        // The requeue flag is stored as an integer; any non-zero value is true.
+        requeue: row.get::<i32, _>("requeue") != 0,
+        requeue_priority: requeue_prio.map(|p| Priority::new(p as u8)),
+    }
+}
+
+/// Build a [`TaskHistoryRecord`] from a row of `task_history`.
+///
+/// An unrecognized `status` string maps to [`HistoryStatus::Failed`], and the
+/// integer priority column is narrowed with `as u8`.
+fn row_to_history_record(row: &sqlx::sqlite::SqliteRow) -> TaskHistoryRecord {
+    let started: Option<String> = row.get("started_at");
+
+    TaskHistoryRecord {
+        id: row.get("id"),
+        task_type: row.get("task_type"),
+        key: row.get("key"),
+        priority: Priority::new(row.get::<i32, _>("priority") as u8),
+        status: row
+            .get::<String, _>("status")
+            .parse()
+            .unwrap_or(HistoryStatus::Failed),
+        payload: row.get("payload"),
+        expected_read_bytes: row.get("expected_read_bytes"),
+        expected_write_bytes: row.get("expected_write_bytes"),
+        actual_read_bytes: row.get("actual_read_bytes"),
+        actual_write_bytes: row.get("actual_write_bytes"),
+        retry_count: row.get("retry_count"),
+        last_error: row.get("last_error"),
+        created_at: parse_datetime(&row.get::<String, _>("created_at")),
+        started_at: started.as_deref().map(parse_datetime),
+        completed_at: parse_datetime(&row.get::<String, _>("completed_at")),
+        duration_ms: row.get("duration_ms"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Fresh in-memory store for a single test; panics if it cannot be opened.
+    async fn test_store() -> TaskStore {
+        let opened = TaskStore::open_memory().await;
+        opened.unwrap()
+    }
+
+    /// Submission of type `"test"` with the given key and priority, a fixed
+    /// `b"hello"` payload, and 1000 / 500 expected read / write bytes.
+    fn make_submission(key: &str, priority: Priority) -> TaskSubmission {
+        let payload = b"hello".to_vec();
+        TaskSubmission {
+            task_type: "test".into(),
+            key: Some(key.into()),
+            priority,
+            payload: Some(payload),
+            expected_read_bytes: 1000,
+            expected_write_bytes: 500,
+        }
+    }
+
+    /// A submitted task can be popped, and popping marks it running with a
+    /// start time.
+    #[tokio::test]
+    async fn submit_and_pop() {
+        let store = test_store().await;
+        let submission = make_submission("job-1", Priority::NORMAL);
+        let want_key = submission.effective_key();
+
+        let inserted = store.submit(&submission).await.unwrap();
+        assert!(inserted.is_inserted());
+
+        let popped = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(popped.key, want_key);
+        assert_eq!(popped.status, TaskStatus::Running);
+        assert!(popped.started_at.is_some());
+    }
+
+    /// Submitting the same key twice at the same priority reports a duplicate.
+    #[tokio::test]
+    async fn dedup_prevents_duplicate_key() {
+        let store = test_store().await;
+        let submission = make_submission("dup-key", Priority::NORMAL);
+
+        let initial = store.submit(&submission).await.unwrap();
+        assert!(initial.is_inserted());
+
+        // Same priority, so there is nothing to upgrade.
+        let repeat = store.submit(&submission).await.unwrap();
+        assert_eq!(repeat, SubmitOutcome::Duplicate);
+    }
+
+    /// A duplicate submission at a more important priority upgrades the
+    /// pending task; a less important one leaves it alone.
+    #[tokio::test]
+    async fn dedup_upgrades_priority() {
+        let store = test_store().await;
+
+        // Start at NORMAL.
+        let at_normal = make_submission("upgrade-me", Priority::NORMAL);
+        let key = at_normal.effective_key();
+        assert!(store.submit(&at_normal).await.unwrap().is_inserted());
+
+        // Same key at HIGH should upgrade the stored task.
+        let at_high = make_submission("upgrade-me", Priority::HIGH);
+        let upgrade = store.submit(&at_high).await.unwrap();
+        assert!(matches!(upgrade, SubmitOutcome::Upgraded(_)));
+
+        let stored = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(stored.priority, Priority::HIGH);
+
+        // BACKGROUND is less important, so this is a plain duplicate.
+        let at_background = make_submission("upgrade-me", Priority::BACKGROUND);
+        let downgrade = store.submit(&at_background).await.unwrap();
+        assert_eq!(downgrade, SubmitOutcome::Duplicate);
+
+        // The stored priority is unchanged.
+        let stored = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(stored.priority, Priority::HIGH);
+    }
+
+    /// Resubmitting the key of a running task flags it for requeue; completing
+    /// the run then returns it to pending at the requested priority.
+    #[tokio::test]
+    async fn dedup_requeues_when_running() {
+        let store = test_store().await;
+
+        // Get a task into the running state.
+        let original = make_submission("running-task", Priority::NORMAL);
+        let key = original.effective_key();
+        store.submit(&original).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        // Same key at HIGH while running: Requeued rather than a plain upgrade.
+        let resubmit = make_submission("running-task", Priority::HIGH);
+        let outcome = store.submit(&resubmit).await.unwrap();
+        assert!(matches!(outcome, SubmitOutcome::Requeued(_)));
+
+        // The running row now carries the requeue flag and target priority.
+        let flagged = store.task_by_key(&key).await.unwrap().unwrap();
+        assert!(flagged.requeue);
+        assert_eq!(flagged.requeue_priority, Some(Priority::HIGH));
+
+        // Finishing the run should reset the row to pending.
+        let no_io = TaskResult {
+            actual_read_bytes: 0,
+            actual_write_bytes: 0,
+        };
+        store.complete(in_flight.id, &no_io).await.unwrap();
+
+        // Pending again, at HIGH, with the requeue markers cleared.
+        let back_in_queue = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(back_in_queue.status, TaskStatus::Pending);
+        assert_eq!(back_in_queue.priority, Priority::HIGH);
+        assert!(!back_in_queue.requeue);
+        assert_eq!(back_in_queue.requeue_priority, None);
+
+        // And it can be popped again under the same id.
+        let popped = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(popped.id, in_flight.id);
+    }
+
+    /// Requesting a requeue twice at the same priority reports the second
+    /// request as a duplicate.
+    #[tokio::test]
+    async fn dedup_requeue_already_requeued_same_priority() {
+        let store = test_store().await;
+
+        // Put the task into the running state.
+        let original = make_submission("rq-dup", Priority::NORMAL);
+        store.submit(&original).await.unwrap();
+        store.pop_next().await.unwrap();
+
+        let at_high = make_submission("rq-dup", Priority::HIGH);
+
+        // First request at HIGH marks the task for requeue.
+        let first_request = store.submit(&at_high).await.unwrap();
+        assert!(matches!(first_request, SubmitOutcome::Requeued(_)));
+
+        // An identical second request changes nothing.
+        let second_request = store.submit(&at_high).await.unwrap();
+        assert_eq!(second_request, SubmitOutcome::Duplicate);
+    }
+
+    /// A later requeue request at a more important priority raises the
+    /// pending `requeue_priority`.
+    #[tokio::test]
+    async fn dedup_requeue_upgrades_priority() {
+        let store = test_store().await;
+
+        // Running task that started out at BACKGROUND.
+        let original = make_submission("rq-upgrade", Priority::BACKGROUND);
+        let key = original.effective_key();
+        store.submit(&original).await.unwrap();
+        store.pop_next().await.unwrap();
+
+        // Requeue request at NORMAL.
+        let at_normal = make_submission("rq-upgrade", Priority::NORMAL);
+        let first_request = store.submit(&at_normal).await.unwrap();
+        assert!(matches!(first_request, SubmitOutcome::Requeued(_)));
+
+        // Requeue request at HIGH should replace the NORMAL target.
+        let at_high = make_submission("rq-upgrade", Priority::HIGH);
+        let second_request = store.submit(&at_high).await.unwrap();
+        assert!(matches!(second_request, SubmitOutcome::Requeued(_)));
+
+        let stored = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(stored.requeue_priority, Some(Priority::HIGH));
+    }
+
+    /// A non-retryable failure discards a pending requeue request and frees
+    /// the key.
+    #[tokio::test]
+    async fn permanent_failure_drops_requeue() {
+        let store = test_store().await;
+
+        let original = make_submission("fail-rq", Priority::NORMAL);
+        store.submit(&original).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        // Flag the running task for requeue.
+        let at_high = make_submission("fail-rq", Priority::HIGH);
+        store.submit(&at_high).await.unwrap();
+
+        // Fail it permanently (not retryable); the requeue flag is dropped.
+        store
+            .fail(in_flight.id, "boom", false, 0, 0, 0)
+            .await
+            .unwrap();
+
+        // The key can be submitted afresh.
+        let resubmitted = store.submit(&original).await.unwrap();
+        assert!(resubmitted.is_inserted());
+    }
+
+    /// The same logical key under two different task types does not dedup.
+    #[tokio::test]
+    async fn dedup_allows_same_key_different_types() {
+        let store = test_store().await;
+
+        let of_type_a = TaskSubmission {
+            task_type: "type_a".into(),
+            key: Some("shared-key".into()),
+            priority: Priority::NORMAL,
+            payload: None,
+            expected_read_bytes: 0,
+            expected_write_bytes: 0,
+        };
+        // Identical apart from the task type.
+        let of_type_b = TaskSubmission {
+            task_type: "type_b".into(),
+            ..of_type_a.clone()
+        };
+
+        let outcome_a = store.submit(&of_type_a).await.unwrap();
+        assert!(outcome_a.is_inserted());
+
+        // Different task type, so this is a separate task rather than a duplicate.
+        let outcome_b = store.submit(&of_type_b).await.unwrap();
+        assert!(outcome_b.is_inserted());
+    }
+
+    /// Without an explicit key, dedup falls back to task type plus payload.
+    #[tokio::test]
+    async fn dedup_by_payload_when_no_key() {
+        let store = test_store().await;
+
+        let keyless = TaskSubmission {
+            task_type: "ingest".into(),
+            key: None,
+            priority: Priority::NORMAL,
+            payload: Some(b"same-data".to_vec()),
+            expected_read_bytes: 0,
+            expected_write_bytes: 0,
+        };
+
+        let initial = store.submit(&keyless).await.unwrap();
+        assert!(initial.is_inserted());
+
+        // Same type and payload: treated as a duplicate.
+        let repeat = store.submit(&keyless).await.unwrap();
+        assert_eq!(repeat, SubmitOutcome::Duplicate);
+
+        // A different payload is a different task.
+        let mut other_payload = keyless.clone();
+        other_payload.payload = Some(b"different-data".to_vec());
+        let distinct = store.submit(&other_payload).await.unwrap();
+        assert!(distinct.is_inserted());
+    }
+
+    /// Tasks pop in priority order (REALTIME, NORMAL, BACKGROUND) regardless
+    /// of submission order.
+    #[tokio::test]
+    async fn priority_ordering() {
+        let store = test_store().await;
+
+        let background = make_submission("bg", Priority::BACKGROUND);
+        let realtime = make_submission("rt", Priority::REALTIME);
+        let normal = make_submission("normal", Priority::NORMAL);
+
+        let background_key = background.effective_key();
+        let realtime_key = realtime.effective_key();
+        let normal_key = normal.effective_key();
+
+        // Submitted deliberately out of priority order.
+        store.submit(&background).await.unwrap();
+        store.submit(&realtime).await.unwrap();
+        store.submit(&normal).await.unwrap();
+
+        let popped_first = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(popped_first.key, realtime_key);
+
+        let popped_second = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(popped_second.key, normal_key);
+
+        let popped_third = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(popped_third.key, background_key);
+    }
+
+    /// Completing a task removes it from the active queue and records it in
+    /// history with the reported IO.
+    #[tokio::test]
+    async fn complete_moves_to_history() {
+        let store = test_store().await;
+        let submission = make_submission("done", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        let reported = TaskResult {
+            actual_read_bytes: 2000,
+            actual_write_bytes: 1000,
+        };
+        store.complete(in_flight.id, &reported).await.unwrap();
+
+        // No longer active.
+        let active = store.task_by_key(&key).await.unwrap();
+        assert!(active.is_none());
+
+        // Exactly one completed history entry with the reported read bytes.
+        let entries = store.history_by_key(&key).await.unwrap();
+        assert_eq!(entries.len(), 1);
+        assert_eq!(entries[0].status, HistoryStatus::Completed);
+        assert_eq!(entries[0].actual_read_bytes, Some(2000));
+    }
+
+    /// A retryable failure with retries left puts the task back to pending
+    /// and records the error.
+    #[tokio::test]
+    async fn fail_retryable_requeues() {
+        let store = test_store().await;
+        let submission = make_submission("retry-me", Priority::HIGH);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        let failed = store
+            .fail(in_flight.id, "transient error", true, 3, 0, 0)
+            .await;
+        failed.unwrap();
+
+        // Still active: pending, one retry counted, error message kept.
+        let retried = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(retried.status, TaskStatus::Pending);
+        assert_eq!(retried.retry_count, 1);
+        assert_eq!(retried.last_error.as_deref(), Some("transient error"));
+    }
+
+    /// Once the retry budget is used up, a retryable failure moves the task to
+    /// history as failed.
+    #[tokio::test]
+    async fn fail_exhausted_retries_moves_to_history() {
+        let store = test_store().await;
+        let submission = make_submission("permanent", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        // Attempt 1: retry_count 0 < max_retries 1, so it is requeued.
+        let first_attempt = store.pop_next().await.unwrap().unwrap();
+        store
+            .fail(first_attempt.id, "err1", true, 1, 0, 0)
+            .await
+            .unwrap();
+
+        // Attempt 2: retry_count 1 >= max_retries 1, so it is retired.
+        let second_attempt = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(second_attempt.retry_count, 1);
+        store
+            .fail(second_attempt.id, "err2", true, 1, 100, 50)
+            .await
+            .unwrap();
+
+        // Gone from the active queue, present once in failed history.
+        assert!(store.task_by_key(&key).await.unwrap().is_none());
+        let failures = store.failed_tasks(10).await.unwrap();
+        assert_eq!(failures.len(), 1);
+        assert_eq!(failures[0].status, HistoryStatus::Failed);
+    }
+
+    /// A payload one byte over `MAX_PAYLOAD_BYTES` is rejected on submit.
+    #[tokio::test]
+    async fn payload_size_limit() {
+        let store = test_store().await;
+        let oversized = TaskSubmission {
+            payload: Some(vec![0u8; MAX_PAYLOAD_BYTES + 1]),
+            ..make_submission("big", Priority::NORMAL)
+        };
+
+        let rejection = store.submit(&oversized).await.unwrap_err();
+        assert!(matches!(rejection, StoreError::PayloadTooLarge));
+    }
+
+    /// `running_io_totals` sums the expected read/write bytes of all running
+    /// tasks.
+    #[tokio::test]
+    async fn running_io_totals() {
+        let store = test_store().await;
+
+        let first = TaskSubmission {
+            expected_read_bytes: 5000,
+            expected_write_bytes: 2000,
+            ..make_submission("io-1", Priority::NORMAL)
+        };
+        store.submit(&first).await.unwrap();
+
+        let second = TaskSubmission {
+            expected_read_bytes: 3000,
+            expected_write_bytes: 1000,
+            ..make_submission("io-2", Priority::NORMAL)
+        };
+        store.submit(&second).await.unwrap();
+
+        // Move both into the running state.
+        store.pop_next().await.unwrap();
+        store.pop_next().await.unwrap();
+
+        let (total_read, total_write) = store.running_io_totals().await.unwrap();
+        assert_eq!(total_read, 8000);
+        assert_eq!(total_write, 3000);
+    }
+
+    /// After a task completes, its key can be submitted again as a new task.
+    #[tokio::test]
+    async fn key_freed_after_completion() {
+        let store = test_store().await;
+        let submission = make_submission("reuse", Priority::NORMAL);
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        let no_io = TaskResult {
+            actual_read_bytes: 0,
+            actual_write_bytes: 0,
+        };
+        store.complete(in_flight.id, &no_io).await.unwrap();
+
+        // The key is no longer held by an active task.
+        let resubmitted = store.submit(&submission).await.unwrap();
+        assert!(resubmitted.is_inserted());
+    }
+
+    /// History stats for a type count its completed runs and report a zero
+    /// failure rate when nothing failed.
+    #[tokio::test]
+    async fn history_stats_computation() {
+        let store = test_store().await;
+
+        // Run three tasks of type "test" to completion.
+        for n in 0..3 {
+            let submission = make_submission(&format!("stat-{n}"), Priority::NORMAL);
+            store.submit(&submission).await.unwrap();
+            let in_flight = store.pop_next().await.unwrap().unwrap();
+            let reported = TaskResult {
+                actual_read_bytes: 1000,
+                actual_write_bytes: 500,
+            };
+            store.complete(in_flight.id, &reported).await.unwrap();
+        }
+
+        let summary = store.history_stats("test").await.unwrap();
+        assert_eq!(summary.count, 3);
+        assert_eq!(summary.failure_rate, 0.0);
+    }
+
+    /// Pausing a running task lists it as paused; resuming returns it to
+    /// pending.
+    #[tokio::test]
+    async fn pause_and_resume() {
+        let store = test_store().await;
+        let submission = make_submission("pausable", Priority::NORMAL);
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        store.pause(in_flight.id).await.unwrap();
+        let paused_now = store.paused_tasks().await.unwrap();
+        assert_eq!(paused_now.len(), 1);
+        assert_eq!(paused_now[0].status, TaskStatus::Paused);
+
+        store.resume(in_flight.id).await.unwrap();
+        let pending_now = store.pending_tasks(10).await.unwrap();
+        assert_eq!(pending_now.len(), 1);
+        assert_eq!(pending_now[0].status, TaskStatus::Pending);
+    }
+
+    /// Smoke test: a freshly opened in-memory store is usable and starts with
+    /// no pending tasks.
+    ///
+    /// NOTE(review): despite the name, no custom configuration is passed here;
+    /// the store is opened with plain `open_memory()`.
+    #[tokio::test]
+    async fn open_with_custom_config() {
+        let store = TaskStore::open_memory().await.unwrap();
+        let pending = store.pending_count().await.unwrap();
+        assert_eq!(pending, 0);
+    }
+
+    /// Deleting an active task removes it; deleting it a second time reports
+    /// that nothing was removed.
+    #[tokio::test]
+    async fn delete_task() {
+        let store = test_store().await;
+        let submission = make_submission("del-me", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        let stored = store.task_by_key(&key).await.unwrap().unwrap();
+        let first_delete = store.delete(stored.id).await.unwrap();
+        assert!(first_delete);
+        assert!(store.task_by_key(&key).await.unwrap().is_none());
+
+        // The row is already gone, so this returns false.
+        let second_delete = store.delete(stored.id).await.unwrap();
+        assert!(!second_delete);
+    }
+
+    /// `task_by_id` finds an active task by id and returns `None` for an
+    /// unknown id.
+    #[tokio::test]
+    async fn task_by_id_lookup() {
+        let store = test_store().await;
+        let submission = make_submission("by-id", Priority::NORMAL);
+        let outcome = store.submit(&submission).await.unwrap();
+        let id = outcome.id().unwrap();
+
+        let found = store.task_by_id(id).await.unwrap().unwrap();
+        assert_eq!(found.id, id);
+        assert_eq!(found.key, submission.effective_key());
+
+        // An id that was never issued.
+        let missing = store.task_by_id(9999).await.unwrap();
+        assert!(missing.is_none());
+    }
+
+    /// `history_by_id` finds a history record by its id and returns `None` for
+    /// an unknown id.
+    #[tokio::test]
+    async fn history_by_id_lookup() {
+        let store = test_store().await;
+        let submission = make_submission("hist-id", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        let reported = TaskResult {
+            actual_read_bytes: 100,
+            actual_write_bytes: 50,
+        };
+        store.complete(in_flight.id, &reported).await.unwrap();
+
+        // Look the entry up by key first to learn its history id.
+        let entries = store.history_by_key(&key).await.unwrap();
+        assert_eq!(entries.len(), 1);
+        let history_id = entries[0].id;
+
+        let found = store.history_by_id(history_id).await.unwrap().unwrap();
+        assert_eq!(found.key, key);
+        assert_eq!(found.actual_read_bytes, Some(100));
+
+        // An id that was never issued.
+        let missing = store.history_by_id(9999).await.unwrap();
+        assert!(missing.is_none());
+    }
+
+    /// Requeueing a running task returns it to pending and clears its start
+    /// time.
+    #[tokio::test]
+    async fn requeue_running_task() {
+        let store = test_store().await;
+        let submission = make_submission("rq", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+        assert_eq!(in_flight.status, TaskStatus::Running);
+
+        store.requeue(in_flight.id).await.unwrap();
+
+        let after = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(after.status, TaskStatus::Pending);
+        assert!(after.started_at.is_none());
+    }
+
+    /// Peeking returns the next task without claiming it, so repeated peeks
+    /// see the same pending task.
+    #[tokio::test]
+    async fn peek_next_does_not_modify_status() {
+        let store = test_store().await;
+        let submission = make_submission("peek-me", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        // The peeked record is the submitted task, still pending.
+        let first_peek = store.peek_next().await.unwrap().unwrap();
+        assert_eq!(first_peek.key, key);
+        assert_eq!(first_peek.status, TaskStatus::Pending);
+
+        // The stored row was not touched either.
+        let stored = store.task_by_key(&key).await.unwrap().unwrap();
+        assert_eq!(stored.status, TaskStatus::Pending);
+        assert!(stored.started_at.is_none());
+
+        // A second peek yields the same task.
+        let second_peek = store.peek_next().await.unwrap().unwrap();
+        assert_eq!(second_peek.id, first_peek.id);
+    }
+
+    /// Peeking an empty queue yields `None`.
+    #[tokio::test]
+    async fn peek_next_empty_queue() {
+        let store = test_store().await;
+        let peeked = store.peek_next().await.unwrap();
+        assert!(peeked.is_none());
+    }
+
+    /// `pop_by_id` claims a pending task by id, marking it running with a
+    /// start time.
+    #[tokio::test]
+    async fn pop_by_id_claims_pending_task() {
+        let store = test_store().await;
+        let submission = make_submission("claim-me", Priority::NORMAL);
+        let key = submission.effective_key();
+        let outcome = store.submit(&submission).await.unwrap();
+        let id = outcome.id().unwrap();
+
+        let claimed = store.pop_by_id(id).await.unwrap().unwrap();
+        assert_eq!(claimed.key, key);
+        assert_eq!(claimed.status, TaskStatus::Running);
+        assert!(claimed.started_at.is_some());
+    }
+
+    /// `pop_by_id` refuses a task that has already been claimed.
+    #[tokio::test]
+    async fn pop_by_id_returns_none_if_already_running() {
+        let store = test_store().await;
+        let submission = make_submission("already-taken", Priority::NORMAL);
+        store.submit(&submission).await.unwrap();
+
+        // Claim it through the normal pop path first.
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        // It is running now, so a claim by id finds nothing to take.
+        let second_claim = store.pop_by_id(in_flight.id).await.unwrap();
+        assert!(second_claim.is_none());
+    }
+
+    /// `pop_by_id` with an unknown id yields `None`.
+    #[tokio::test]
+    async fn pop_by_id_returns_none_for_nonexistent() {
+        let store = test_store().await;
+        let claimed = store.pop_by_id(9999).await.unwrap();
+        assert!(claimed.is_none());
+    }
+
+    /// The peek-then-claim workflow: peek the next task, claim it by id, and
+    /// find the queue empty afterwards.
+    #[tokio::test]
+    async fn peek_then_pop_by_id_workflow() {
+        let store = test_store().await;
+        let submission = make_submission("peek-pop", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        // Look first, then claim exactly what was seen.
+        let candidate = store.peek_next().await.unwrap().unwrap();
+        let taken = store.pop_by_id(candidate.id).await.unwrap().unwrap();
+        assert_eq!(taken.key, key);
+        assert_eq!(taken.status, TaskStatus::Running);
+
+        // Nothing pending is left to peek at.
+        let leftover = store.peek_next().await.unwrap();
+        assert!(leftover.is_none());
+    }
+
+    /// Pruning by count deletes the surplus history records and reports how
+    /// many were removed.
+    #[tokio::test]
+    async fn prune_by_count() {
+        let store = test_store().await;
+
+        // Produce five history entries.
+        for n in 0..5 {
+            let submission = make_submission(&format!("prune-{n}"), Priority::NORMAL);
+            store.submit(&submission).await.unwrap();
+            let in_flight = store.pop_next().await.unwrap().unwrap();
+            let no_io = TaskResult {
+                actual_read_bytes: 0,
+                actual_write_bytes: 0,
+            };
+            store.complete(in_flight.id, &no_io).await.unwrap();
+        }
+
+        let before = store.history(100, 0).await.unwrap();
+        assert_eq!(before.len(), 5);
+
+        // Keep three, so two go.
+        let removed = store.prune_history_by_count(3).await.unwrap();
+        assert_eq!(removed, 2);
+
+        let after = store.history(100, 0).await.unwrap();
+        assert_eq!(after.len(), 3);
+    }
+
+    /// A batch of distinct submissions inserts every one of them.
+    #[tokio::test]
+    async fn submit_batch_inserts_all() {
+        let store = test_store().await;
+        let mut batch = Vec::new();
+        for n in 0..5 {
+            batch.push(make_submission(&format!("batch-{n}"), Priority::NORMAL));
+        }
+
+        let outcomes = store.submit_batch(&batch).await.unwrap();
+        assert_eq!(outcomes.len(), 5);
+        assert!(outcomes.iter().all(|o| o.is_inserted()));
+
+        let pending = store.pending_count().await.unwrap();
+        assert_eq!(pending, 5);
+    }
+
+    /// Batch submission dedups both within a batch and against tasks already
+    /// stored.
+    #[tokio::test]
+    async fn submit_batch_dedup() {
+        let store = test_store().await;
+        let submission = make_submission("dup", Priority::NORMAL);
+
+        // The second copy in the same batch is a duplicate of the first.
+        let twice = [submission.clone(), submission.clone()];
+        let outcomes = store.submit_batch(&twice).await.unwrap();
+        assert!(outcomes[0].is_inserted());
+        assert_eq!(outcomes[1], SubmitOutcome::Duplicate);
+
+        // A later batch with the same submission dedups against the stored task.
+        let once = [submission];
+        let outcomes = store.submit_batch(&once).await.unwrap();
+        assert_eq!(outcomes[0], SubmitOutcome::Duplicate);
+    }
+
+    /// An empty batch succeeds and yields no outcomes.
+    #[tokio::test]
+    async fn submit_batch_empty() {
+        let store = test_store().await;
+        let nothing: [TaskSubmission; 0] = [];
+        let outcomes = store.submit_batch(&nothing).await.unwrap();
+        assert!(outcomes.is_empty());
+    }
+
+    /// `task_lookup` reports an active task, whether it is pending or running.
+    #[tokio::test]
+    async fn task_lookup_active() {
+        let store = test_store().await;
+        let submission = make_submission("lookup-active", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+
+        // Pending right after submission.
+        let while_pending = store.task_lookup(&key).await.unwrap();
+        assert!(
+            matches!(while_pending, TaskLookup::Active(ref rec) if rec.status == TaskStatus::Pending)
+        );
+
+        // Running once it has been popped.
+        store.pop_next().await.unwrap();
+        let while_running = store.task_lookup(&key).await.unwrap();
+        assert!(
+            matches!(while_running, TaskLookup::Active(ref rec) if rec.status == TaskStatus::Running)
+        );
+    }
+
+    /// `task_lookup` falls back to history once the task has completed.
+    #[tokio::test]
+    async fn task_lookup_history() {
+        let store = test_store().await;
+        let submission = make_submission("lookup-hist", Priority::NORMAL);
+        let key = submission.effective_key();
+        store.submit(&submission).await.unwrap();
+        let in_flight = store.pop_next().await.unwrap().unwrap();
+
+        let no_io = TaskResult {
+            actual_read_bytes: 0,
+            actual_write_bytes: 0,
+        };
+        store.complete(in_flight.id, &no_io).await.unwrap();
+
+        let found = store.task_lookup(&key).await.unwrap();
+        assert!(
+            matches!(found, TaskLookup::History(ref rec) if rec.status == HistoryStatus::Completed)
+        );
+    }
+
+    /// `task_lookup` reports `NotFound` for a key that was never submitted.
+    #[tokio::test]
+    async fn task_lookup_not_found() {
+        let store = test_store().await;
+        let unknown_key = crate::task::generate_dedup_key("nope", Some(b"nope"));
+        let found = store.task_lookup(&unknown_key).await.unwrap();
+        assert!(matches!(found, TaskLookup::NotFound));
+    }
+
+    /// One oversized payload fails the whole batch and leaves nothing
+    /// committed.
+    #[tokio::test]
+    async fn submit_batch_rejects_oversized_payload() {
+        let store = test_store().await;
+        let acceptable = make_submission("ok", Priority::NORMAL);
+        let oversized = TaskSubmission {
+            task_type: "test".into(),
+            key: Some("big".into()),
+            priority: Priority::NORMAL,
+            payload: Some(vec![0u8; MAX_PAYLOAD_BYTES + 1]),
+            expected_read_bytes: 0,
+            expected_write_bytes: 0,
+        };
+
+        // The valid submission comes first, so a partial insert would be visible.
+        let batch = [acceptable.clone(), oversized];
+        let rejection = store.submit_batch(&batch).await.unwrap_err();
+        assert!(matches!(rejection, StoreError::PayloadTooLarge));
+
+        // Nothing was committed: the transaction rolled back as a whole.
+        let pending = store.pending_count().await.unwrap();
+        assert_eq!(pending, 0);
+    }
+}
diff --git a/src/task.rs b/src/task.rs
new file mode 100644
index 0000000..c75d13c
--- /dev/null
+++ b/src/task.rs
@@ -0,0 +1,403 @@
+use chrono::{DateTime, Utc};
+use serde::de::DeserializeOwned;
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+
+use crate::priority::Priority;
+
+/// Upper bound on a task payload, in bytes: 1 MiB (1024 * 1024).
+pub const MAX_PAYLOAD_BYTES: usize = 1024 * 1024;
+
+/// Lifecycle state of a task in the active queue; `as_str`/`FromStr` use lowercase names.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")] // serde names: "pending", "running", "paused"
+pub enum TaskStatus {
+    Pending,
+    Running,
+    Paused,
+}
+
+impl TaskStatus {
+    /// Returns the lowercase name of this status, e.g. `"pending"`.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            TaskStatus::Pending => "pending",
+            TaskStatus::Running => "running",
+            TaskStatus::Paused => "paused",
+        }
+    }
+}
+
+impl std::str::FromStr for TaskStatus {
+    type Err = String;
+
+    /// Parses exactly the names returned by [`TaskStatus::as_str`]; any other
+    /// input is rejected with a message that quotes it.
+    fn from_str(text: &str) -> Result<Self, Self::Err> {
+        let known = [Self::Pending, Self::Running, Self::Paused];
+        let hit = known.iter().find(|status| status.as_str() == text);
+        hit.copied().ok_or_else(|| format!("unknown TaskStatus: {text}"))
+    }
+}
+
+/// Terminal state of a task in history; `as_str`/`FromStr` use lowercase names.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")] // serde names: "completed", "failed"
+pub enum HistoryStatus {
+    Completed,
+    Failed,
+}
+
+impl HistoryStatus {
+    /// Returns the lowercase name of this status, e.g. `"failed"`.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            HistoryStatus::Completed => "completed",
+            HistoryStatus::Failed => "failed",
+        }
+    }
+}
+
+impl std::str::FromStr for HistoryStatus {
+    type Err = String;
+
+    /// Parses exactly the names returned by [`HistoryStatus::as_str`].
+    fn from_str(text: &str) -> Result<Self, Self::Err> {
+        let known = [Self::Completed, Self::Failed];
+        let hit = known.iter().find(|status| status.as_str() == text);
+        hit.copied().ok_or_else(|| format!("unknown HistoryStatus: {text}"))
+    }
+}
+
+/// A task in the active queue (pending, running, or paused).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskRecord {
+    pub id: i64,
+    pub task_type: String,
+    pub key: String, // NOTE(review): presumably the hashed `effective_key()` — confirm
+    pub priority: Priority,
+    pub status: TaskStatus,
+    pub payload: Option<Vec<u8>>, // parsed as JSON by `deserialize_payload`
+    pub expected_read_bytes: i64,
+    pub expected_write_bytes: i64,
+    pub retry_count: i32,
+    pub last_error: Option<String>,
+    pub created_at: DateTime<Utc>,
+    pub started_at: Option<DateTime<Utc>>,
+    pub requeue: bool, // NOTE(review): likely set for `SubmitOutcome::Requeued` — confirm
+    pub requeue_priority: Option<Priority>,
+}
+
+impl TaskRecord {
+    /// Decodes the payload bytes as JSON into a value of type `T`.
+    ///
+    /// Gives `Ok(None)` when the record has no payload, `Ok(Some(value))` when the
+    /// bytes parse as `T`, and the `serde_json` error when they do not.
+    pub fn deserialize_payload<T>(&self) -> Result<Option<T>, serde_json::Error>
+    where
+        T: DeserializeOwned,
+    {
+        let raw = self.payload.as_deref();
+        raw.map(serde_json::from_slice).transpose()
+    }
+}
+
+/// A task that has completed or permanently failed.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskHistoryRecord {
+    pub id: i64,
+    pub task_type: String,
+    pub key: String,
+    pub priority: Priority,
+    pub status: HistoryStatus, // terminal outcome: completed or failed
+    pub payload: Option<Vec<u8>>,
+    pub expected_read_bytes: i64,
+    pub expected_write_bytes: i64,
+    pub actual_read_bytes: Option<i64>, // NOTE(review): source not visible here — confirm
+    pub actual_write_bytes: Option<i64>,
+    pub retry_count: i32,
+    pub last_error: Option<String>,
+    pub created_at: DateTime<Utc>,
+    pub started_at: Option<DateTime<Utc>>,
+    pub completed_at: DateTime<Utc>, // always present, unlike `started_at`
+    pub duration_ms: Option<i64>, // NOTE(review): presumably completed - started — confirm
+}
+
+/// Reported by the executor on successful completion (the store tests pass it to `complete`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskResult {
+    pub actual_read_bytes: i64, // bytes read, as reported by the executor
+    pub actual_write_bytes: i64, // bytes written, as reported by the executor
+}
+
+/// Reported by the executor on failure; implements `Display` and `std::error::Error`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskError {
+    pub message: String, // the only field printed by the `Display` impl
+    pub retryable: bool, // NOTE(review): presumably gates retries — confirm
+    pub actual_read_bytes: i64,
+    pub actual_write_bytes: i64,
+}
+
+impl std::fmt::Display for TaskError {
+    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        out.write_str(&self.message) // message only; the other fields are not shown
+    }
+}
+
+impl std::error::Error for TaskError {} // no overrides: `source()` keeps its default (`None`)
+
+/// What happened when a task was submitted.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SubmitOutcome {
+    /// A new task was inserted; the value is its ID.
+    Inserted(i64),
+    /// Duplicate key existed; its priority was upgraded (pending/paused tasks only).
+    Upgraded(i64),
+    /// Duplicate key existed and is running/paused; marked for re-queue after completion.
+    Requeued(i64), // NOTE(review): "paused" is listed for both `Upgraded` and this — confirm
+    /// Duplicate key existed; no changes were made.
+    Duplicate,
+}
+
+impl SubmitOutcome {
+    /// Task ID carried by `Inserted`, `Upgraded`, or `Requeued`; `None` for `Duplicate`.
+    pub fn id(&self) -> Option<i64> {
+        match *self {
+            Self::Duplicate => None,
+            Self::Inserted(id) | Self::Upgraded(id) | Self::Requeued(id) => Some(id),
+        }
+    }
+
+    /// Whether this outcome is `Inserted`, i.e. a new task was created.
+    pub fn is_inserted(&self) -> bool {
+        matches!(*self, Self::Inserted(_))
+    }
+}
+
+/// Derives a dedup key from a task type and an optional payload.
+///
+/// The key is the lowercase-hex SHA-256 of `task_type`, a `:` separator, and the
+/// payload bytes. `None` adds no bytes, so it hashes like an empty payload.
+/// The separator is not escaped, so a `:` inside `task_type` is ambiguous.
+pub fn generate_dedup_key(task_type: &str, payload: Option<&[u8]>) -> String {
+    let payload_bytes = payload.unwrap_or_default();
+    let mut sha = Sha256::new();
+    sha.update(task_type.as_bytes());
+    sha.update(b":");
+    sha.update(payload_bytes);
+    format!("{:x}", sha.finalize())
+}
+
+/// Parameters for submitting a new task.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TaskSubmission {
+    pub task_type: String,
+    /// Optional dedup key. `effective_key()` hashes it with `task_type`; when `None`,
+    /// `task_type` and `payload` are hashed instead, so two submissions with the same
+    /// type and payload are deduplicated automatically.
+    pub key: Option<String>,
+    pub priority: Priority,
+    pub payload: Option<Vec<u8>>, // JSON bytes when built by `with_payload` / `from_typed`
+    pub expected_read_bytes: i64,
+    pub expected_write_bytes: i64,
+}
+
+impl TaskSubmission {
+    /// Computes the dedup key that identifies this submission.
+    ///
+    /// The task type is always part of the hash, so one logical key used under two
+    /// task types produces two different effective keys.
+    ///
+    /// - Explicit key: `hash(task_type + ":" + key)`
+    /// - No key: `hash(task_type + ":" + payload)`
+    pub fn effective_key(&self) -> String {
+        let explicit = self.key.as_deref().map(str::as_bytes);
+        let material = explicit.or(self.payload.as_deref());
+        generate_dedup_key(&self.task_type, material)
+    }
+
+    /// Builds a submission whose payload is `data` serialized to JSON bytes.
+    ///
+    /// `key` stays `None`, so the dedup key comes from the task type and that JSON.
+    /// Use `TaskRecord::deserialize_payload()` on the executor side to recover the type.
+    pub fn with_payload<T: Serialize>(
+        task_type: &str,
+        priority: Priority,
+        data: &T,
+        expected_read_bytes: i64,
+        expected_write_bytes: i64,
+    ) -> Result<Self, serde_json::Error> {
+        serde_json::to_vec(data).map(|json| Self {
+            task_type: task_type.to_owned(),
+            key: None,
+            priority,
+            payload: Some(json),
+            expected_read_bytes,
+            expected_write_bytes,
+        })
+    }
+}
+
+/// A strongly-typed task that bundles serialization, task type name, and default
+/// IO estimates.
+///
+/// [`TaskSubmission::from_typed`] builds a full [`TaskSubmission`] from an
+/// implementor. Use [`Scheduler::submit_typed`] to submit and
+/// [`TaskContext::deserialize_typed`] on the executor side (both defined elsewhere).
+///
+/// # Example
+///
+/// ```ignore
+/// use serde::{Serialize, Deserialize};
+/// use taskmill::TypedTask;
+///
+/// #[derive(Serialize, Deserialize)]
+/// struct Thumbnail { path: String, size: u32 }
+///
+/// impl TypedTask for Thumbnail {
+///     const TASK_TYPE: &'static str = "thumbnail";
+///     fn expected_read_bytes(&self) -> i64 { 4096 }
+///     fn expected_write_bytes(&self) -> i64 { 1024 }
+/// }
+/// ```
+pub trait TypedTask: Serialize + DeserializeOwned + Send + 'static {
+    /// Unique name used to register and look up the executor.
+    const TASK_TYPE: &'static str;
+
+    /// Estimated bytes this task will read; `from_typed` copies it. Default: 0.
+    fn expected_read_bytes(&self) -> i64 {
+        0
+    }
+
+    /// Estimated bytes this task will write; `from_typed` copies it. Default: 0.
+    fn expected_write_bytes(&self) -> i64 {
+        0
+    }
+
+    /// Scheduling priority; `from_typed` copies it. Default: [`Priority::NORMAL`].
+    fn priority(&self) -> Priority {
+        Priority::NORMAL
+    }
+}
+
+impl TaskSubmission {
+    /// Builds a submission from a [`TypedTask`]: the value is serialized to JSON for
+    /// the payload, while task type, priority, and IO estimates come from the trait.
+    /// `key` is left `None`; a serialization failure is returned as the error.
+    pub fn from_typed<T: TypedTask>(task: &T) -> Result<Self, serde_json::Error> {
+        serde_json::to_vec(task).map(|json| Self {
+            task_type: T::TASK_TYPE.to_owned(),
+            key: None,
+            priority: task.priority(),
+            payload: Some(json),
+            expected_read_bytes: task.expected_read_bytes(),
+            expected_write_bytes: task.expected_write_bytes(),
+        })
+    }
+}
+
+/// Unified lookup result for querying a task by its dedup inputs.
+///
+/// Returned by [`TaskStore::task_lookup`] and [`Scheduler::task_lookup`].
+/// Tells the caller whether a task is currently active (pending, running, or
+/// paused) or has finished (completed or failed) without querying two tables.
+/// NOTE(review): store tests pass a precomputed dedup key to `task_lookup` — confirm.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "location", content = "record")] // e.g. {"location": "Active", "record": {...}}
+pub enum TaskLookup {
+    /// Task is in the active queue (pending, running, or paused).
+    Active(TaskRecord),
+    /// Task has finished and is in the history table.
+    /// Contains the most recent history entry for that key.
+    History(TaskHistoryRecord),
+    /// No task with this key exists in either table.
+    NotFound,
+}
+
+/// Aggregate statistics for a task type from history; `Default` yields all zeros.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct TypeStats {
+    pub count: i64, // NOTE(review): presumably history rows aggregated — confirm
+    pub avg_duration_ms: f64,
+    pub avg_read_bytes: f64,
+    pub avg_write_bytes: f64,
+    pub failure_rate: f64, // NOTE(review): fraction vs. percentage not visible here — confirm
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Serialize, Deserialize, Debug, PartialEq)]
+    struct Thumbnail {
+        path: String,
+        size: u32,
+    }
+
+    impl TypedTask for Thumbnail {
+        const TASK_TYPE: &'static str = "thumbnail";
+
+        fn expected_read_bytes(&self) -> i64 {
+            4096
+        }
+
+        fn expected_write_bytes(&self) -> i64 {
+            1024
+        }
+    }
+
+    /// `from_typed` copies the trait values and stores the task as a JSON payload.
+    #[test]
+    fn typed_task_to_submission() {
+        let original = Thumbnail {
+            path: "/photos/a.jpg".into(),
+            size: 256,
+        };
+        let submission = TaskSubmission::from_typed(&original).unwrap();
+        assert_eq!(submission.task_type, "thumbnail");
+        assert_eq!(submission.priority, Priority::NORMAL);
+        assert_eq!(submission.expected_read_bytes, 4096);
+        assert_eq!(submission.expected_write_bytes, 1024);
+        assert!(submission.key.is_none());
+
+        // Decoding the payload must give back the value that was submitted.
+        let json = submission.payload.as_deref().unwrap();
+        assert_eq!(serde_json::from_slice::<Thumbnail>(json).unwrap(), original);
+    }
+
+    /// An overridden `priority()` is carried into the submission.
+    #[test]
+    fn typed_task_custom_priority() {
+        #[derive(Serialize, Deserialize)]
+        struct Urgent {
+            id: u64,
+        }
+
+        impl TypedTask for Urgent {
+            const TASK_TYPE: &'static str = "urgent";
+
+            fn priority(&self) -> Priority {
+                Priority::HIGH
+            }
+        }
+        let submission = TaskSubmission::from_typed(&Urgent { id: 42 }).unwrap();
+        assert_eq!(submission.priority, Priority::HIGH);
+        assert_eq!(submission.task_type, "urgent");
+    }
+
+    /// A task that overrides nothing gets zero IO estimates and normal priority.
+    #[test]
+    fn typed_task_defaults() {
+        #[derive(Serialize, Deserialize)]
+        struct Minimal;
+
+        impl TypedTask for Minimal {
+            const TASK_TYPE: &'static str = "minimal";
+        }
+        let submission = TaskSubmission::from_typed(&Minimal).unwrap();
+        assert_eq!(submission.expected_read_bytes, 0);
+        assert_eq!(submission.expected_write_bytes, 0);
+        assert_eq!(submission.priority, Priority::NORMAL);
+    }
+}