diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 809a54f..ad83299 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,6 +5,10 @@ on: branches: - main - develop + pull_request: + branches: + - main + - develop jobs: build: @@ -16,25 +20,54 @@ jobs: - esp32-s3-mini steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 - - uses: actions/cache@v3 + - uses: actions/cache@v5 with: path: | ~/.cache/pip ~/.platformio/.cache - key: ${{ runner.os }}-pio - - uses: actions/setup-node@v4.1.0 + key: ${{ runner.os }}-python-3.12-pio-${{ hashFiles('platformio.ini', 'miniweb/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-python-3.12-pio- + - uses: actions/setup-node@v6 + with: + node-version: "24" - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v6 with: - python-version: "3.11" + python-version: "3.12" - name: Build webUI - run: cd miniweb && npm i && npm run build; + run: cd miniweb && npm ci && npm run build - name: Install PlatformIO Core - run: pip install --upgrade platformio + run: | + python -m pip install --upgrade pip + python -m pip install "platformio==6.1.19" + + - name: Prime pioarduino Python environment + run: | + python -m venv ~/.platformio/penv + ~/.platformio/penv/bin/python -m pip install --upgrade pip + ~/.platformio/penv/bin/python -m pip install \ + "uv>=0.1.0" \ + "https://github.com/pioarduino/platformio-core/archive/refs/tags/v6.1.19.zip" \ + "littlefs-python>=0.16.0" \ + "fatfs-ng>=0.1.14" \ + "pyyaml>=6.0.2" \ + "rich-click>=1.8.6" \ + "zopfli>=0.2.2" \ + "intelhex>=2.3.0" \ + "rich>=14.0.0" \ + "urllib3<2" \ + "cryptography>=45.0.3" \ + "certifi>=2025.8.3" \ + "ecdsa>=0.19.1" \ + "bitstring>=4.3.1" \ + "reedsolo>=1.5.3,<1.8" \ + "esp-idf-size>=2.0.0" \ + "esp-coredump>=1.14.0" - name: Build PlatformIO Project run: | @@ -47,15 +80,16 @@ jobs: name: firmware-${{ matrix.board }} path: | .pio/build/${{ matrix.board }}/firmware.bin - .pio/build/${{ matrix.board 
}}/spiffs.bin + .pio/build/${{ matrix.board }}/littlefs.bin publish: + if: ${{ github.event_name == 'push' && github.repository == 'tadelv/yaeger' }} needs: build runs-on: ubuntu-latest permissions: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 diff --git a/.github/workflows/publish-firmware.yml b/.github/workflows/publish-firmware.yml index 1b97a1b..5c3d6b0 100644 --- a/.github/workflows/publish-firmware.yml +++ b/.github/workflows/publish-firmware.yml @@ -9,7 +9,7 @@ on: jobs: publish: - if: ${{ github.event.workflow_run.conclusion == 'success' }} + if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' && github.event.workflow_run.repository.full_name == 'tadelv/yaeger' }} runs-on: ubuntu-latest permissions: contents: write @@ -68,7 +68,7 @@ jobs: done - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 diff --git a/.github/workflows/release-build.yml b/.github/workflows/release-build.yml index a21e48a..8e75166 100644 --- a/.github/workflows/release-build.yml +++ b/.github/workflows/release-build.yml @@ -11,7 +11,7 @@ jobs: outputs: matrix: ${{ steps.collect.outputs.matrix }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Collect PlatformIO environments id: collect @@ -43,26 +43,55 @@ jobs: matrix: ${{ fromJson(needs.determine-boards.outputs.matrix) }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - uses: actions/cache@v3 + - uses: actions/cache@v5 with: path: | ~/.cache/pip ~/.platformio/.cache - key: ${{ runner.os }}-pio + key: ${{ runner.os }}-python-3.12-pio-${{ hashFiles('platformio.ini', 'miniweb/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-python-3.12-pio- - - uses: actions/setup-node@v4.1.0 + - uses: actions/setup-node@v6 + with: + node-version: "24" - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v6 with: - 
python-version: "3.11" + python-version: "3.12" - name: Build webUI - run: cd miniweb && npm i && npm run build + run: cd miniweb && npm ci && npm run build - name: Install PlatformIO Core - run: pip install --upgrade platformio + run: | + python -m pip install --upgrade pip + python -m pip install "platformio==6.1.19" + + - name: Prime pioarduino Python environment + run: | + python -m venv ~/.platformio/penv + ~/.platformio/penv/bin/python -m pip install --upgrade pip + ~/.platformio/penv/bin/python -m pip install \ + "uv>=0.1.0" \ + "https://github.com/pioarduino/platformio-core/archive/refs/tags/v6.1.19.zip" \ + "littlefs-python>=0.16.0" \ + "fatfs-ng>=0.1.14" \ + "pyyaml>=6.0.2" \ + "rich-click>=1.8.6" \ + "zopfli>=0.2.2" \ + "intelhex>=2.3.0" \ + "rich>=14.0.0" \ + "urllib3<2" \ + "cryptography>=45.0.3" \ + "certifi>=2025.8.3" \ + "ecdsa>=0.19.1" \ + "bitstring>=4.3.1" \ + "reedsolo>=1.5.3,<1.8" \ + "esp-idf-size>=2.0.0" \ + "esp-coredump>=1.14.0" - name: Build PlatformIO Project run: | @@ -73,7 +102,7 @@ jobs: run: | mkdir -p release/${{ matrix.board }} cp .pio/build/${{ matrix.board }}/firmware.bin release/${{ matrix.board }}/firmware.bin - cp .pio/build/${{ matrix.board }}/spiffs.bin release/${{ matrix.board }}/spiffs.bin + cp .pio/build/${{ matrix.board }}/littlefs.bin release/${{ matrix.board }}/littlefs.bin - name: Upload firmware artifacts uses: actions/upload-artifact@v4 diff --git a/README.md b/README.md index ff06119..4ca4891 100644 --- a/README.md +++ b/README.md @@ -58,12 +58,77 @@ You can also control Yaeger from its own web interface without an app. Just poin your home wifi, or `192.168.4.1` if Yaeger creates its own access point.  +The web UI now includes a **Version & Network Info** section that shows the Web UI version/build timestamp and device firmware/network details (mode, SSID, IP, hostname) so you can quickly check when the currently loaded build was last updated. 
+ +### Frontend status + +- `miniweb` (TypeScript + Vite) is the **only supported** web UI in this repository. +- The old `webserver` Svelte/Rollup frontend and related legacy files have been removed. +- Project scripts and firmware asset packaging target `miniweb`. + #### Using Yaeger on the go If Yaeger can't connect to your preferred Wifi, it will create its own access point. Perfect for when out and about :grin: ## Build guide (WIP) +## What changed in this fork + +If you are reviewing this fork before opening a PR against `tadelv/yaeger`, here is the practical summary: + +* `miniweb` is now the canonical frontend (TypeScript + Vite). Legacy `webserver` content is gone. +* OTA uploads are now aligned with ElegantOTA (`/update`) and no longer depend on PlatformIO `espota`. +* A one-command OTA flow (`ota_update_all.sh`) now updates both LittleFS web assets and firmware in one run. +* OTA tooling is isolated in a local Python virtual environment (`.ota-venv`) to avoid polluting global Python installs. +* GitHub Actions build flow now supports PR validation and avoids publish failures on forks/non-upstream repos. + +## Installation / update flows + +There are now two recommended paths depending on how you connect to your board: + +### 1) USB flash (first-time install or recovery) + +Use this when the device is connected over USB serial: + +```bash +./build_and_flash.sh s3 +# or +./build_and_flash.sh s3-mini +``` + +What it does: +1. installs frontend dependencies with `npm ci`, +2. builds `miniweb`, +3. optionally erases flash, +4. uploads LittleFS (`buildfs` + `uploadfs`), +5. uploads firmware (`upload`). + +### 2) OTA update (already deployed device on network) + +Use this once the device is reachable over Wi-Fi and ElegantOTA is available: + +```bash +./ota_update_all.sh s3 +# or +./ota_update_all.sh s3-mini +``` + +What it does: +1. creates/reuses `.ota-venv`, +2. 
installs OTA dependencies in that venv (`platformio`, `littlefs-python`, `fatfs-ng`, `pyyaml`), +3. builds `miniweb`, +4. uploads LittleFS image over ElegantOTA, +5. uploads firmware over ElegantOTA. + +If your device requires OTA credentials, set: + +```bash +export YAEGER_OTA_USERNAME=admin +export YAEGER_OTA_PASSWORD='your-password' +``` + +(`YAEGER_OTA_USERNAME` defaults to `admin` if omitted.) + ### Schema  @@ -76,6 +141,34 @@ Courtesy of [@dlisec](https://github.com/dlisec) A build script has been provided by [@matthew73210](https://github.com/matthew73210), so to get up and running on the ESP, just run `./build_and_flash.sh`. Make sure to read the comments in the script. But also in the platformio.ini and choose the right board +Yaeger OTA in this project is provided by the web-based ElegantOTA handler (`/update`) and not the PlatformIO `espota` +upload protocol. + +For VS Code + PlatformIO uploads via ElegantOTA, use one of these environments: + +* `esp32-s3-elegantota` +* `esp32-s3-mini-elegantota` + +These use a custom PlatformIO upload script that sends the built firmware to `http://yaeger.local/update` through the +same ElegantOTA mechanism used by the device web UI. + +For a **single-command OTA update of the whole project** (frontend files + firmware), run: + +```bash +./ota_update_all.sh s3 +# or +./ota_update_all.sh s3-mini +``` + +This builds `miniweb`, then runs OTA in two explicit steps: (1) upload LittleFS (`buildfs` + `uploadfs`) and (2) upload firmware (`upload`). The script creates and uses a local Python virtual environment (`.ota-venv`), installs required OTA dependencies (`platformio`, `littlefs-python`, `fatfs-ng`, `pyyaml`), and auto-retries if PlatformIO reports missing Python modules. + +For local frontend builds, use npm from `miniweb`: + +```bash +cd miniweb +npm ci +npm run build +``` ## Latest features @@ -83,6 +176,8 @@ ESP, just run `./build_and_flash.sh`. 
Make sure to read the comments in the scri PID temp follower, set the temperature setpoint and the PID controller will try and follow. You'll need to find your own PID values +A controller review with alternatives (including MPC/LQR and fan min/max envelope design) is available at `docs/control_strategy_review_2026-04-14.md` (current recommendation: ADRC as primary advanced controller, with an ADRC autotune workflow proposal). + ### Profile Still in the works, but there is now a profile follower, it follows a simple .json format. You can have a go at [Gaggiuino web profiler](https://matthew73210.github.io/Gaggiuino-web-profiler/) under the _pun_ "Yägermeister Mode" diff --git a/build_and_flash.sh b/build_and_flash.sh index 4082742..6ee97ee 100755 --- a/build_and_flash.sh +++ b/build_and_flash.sh @@ -31,26 +31,13 @@ fi echo "Using PlatformIO environment: $PIO_ENV" - -read -p "Choose frontend (r for reyaeger, empty for classic): " frontend - -if [ $frontend = 'r' ]; then - -echo "reyaeger download"; -curl -L https://github.com/RobTS/reyaeger/releases/latest/download/reyaeger.zip > reyaeger.zip -rm -rf data -mkdir data -unzip -d ./data ./reyaeger.zip - -else - -# Step 1: Navigate to the miniweb directory +# Step 1: Navigate to the primary frontend (miniweb) echo "Navigating to miniweb..." cd miniweb || { echo "miniweb folder not found!"; exit 1; } -# Step 2: Install dependencies -echo "Installing dependencies with npm..." -npm install || { echo "npm install failed!"; exit 1; } +# Step 2: Install dependencies with npm (standardized package manager) +echo "Installing dependencies with npm ci..." +npm ci || { echo "npm ci failed!"; exit 1; } # Step 3: Build the web assets echo "Building the web project..." @@ -58,9 +45,7 @@ npm run build || { echo "npm build failed!"; exit 1; } # Step 4: Return to the project root echo "Returning to the project root..." - cd .. || exit 1 -fi # Step 5: Erase the device memory (optional but recommended) echo "Erasing the device memory..." 
diff --git a/docs/code_analysis_2026-04-06.md b/docs/code_analysis_2026-04-06.md new file mode 100644 index 0000000..503516a --- /dev/null +++ b/docs/code_analysis_2026-04-06.md @@ -0,0 +1,212 @@ +# Yaeger codebase analysis (April 6, 2026) + +This is a high-level technical review of the firmware + web clients, with prioritized, up-to-date improvement proposals. + +## Scope reviewed + +- Firmware entrypoint and runtime loop (`src/main.cpp`) +- WebSocket command/control path (`src/CommandLoop.cpp`) +- REST API and Wi‑Fi credential flow (`src/api.cpp`, `src/wifi_setup.cpp`) +- Frontend app state, transport, and build setup (`miniweb`, `webserver`) +- Dependency freshness and security posture from package manager checks + +## Current strengths + +- Clear separation between firmware concerns: sensors, fan/heater control, API, and Wi‑Fi modules. +- Safety fallback exists when WebSocket clients disconnect (`updateConnectionSafety`) with cooldown fan behavior. +- OTA update path is already integrated with ElegantOTA and static content serving from LittleFS. +- A newer `miniweb` app exists (TypeScript + Vite) in parallel to legacy Svelte/Rollup webserver. + +## Key findings and prioritized improvements + +## 1) **Critical security hardening (do first)** + +### Implementation status (April 6, 2026 update) + +- ~~Move `/api/wifi` to **POST + JSON body**; never pass passwords in URL query strings.~~ ✅ Implemented (`/api/wifi` now requires `POST` JSON). +- ~~Add request authentication for mutable endpoints (`/api/wifi`, control commands, OTA), at minimum:~~ 🔄 Partially implemented. + - ~~per-device admin password or token stored in NVS,~~ ✅ Implemented for `/api/wifi` and OTA Basic Auth. + - ~~CSRF-resistant flow for browser UI.~~ ✅ Implemented (`X-Yaeger-CSRF` header validated for mutable REST writes). +- ~~Protect OTA route with credentials and rate-limiting/backoff.~~ ✅ Credentials and exponential backoff implemented (OTA upload tooling retries transient failures). 
+- ~~Add secure defaults in AP mode:~~ ✅ Implemented. + - ~~WPA2/WPA3 AP passphrase (not open AP),~~ ✅ Implemented (password-protected AP). + - ~~setup-mode timeout window.~~ ✅ Implemented (AP setup timeout + restart). + +### Updated TODO list + +- [x] Replace `/api/wifi` GET query credential flow with authenticated `POST` + JSON body. +- [x] Protect OTA with admin credentials. +- [x] Enable AP passphrase and setup timeout window. +- [x] Add auth gate for WebSocket mutable control commands. +- [x] Add CSRF-resistant browser flow for authenticated actions. +- [x] Add rate limiting / exponential backoff for OTA endpoint. + +### Findings + +- ~~Wi‑Fi credentials are accepted over **HTTP GET query params** at `/api/wifi` (`ssid` / `pass`).~~ +- ~~API endpoints and OTA endpoint appear unauthenticated by default.~~ +- Device exposes AP fallback mode and local admin surface; risk is reduced with auth, CSRF controls, and OTA retry/backoff protections. + +### Recommendations (2026 best-practice) + +1. ~~Move `/api/wifi` to **POST + JSON body**; never pass passwords in URL query strings.~~ +2. ~~Add request authentication for mutable endpoints (`/api/wifi`, control commands, OTA), at minimum:~~ + - ~~per-device admin password or token stored in NVS,~~ + - ~~CSRF-resistant flow for browser UI.~~ +3. ~~Protect OTA route with credentials and add rate-limiting/backoff.~~ +4. ~~Add secure defaults in AP mode:~~ + - ~~WPA2/WPA3 AP passphrase (not open AP),~~ + - ~~setup-mode timeout window.~~ + +## 2) **WebSocket robustness and heap stability (high priority)** + +### Implementation status (April 6, 2026 update) + +- ~~Validate parse result (`DeserializationError`) and reject malformed frames.~~ ✅ Implemented (malformed JSON and unsupported fragmented/non-text frames are rejected). +- ~~Enforce command schema validation (required fields, ranges).~~ ✅ Implemented (numeric schema checks for mutating commands and preference payloads). 
+- ~~Replace fixed-size `char[200]` with `measureJson` + dynamic/streamed response.~~ ✅ Implemented (`measureJson` + dynamically-sized `String` response buffer). +- ~~Add clamp logic for actuator values (e.g., fan/heater range validation) server-side regardless of client behavior.~~ ✅ Implemented (server-side clamping + logging for burner/fan/cooldown values). + +### Updated TODO list + +- [x] Reject malformed JSON payloads and unsupported WebSocket frame shapes. +- [x] Validate command schema for mutating and preference commands. +- [x] Remove fixed-size WebSocket response buffer usage. +- [x] Clamp actuator and cooldown values server-side to safe ranges. + +### Findings + +- `deserializeJson` return value is not checked before consuming fields. +- Incoming frame handling concatenates payload into `String` and uses small fixed JSON capacity assumptions. +- Outgoing buffer is fixed `char buffer[200]`, risking truncation if payload grows. + +### Recommendations + +1. ~~Validate parse result (`DeserializationError`) and reject malformed frames.~~ +2. ~~Enforce command schema validation (required fields, ranges).~~ +3. ~~Replace fixed-size `char[200]` with `measureJson` + dynamic/streamed response.~~ +4. ~~Add clamp logic for actuator values (e.g., fan/heater range validation) server-side regardless of client behavior.~~ + +## 3) **Network resiliency and boot behavior (high priority)** + +### Implementation status (April 11, 2026 update) + +- ~~Convert Wi‑Fi connect to non-blocking state machine (or bounded async retry steps).~~ ✅ Implemented (connect attempts now run without blocking startup loop and time out to AP fallback). +- ~~Keep loop tick deterministic by moving periodic tasks to elapsed-time scheduling.~~ ✅ Implemented (`millis()`-driven 10ms fast tick for cleanup/safety/sensor polling). 
+- ~~Add watchdog-friendly design: avoid long blocking sections in startup/connect paths.~~ ✅ Implemented (removed blocking connect loop and replaced fixed loop delay with cooperative `yield()`). + +### Updated TODO list + +- [x] Replace blocking Wi‑Fi connect loop with non-blocking attempt tracking + timeout. +- [x] Move periodic runtime tasks to elapsed-time scheduling for deterministic ticks. +- [x] Remove fixed loop sleep and use cooperative yielding for watchdog friendliness. + +### Findings + +- Wi‑Fi connect routine blocks in a loop up to ~10s with `delay(1000)` retries. +- Main loop includes regular delays and mixed timing responsibilities. + +### Recommendations + +1. ~~Convert Wi‑Fi connect to non-blocking state machine (or bounded async retry steps).~~ +2. ~~Keep loop tick deterministic by moving periodic tasks to elapsed-time scheduling.~~ +3. ~~Add watchdog-friendly design: avoid long blocking sections in startup/connect paths.~~ + +## 4) **Frontend modernization path (high priority, medium effort)** + +### Implementation status (April 11, 2026 update) + +- ~~Make `miniweb` the single primary frontend and define deprecation timeline for `webserver`.~~ ✅ Implemented (project docs + build path now explicitly designate `miniweb` as primary and `webserver` as deprecated/frozen). +- ~~If legacy UI must remain, plan migration to modern Svelte/Vite stack.~~ ✅ Implemented as policy decision (legacy kept as frozen fallback with no new feature investment). +- ~~Standardize package manager/lockfile strategy (npm vs yarn) to reduce CI drift.~~ ✅ Implemented for active frontend path (`build_and_flash.sh` now uses `npm ci` for deterministic `miniweb` installs). + +### Updated TODO list + +- [x] Declare `miniweb` as canonical UI and deprecate legacy `webserver`. +- [x] Freeze legacy UI scope to maintenance-only fallback. +- [x] Standardize active frontend build path on deterministic npm installs. 
+ +### Findings + +- Repository contains **two web UIs** (`webserver` legacy Svelte 3 + Rollup, and `miniweb` TypeScript + Vite). +- Legacy webserver dependency tree is significantly behind and has known advisory exposure via old Svelte line. + +### Recommendations + +1. ~~Make `miniweb` the single primary frontend and define deprecation timeline for `webserver`.~~ +2. ~~If legacy UI must remain, plan migration to modern Svelte/Vite stack.~~ +3. ~~Standardize package manager/lockfile strategy (npm vs yarn) to reduce CI drift.~~ + +## 5) **Dependency and supply-chain updates (high priority)** + +### Findings from `npm outdated` + +- `miniweb` has major updates pending (e.g., Vite 8.x, TypeScript 6.x, vite-plugin-pwa 1.x). +- `webserver` is heavily behind (Rollup 4.x, Svelte 5.x, SMUI 8.x available). +- root dependency `chartjs-plugin-trendline` also behind. + +### Findings from `npm audit` + +- `webserver` reports moderate vulnerabilities tied to old `svelte` line; major upgrade path available. + +### Recommendations + +1. Upgrade actively maintained UI (`miniweb`) first, one major at a time with CI snapshots. +2. Treat legacy `webserver` as frozen/deprecated or perform full migration sprint. +3. Add automated dependency checks (scheduled CI + Dependabot/Renovate). + +## 6) **API design and transport hygiene (medium priority)** + +### Findings + +- Control and data are mixed in loosely-typed WebSocket payloads. +- REST info endpoint is useful but minimal; no health/version compatibility contract. + +### Recommendations + +1. Version the protocol (`apiVersion`) across REST + WebSocket. +2. Introduce structured command envelopes and explicit error responses. +3. Add heartbeat/ping and reconnect backoff in frontend WebSocket client. + +## 7) **Build/test quality gates (medium priority)** + +### Findings + +- Frontend builds succeed, but there is no obvious unified CI matrix in repo root. 
+- Firmware static checks are configured in PlatformIO config but not validated in this environment (`pio` unavailable). + +### Recommendations + +1. Add CI pipeline matrix: + - firmware static analysis/build, + - miniweb build/lint/typecheck, + - optional legacy webserver build until sunset. +2. Add pre-merge checks for formatting + basic unit tests for pure logic modules. +3. Add release artifact version stamping for firmware + frontend and compatibility check. + +## Proposed implementation roadmap + +### Phase 1 (1-2 weeks): security + reliability + +- Migrate `/api/wifi` to authenticated POST. +- Add OTA auth + AP hardening defaults. +- Add WebSocket parse/validation/clamp guards. + +### Phase 2 (1-2 weeks): frontend consolidation + +- Define `miniweb` as primary. +- Freeze or retire `webserver`; remove dual-maintenance overhead. +- Upgrade `miniweb` core tooling with compatibility tests. + +### Phase 3 (ongoing): CI and observability + +- Introduce CI matrix and scheduled dependency scanning. +- Add structured logs + fault counters exposed via `/api/info` (or `/api/health`). +- Add smoke tests for profile run and actuator safety constraints. + +## Commands run for this analysis + +- `npm outdated --json` (repo root, `miniweb`, `webserver`) +- `npm run build` (`miniweb`, `webserver`) +- `npm audit --omit=dev --json` (`webserver`) +- `pio --version` (tool unavailable in environment) diff --git a/docs/control_strategy_review_2026-04-14.md b/docs/control_strategy_review_2026-04-14.md new file mode 100644 index 0000000..0c5e1a1 --- /dev/null +++ b/docs/control_strategy_review_2026-04-14.md @@ -0,0 +1,243 @@ +# Control Strategy Review (PID) and Alternatives + +Date: 2026-04-14 + +## Current PID implementation review + +The current firmware uses a single-loop PID that drives heater output only (`0..100%`) every 400 ms and smooths the output before commanding the heater SSR. The fan is not part of the closed-loop PID objective during normal operation. 
Instead, the fan is controlled manually or by separate flows (autotune/delay measurement/manual safety). This is simple and robust, but it leaves roast quality and disturbance rejection potential on the table. + +### What is working well + +- Control update cadence and safety clamping are explicit (`PID_UPDATE_INTERVAL_MS`, actuator clamp `0..100`). +- Anti-windup exists via conditional integration and integral clamping. +- A process-delay predictor and delay measurement path already exist, which is a strong foundation for model-based control. +- Relay autotune and multiple tuning formulas are already supported. + +### Main control gaps + +1. **Single-input closed loop:** only the heater is optimized by PID; the fan is not coordinated in the control objective. +2. **No explicit multivariable constraints:** there is no built-in optimization that co-manages heater/fan tradeoffs while respecting user preferences (e.g., min/max fan envelope). +3. **Fixed-gain behavior across roast phases:** a single PID structure can struggle across drying/Maillard/development dynamics. +4. **No direct optimization of slope trajectories (RoR):** the current loop tracks temperature, but profile slope can be more important for roasting consistency. + +## Recommended fan envelope feature (applies to all candidate controllers) + +Add user-configurable fan bounds and make *every* controller obey them. + +### Proposed new preferences and runtime fields + +- `controlFanMin` (0..100), default `30` +- `controlFanMax` (0..100), default `80` +- enforce `controlFanMin <= controlFanMax` (swap if reversed) + +### Actuator mapping rule + +Any controller computes raw commands: + +- `heaterRaw` in 0..100 +- `fanRaw` in 0..100 + +Then apply the envelope: + +- `heater = clamp(heaterRaw, 0, 100)` +- `fan = clamp(fanRaw, controlFanMin, controlFanMax)` + +This gives users hard limits on airflow while still allowing automatic fan variation within those limits. 
+ +## Controller alternatives (6 options) + +Below are six options that can command **both heater and fan** while supporting user fan min/max bounds. + +### 1) Linear MPC (recommended long-term target) + +**What it is:** finite-horizon optimization on a linearized thermal model with constraints. + +**Why it fits this project:** +- Naturally handles two actuators (heater + fan). +- Explicitly enforces constraints (`heater 0..100`, `fan min..max`, rate limits). +- Can track temperature and RoR targets simultaneously. + +**Suggested objective:** +- Track bean temperature and/or ET targets. +- Penalize RoR error and actuator aggressiveness. +- Penalize fan movement to reduce noise/mechanical wear. + +**Complexity:** medium/high (requires model ID + QP solver or lightweight custom optimizer). + +### 2) LQR + integral action (LQI) + +**What it is:** state-feedback controller on linear model; add integral states for zero steady-state error. + +**Why it fits:** +- Lower compute load than MPC. +- Good multivariable coordination when model is decent. +- Stable and predictable tuning via Q/R matrices. + +**How to enforce fan limits:** +- Compute unconstrained command, then saturate and apply simple anti-windup logic. +- Optional reference governor to pre-shape commands so saturation is less frequent. + +**Complexity:** medium. + +### 3) Gain-scheduled 2x PID (heater PID + fan PID) + +**What it is:** keep PID family but use separate loops and phase-based gain schedules. + +**Why it fits:** +- Fastest migration path from current implementation. +- Familiar tuning workflow. +- Can use roast phase breakpoints (drying/Maillard/development) and temperature ranges. + +**Fan behavior:** +- Fan PID can regulate ET-BT delta, RoR damping, or smoke proxy. +- Always clamp by user `fanMin/fanMax`. + +**Complexity:** low/medium. + +### 4) ADRC (Active Disturbance Rejection Control) + +**What it is:** observer-based control that estimates unmodeled disturbances in real time. 
+ +**Why it fits:** +- Handles disturbances (batch size variance, charge temp shifts, ambient changes) better than fixed PID. +- Reduces reliance on precise process model. + +**Fan integration:** +- Use dual-channel ADRC (heater + fan) or heater ADRC with fan as scheduled auxiliary. +- Apply fan envelope at command stage. + +**Complexity:** medium. + +### 5) Fuzzy supervisory control (over PID/PI inner loops) + +**What it is:** rule-based supervisor adjusts setpoints/gains/actuator splits based on roast context. + +**Why it fits:** +- Encodes operator heuristics explicitly. +- Useful when precise modeling is hard but domain expertise is strong. + +**Fan integration:** +- Rules can increase fan during high RoR overshoot risk and cap by user envelope. + +**Complexity:** medium; interpretability high. + +### 6) IMC / Smith-predictor MIMO variant + +**What it is:** model-based control compensating dead time, extending your current predictor concept into dual-actuator control. + +**Why it fits:** +- Builds directly on existing delay-estimation mechanics. +- Good compromise before full MPC. + +**Fan integration:** +- Use decoupling matrix from identified plant gains. +- Clamp fan by user bounds. + +**Complexity:** medium. + +## Recommended roadmap + +1. **Phase 1 (quick win):** implement fan envelope + **ADRC** as the primary advanced controller, while keeping current PID as fallback. +2. **Phase 2:** add gain scheduling and roast-phase-specific ADRC observer/controller parameters (drying/Maillard/development). +3. **Phase 3:** add optional LQI and MPC modes for sites that can maintain a reliable plant model. + +## Minimal API/firmware changes to support alternatives + +- Add `controlMode` enum in preferences/websocket payload, e.g.: + - `pid_single` (current) + - `pid_dual` + - `lqi` + - `mpc` + - `adrc` + - `fuzzy` +- Add `controlFanMin` / `controlFanMax` to preferences + websocket schema. 
+- Extend control telemetry to publish: + - actuator raw commands (`heaterRaw`, `fanRaw`) + - clamped commands (`heaterCmd`, `fanCmd`) + - active constraints flags (e.g., `fanAtMin`, `fanAtMax`). + +## Practical recommendation + +Given bean variability (origin, age, moisture, density, and batch mass), **ADRC is the best primary fit** because it tolerates modeling uncertainty and rejects disturbances without needing a highly accurate plant model. Start with **ADRC + fan min/max envelope**, retain PID as a safe fallback mode, and only enable LQI/MPC as optional modes for environments with stronger model identification and maintenance practices. + +### Why ADRC is a strong default for this roaster + +- Bean-dependent dynamics are hard to model precisely and can shift roast-to-roast. +- ADRC estimates lumped disturbances online, reducing dependence on exact model fidelity. +- It can co-manage heater/fan with fewer assumptions than model-heavy approaches. +- It maps well to incremental rollout: observer first, then tighter actuator coordination. + +## ADRC autotune proposal (how to make it practical) + +If we implement ADRC, autotune should shift from "PID gain hunt" to "plant + observer characterization". The objective is to estimate safe starting values for: + +- `b0_heater` (heater-to-temperature gain estimate) +- `b0_fan` (fan-to-cooling gain estimate) +- observer bandwidth `w0` +- controller bandwidth `wc` + +### Autotune modes + +1. **Quick tune (recommended default)** + - Single button workflow for operators. + - Uses a short sequence of bounded actuator steps and computes robust starting parameters. +2. **Advanced tune** + - Exposes full sweep settings (step amplitude, dwell time, fan baseline, repeat count). + - For power users validating different drum sizes/roaster configs. + +### Step-by-step ADRC autotune flow + +1. **Pre-check and safety lock** + - Require roast state = idle or dedicated calibration mode. + - Validate sensor health and stable sampling. 
+ - Apply fan envelope constraints (`controlFanMin`, `controlFanMax`) before any step test. + +2. **Baseline stabilization phase (e.g., 20-40 s)** + - Hold fan at a user-selected baseline inside min/max envelope. + - Hold heater at a low safe value (or 0 for cooling characterization). + - Estimate noise level and moving slope baseline. + +3. **Heater gain test (`b0_heater`)** + - Apply a bounded heater step (example: +15%) for a fixed dwell. + - Measure slope change `dT/dt` after dead-time compensation window. + - Estimate `b0_heater` from incremental response: + - `b0_heater ~= Δ(dT/dt) / Δheater` + +4. **Fan cooling test (`b0_fan`)** + - With moderate heater hold, apply a bounded fan step (example: +10%). + - Measure slope reduction and estimate: + - `b0_fan ~= -Δ(dT/dt) / Δfan` + - Clamp all fan commands by user envelope during test and runtime. + +5. **Observer/controller bandwidth synthesis** + - Choose `w0` based on measured noise and response speed (faster plant -> higher `w0`). + - Set `wc` as a fraction of `w0` (typical start: `wc = w0 / 3` to `w0 / 5`). + - Produce conservative defaults first; allow optional "aggressive" profile. + +6. **Closed-loop verification pulse** + - Run a short setpoint move (e.g., +3 to +5 °C). + - Validate overshoot, settling time, and actuator saturation ratio. + - If metrics exceed thresholds, auto-derate (`wc` down, `w0` down) and retest once. + +7. **Persist + rollback safety** + - Save tuned ADRC params with timestamp and roast context metadata. + - Keep last-known-good profile; auto-rollback if the next roast triggers repeated saturation/oscillation alarms. + +### UI/API additions for ADRC autotune + +- Extend `setPidControl` into generic `setControl` (backward compatible alias retained). 
+- Add fields: + - `controlMode: "pid_single" | "adrc" | ...` + - `adrcAutotune: boolean` + - `adrcTuneLevel: "quick" | "advanced"` + - `adrcFanBaseline`, `adrcHeaterStep`, `adrcFanStep`, `adrcDwellSec` +- Telemetry: + - `adrcTuneStage`, `adrcTuneProgress`, `adrcB0Heater`, `adrcB0Fan`, `adrcW0`, `adrcWc` + - `adrcValidationOvershoot`, `adrcValidationSettlingSec`, `adrcValidationSaturationPct` + +### Why this autotune approach works for variable beans + +- It does not assume a fixed global bean model. +- It re-identifies local gains from fresh step data each tune. +- It keeps safety constraints explicit (heater bounds + user fan min/max). +- It is resilient to changing origin/age/batch by recalibrating dynamics instead of forcing one static parameter set. diff --git a/miniweb/index.html b/miniweb/index.html index 461c6ba..01e2808 100644 --- a/miniweb/index.html +++ b/miniweb/index.html @@ -7,6 +7,6 @@
- +