diff --git a/.gitignore b/.gitignore index 0ac80fc..68eff96 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ /.devenv.flake.nix /devenv.lock /game-data/ + +# Python cache +__pycache__/ +*.pyc diff --git a/README.md b/README.md index 366d7df..b10caf7 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,12 @@ This repository contains a draft specification set for a Nintendo Switch static ## Contents - `specs/` contains the numbered specification series. - `crates/` holds the exploratory pipeline/runtime scaffolding. +- `skills/` provides the Codex skill set used for validation workflows. - `ROADMAP.md` provides phased milestones and exit criteria. - `RESEARCH.md` lists research directions and required sources. - `docs/` contains development notes. - `docs/LEGAL-POLICY.md` defines legal use and asset separation rules. + - `docs/static-recomp-skills.md` documents the Codex skill set and project-level validation templates. ## How to Use the Specs - Read `specs/README.md` for ordering. @@ -31,6 +33,7 @@ Legal and provenance policy: ## Samples and Flow Docs - `samples/memory-image/` shows the memory image initialization flow (segment blob + lifted module). - `docs/static-recompilation-flow.md` outlines a hypothetical macOS static recompilation flow and verification pipeline. +- `docs/validation-matrix-template.md`, `docs/title-run-sheet-template.md`, `docs/thresholds/default.json`, `docs/batch-manifest-schema.md`, `docs/batch-manifest-schema.json`, and `docs/batch-pipeline-layout.md` are reusable validation templates. ## Back Pressure Hooks These hooks add fast, consistent feedback to keep the repo autonomous and reduce review churn. Hooks are defined in `.pre-commit-config.yaml` and can be run with `prek` (preferred) or `pre-commit`. 
diff --git a/RESEARCH.md b/RESEARCH.md index 4828e79..7727af1 100644 --- a/RESEARCH.md +++ b/RESEARCH.md @@ -83,6 +83,9 @@ Needed research: - nstool (XCI/NCA/NSO extraction): https://github.com/jakcron/nstool - Ghidra SLEIGH language reference (p-code semantics): https://github.com/NationalSecurityAgency/ghidra/blob/master/GhidraDocs/languages/html/sleigh.html - sleigh library (p-code lifting implementation): https://github.com/lifting-bits/sleigh +- FFmpeg filter reference for SSIM/PSNR/EBU R128 audio analysis: https://manpages.debian.org/bookworm/ffmpeg/ffmpeg-filters.1.en.html +- FFmpeg libvmaf filter usage notes: https://manpages.opensuse.org/Tumbleweed/ffmpeg/ffmpeg-filters.1.en.html +- EBU R 128 loudness recommendation (audio loudness measurement): https://tech.ebu.ch/publications/r128 ## Research Deliverables - A research summary for each category with sources. diff --git a/docs/batch-manifest-schema.json b/docs/batch-manifest-schema.json new file mode 100644 index 0000000..52b185e --- /dev/null +++ b/docs/batch-manifest-schema.json @@ -0,0 +1,153 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "batch-manifest-schema.json", + "title": "Static Recomp Batch Manifest", + "type": "object", + "additionalProperties": false, + "required": [ + "schema_version", + "batch_id", + "created_at", + "toolchain", + "titles" + ], + "properties": { + "schema_version": { + "type": "string", + "const": "v1" + }, + "batch_id": { + "type": "string", + "minLength": 1 + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "toolchain": { + "type": "object", + "additionalProperties": false, + "properties": { + "pipeline_version": { "type": "string" }, + "runtime_version": { "type": "string" }, + "ffmpeg_version": { "type": "string" }, + "emulator_version": { "type": "string" } + } + }, + "global_defaults": { + "type": "object", + "additionalProperties": true, + "properties": { + "resolution": { "type": "string" }, + "frame_rate": { 
"type": "number" }, + "audio_rate": { "type": "number" }, + "renderer_settings": { "type": "string" }, + "metrics_thresholds": { "$ref": "#/$defs/metrics_thresholds" } + } + }, + "titles": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/title" } + } + }, + "$defs": { + "metrics_thresholds": { + "type": "object", + "additionalProperties": true, + "properties": { + "ssim_min": { "type": "number" }, + "psnr_min": { "type": "number" }, + "vmaf_min": { "type": "number" }, + "audio_lufs_delta_max": { "type": "number" }, + "audio_peak_delta_max": { "type": "number" } + } + }, + "inputs": { + "type": "object", + "additionalProperties": false, + "required": ["provenance_record", "reference_captures", "input_traces"], + "properties": { + "provenance_record": { "type": "string" }, + "reference_captures": { + "type": "array", + "items": { "type": "string" } + }, + "input_traces": { + "type": "array", + "items": { "type": "string" } + } + } + }, + "validation": { + "type": "object", + "additionalProperties": false, + "required": ["scene_list", "targets"], + "properties": { + "scene_list": { + "type": "array", + "items": { "type": "string" } + }, + "targets": { + "type": "object", + "additionalProperties": false, + "properties": { + "resolution": { "type": "string" }, + "frame_rate": { "type": "number" }, + "audio_rate": { "type": "number" } + } + }, + "metrics_thresholds": { "$ref": "#/$defs/metrics_thresholds" } + } + }, + "status": { + "type": "object", + "additionalProperties": false, + "required": ["state", "last_updated"], + "properties": { + "state": { + "type": "string", + "enum": ["pending", "running", "passed", "failed", "needs_review"] + }, + "last_updated": { + "type": "string", + "format": "date-time" + }, + "notes": { "type": "string" } + } + }, + "artifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "report_path": { "type": "string" }, + "metrics_dir": { "type": "string" }, + "captures_dir": { "type": "string" 
} + } + }, + "title": { + "type": "object", + "additionalProperties": false, + "required": [ + "title_id", + "title_name", + "version", + "region", + "inputs", + "validation", + "status" + ], + "properties": { + "title_id": { "type": "string" }, + "title_name": { "type": "string" }, + "version": { "type": "string" }, + "region": { "type": "string" }, + "build_id": { "type": "string" }, + "inputs": { "$ref": "#/$defs/inputs" }, + "validation": { "$ref": "#/$defs/validation" }, + "status": { "$ref": "#/$defs/status" }, + "artifacts": { "$ref": "#/$defs/artifacts" } + } + } + } +} diff --git a/docs/batch-manifest-schema.md b/docs/batch-manifest-schema.md new file mode 100644 index 0000000..c8686a5 --- /dev/null +++ b/docs/batch-manifest-schema.md @@ -0,0 +1,115 @@ +# Batch Manifest Schema (v1) + +Machine-validated JSON schema: `docs/batch-manifest-schema.json`. + +This schema is intended for catalog-scale batch runs. It records per-title inputs, +validation targets, and status. Store as JSON or TOML; keys below are canonical. + +## Top-level +- `schema_version` (string, required): Use `v1`. +- `batch_id` (string, required): Stable identifier for the run. +- `created_at` (string, required): ISO 8601 timestamp. +- `toolchain` (object, required): Versions for the pipeline and tools. +- `global_defaults` (object, optional): Shared defaults for titles. +- `titles` (array, required): Per-title records. 
+ +## toolchain +- `pipeline_version` (string) +- `runtime_version` (string) +- `ffmpeg_version` (string) +- `emulator_version` (string, optional) + +## global_defaults +- `resolution` (string, example: `1920x1080`) +- `frame_rate` (number, example: `60`) +- `audio_rate` (number, example: `48000`) +- `renderer_settings` (string) +- `metrics_thresholds` (object) + +## titles[] +- `title_id` (string, required) +- `title_name` (string, required) +- `version` (string, required) +- `region` (string, required) +- `build_id` (string, optional) +- `inputs` (object, required) +- `validation` (object, required) +- `status` (object, required) +- `artifacts` (object, optional) + +## inputs +- `provenance_record` (string, required): Path to provenance file. +- `reference_captures` (array, required): List of reference capture ids. +- `input_traces` (array, required): List of trace ids. + +## validation +- `scene_list` (array, required): Scene ids used for comparison. +- `targets` (object, required): Per-title overrides for resolution, fps, audio. +- `metrics_thresholds` (object, optional): Per-title overrides. + +## status +- `state` (string, required): `pending`, `running`, `passed`, `failed`, `needs_review`. +- `last_updated` (string, required): ISO 8601 timestamp. 
+- `notes` (string, optional) + +## artifacts +- `report_path` (string, optional) +- `metrics_dir` (string, optional) +- `captures_dir` (string, optional) + +## Example (JSON) +```json +{ + "schema_version": "v1", + "batch_id": "switch-2026-02-03", + "created_at": "2026-02-03T09:20:00Z", + "toolchain": { + "pipeline_version": "0.1.0", + "runtime_version": "0.1.0", + "ffmpeg_version": "6.1" + }, + "global_defaults": { + "resolution": "1920x1080", + "frame_rate": 60, + "audio_rate": 48000, + "renderer_settings": "default", + "metrics_thresholds": { + "ssim_min": 0.95, + "psnr_min": 35.0, + "vmaf_min": 90.0, + "audio_lufs_delta_max": 2.0 + } + }, + "titles": [ + { + "title_id": "TID-0001", + "title_name": "Example Title", + "version": "1.0.0", + "region": "US", + "build_id": "ABCD1234", + "inputs": { + "provenance_record": "provenance/TID-0001.toml", + "reference_captures": ["REF-001"], + "input_traces": ["TRACE-001"] + }, + "validation": { + "scene_list": ["SCN-001", "SCN-002"], + "targets": { + "resolution": "1920x1080", + "frame_rate": 60, + "audio_rate": 48000 + } + }, + "status": { + "state": "pending", + "last_updated": "2026-02-03T09:20:00Z" + }, + "artifacts": { + "report_path": "reports/TID-0001/summary.json", + "metrics_dir": "reports/TID-0001/metrics", + "captures_dir": "captures/TID-0001" + } + } + ] +} +``` diff --git a/docs/batch-pipeline-layout.md b/docs/batch-pipeline-layout.md new file mode 100644 index 0000000..a90ea2f --- /dev/null +++ b/docs/batch-pipeline-layout.md @@ -0,0 +1,55 @@ +# Sample Pipeline Layout + +This is a recommended directory layout for batch execution. Adjust names to match +local conventions, but keep the separation of inputs, captures, metrics, and reports. 
+ +``` +workspace/ + manifests/ + batch-2026-02-03.json + inputs/ + provenance/ + TID-0001.toml + traces/ + TRACE-001.json + references/ + REF-001.mp4 + builds/ + TID-0001/ + recomp/ + Cargo.toml + logs/ + build.log + runs/ + TID-0001/ + captures/ + recomp.mp4 + metrics/ + ssim.log + psnr.log + vmaf.json + ref_ebur128.log + test_ebur128.log + reports/ + summary.json + summary.txt + perf/ + frame_times.csv + gpu_stats.json +``` + +## Per-title pipeline stages +1. Intake + - Validate provenance file. + - Verify required reference captures and input traces exist. +2. Build + - Run recompilation and capture build logs. +3. Run + capture + - Execute with deterministic input trace. + - Store capture video/audio and raw logs. +4. Compare + - Run A/V metrics and produce summary. +5. Performance profile + - Capture frame-time and resource metrics. +6. Report + - Emit per-title summary and update manifest status. diff --git a/docs/static-recomp-skills.md b/docs/static-recomp-skills.md new file mode 100644 index 0000000..3c29a9d --- /dev/null +++ b/docs/static-recomp-skills.md @@ -0,0 +1,46 @@ +# Static Recompilation Skills + +This project uses a set of Codex skills to accelerate static recompilation +validation and batch processing. Canonical copies live in `skills/` and can be +installed into a local Codex skills directory for use. + +## Skill Set +- `static-recomp-scope-plan`: + Define project scope, legal boundaries, validation matrix, and exit criteria. +- `static-recomp-batch-harness`: + Catalog-scale harness design, manifest schema, artifact layout, and gates. +- `static-recomp-reference-capture`: + Reference capture and normalization guidance. +- `static-recomp-input-replay`: + Deterministic input trace capture and replay guidance. +- `static-recomp-av-compare`: + A/V alignment and metrics, plus batch and threshold automation scripts. +- `static-recomp-perf-profile`: + Performance profiling and regression reporting guidance. 
+- `static-recomp-regression-triage`: + Regression classification and root-cause workflow. + +## Repo Skill Copies +The skill definitions are stored in `skills/` so contributors can install them +locally and keep them in sync with project workflows. + +Install example: +```bash +rsync -a skills/static-recomp-av-compare/ "$CODEX_HOME/skills/static-recomp-av-compare/" +``` + +## Project-Level Configuration +Use these repo templates to keep validation and reporting consistent across +future titles and projects. + +- Validation matrix template: `docs/validation-matrix-template.md` +- Per-title run sheet template: `docs/title-run-sheet-template.md` +- Default A/V thresholds: `docs/thresholds/default.json` +- Batch manifest schema: `docs/batch-manifest-schema.md` +- Batch pipeline layout: `docs/batch-pipeline-layout.md` +- Batch manifest JSON schema: `docs/batch-manifest-schema.json` + +## Recommended Practice +- Keep all proprietary inputs outside the repo. +- Record provenance in per-title run sheets. +- Use the validation matrix for acceptance criteria and traceability. diff --git a/docs/thresholds/default.json b/docs/thresholds/default.json new file mode 100644 index 0000000..70c882f --- /dev/null +++ b/docs/thresholds/default.json @@ -0,0 +1,7 @@ +{ + "ssim_min": 0.95, + "psnr_min": 35.0, + "vmaf_min": 90.0, + "audio_lufs_delta_max": 2.0, + "audio_peak_delta_max": 2.0 +} diff --git a/docs/title-run-sheet-template.md b/docs/title-run-sheet-template.md new file mode 100644 index 0000000..25d5208 --- /dev/null +++ b/docs/title-run-sheet-template.md @@ -0,0 +1,58 @@ +# Title Run Sheet Template + +Use this run sheet to track per-title validation and artifact collection. + +## Title Info +- Title: +- Version: +- Region: +- Build ID: +- Input provenance record: +- Target runtime: + +## Legal and Asset Boundaries +- Inputs are user-provided and legally obtained. +- No proprietary binaries, keys, or assets are committed. 
+- Outputs and metadata are stored separately from inputs. + +## Reference Captures +| Capture ID | Source (hardware/emulator) | Tool Version | Settings | Path | Notes | +| --- | --- | --- | --- | --- | --- | +| REF-001 | | | | | | + +## Input Traces +| Trace ID | Device | Time Base | Duration | Path | Notes | +| --- | --- | --- | --- | --- | --- | +| TRACE-001 | | | | | | + +## Validation Settings +- Resolution: +- Frame rate: +- Audio rate: +- Offset (if any): +- Scene list: + +## Run Steps +- Intake: [ ] +- Build/recompile: [ ] +- Capture: [ ] +- A/V compare: [ ] +- Performance profile: [ ] +- Report and archive: [ ] + +## Results Summary +- Overall status: +- Failed scenes: +- Top regressions: +- Manual review required: + +## Artifacts +- Validation matrix: +- Summary report: +- Metric logs: +- Captures: +- Performance traces: + +## Follow-ups +- Issues filed: +- Next run scheduled: diff --git a/docs/validation-matrix-template.md b/docs/validation-matrix-template.md new file mode 100644 index 0000000..71e273d --- /dev/null +++ b/docs/validation-matrix-template.md @@ -0,0 +1,35 @@ +# Validation Matrix Template + +Use this template to define measurable acceptance criteria per scene. + +## Instructions +- Keep inputs and outputs separate and do not commit proprietary captures. +- Use user-provided or legally obtained reference captures. +- Record all tool versions and settings. 
+ +## Global Targets +- Resolution: +- Frame rate: +- Audio rate: +- Renderer settings: +- Input trace: +- Baseline hardware or emulator: + +## Matrix +| Scene ID | Scene Description | Reference Source | Input Trace | Video Metrics (SSIM/PSNR/VMAF) | Audio Metrics (LUFS/Peak/Drift) | Perf Targets (avg/1%/0.1%) | Stability | Pass/Fail | Notes | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| SCN-001 | Boot to main menu | | | | | | | | | +| SCN-002 | First playable loop | | | | | | | | | +| SCN-003 | UI overlay stress | | | | | | | | | + +## Acceptance Criteria Guidance +- Video: specify minimum acceptable SSIM/PSNR/VMAF per scene. +- Audio: specify maximum drift (ms) and acceptable LUFS delta. +- Performance: specify budgets and allowable variance. +- Stability: no crashes or hangs in any scene. + +## Evidence Checklist +- Reference capture path and metadata. +- Recompiled capture path and metadata. +- Metric logs and summary JSON. +- Any manual review notes with timestamps. diff --git a/skills/README.md b/skills/README.md new file mode 100644 index 0000000..029e324 --- /dev/null +++ b/skills/README.md @@ -0,0 +1,19 @@ +# Codex Skills (Repo Copy) + +This directory contains the project Codex skills so contributors can install them +locally and keep them aligned with the workflow documented in `docs/static-recomp-skills.md`. + +## Install +Copy one or more skills into your local Codex skills directory. + +```bash +rsync -a skills/static-recomp-av-compare/ "$CODEX_HOME/skills/static-recomp-av-compare/" +``` + +## Upgrade +Re-run the same `rsync` command to update the local copy. If you have local edits, +back them up first to avoid overwriting. + +## Notes +- Keep skill paths stable and avoid adding proprietary assets. +- Update `docs/static-recomp-skills.md` when the skill set changes. 
diff --git a/skills/static-recomp-av-compare/SKILL.md b/skills/static-recomp-av-compare/SKILL.md new file mode 100644 index 0000000..0d79a11 --- /dev/null +++ b/skills/static-recomp-av-compare/SKILL.md @@ -0,0 +1,81 @@ +--- +name: static-recomp-av-compare +description: Compare reference and recompiled audio/video outputs with alignment, metrics, and thresholds. Use when validating visual or audio fidelity, computing similarity metrics, or generating automated A/V comparison reports. +--- + +# Static Recomp A/V Compare + +## Overview +Align and compare reference captures against recompiled outputs using repeatable metrics and clear pass/fail thresholds. + +## Workflow +1. Normalize inputs. + - Match resolution, frame rate, and aspect ratio. + - Match audio sample rate, channel layout, and length. +2. Align timelines. + - Use a known sync event (boot logo, sound cue, scene transition). + - Apply a fixed offset if one stream starts earlier. + - Verify alignment with a short manual check before running full metrics. +3. Compute video similarity. + - Use SSIM and PSNR for fast regression checks. + - Use VMAF when perceptual quality is critical. +4. Compute audio similarity. + - Compare loudness (EBU R128) and true peak. + - Inspect for drift or missing segments. +5. Summarize results. + - Produce per-scene metrics and overall aggregates. + - Flag outliers for manual review. + +## Automation scripts +1. Compare a single scene with `scripts/compare_av.py`. +2. Batch multiple scenes with `scripts/batch_compare_av.py` and a manifest. +3. Convert `summary.json` to pass/fail with `scripts/check_summary.py`. 
+ +### Single scene +```bash +python3 "$CODEX_HOME/skills/static-recomp-av-compare/scripts/compare_av.py" \ + --ref ref.mp4 \ + --test recomp.mp4 \ + --out-dir out/scene-01 \ + --label "scene-01" \ + --width 1920 \ + --height 1080 \ + --fps 60 \ + --audio-rate 48000 \ + --offset 0.250 \ + --trim-start 5.0 \ + --duration 30.0 +``` + +### Threshold check +```bash +python3 "$CODEX_HOME/skills/static-recomp-av-compare/scripts/check_summary.py" \ + out/scene-01/summary.json \ + --thresholds thresholds/default.json +``` + +### Batch run +```bash +python3 "$CODEX_HOME/skills/static-recomp-av-compare/scripts/batch_compare_av.py" \ + manifests/av-batch.json +``` + +Notes: +- Requires `ffmpeg` on PATH. The scripts will use `libvmaf` if available. +- Use `--no-vmaf` to skip VMAF. +- See `references/av-batch-manifest.md` for manifest schema and example. +- A baseline thresholds file is provided at `references/default-thresholds.json`. + +## Outputs +- Per-scene metrics (SSIM, PSNR, VMAF, loudness). +- A summary report with pass/fail thresholds and top mismatches. +- Links to aligned captures used for comparison. + +## References +- Batch manifest: `references/av-batch-manifest.md` +- Default thresholds: `references/default-thresholds.json` + +## Quality bar +- Alignment must be verified before metric runs. +- Metrics must be repeatable and tied to explicit thresholds. +- Visual or audio mismatches must be paired with evidence artifacts. 
diff --git a/skills/static-recomp-av-compare/agents/openai.yaml b/skills/static-recomp-av-compare/agents/openai.yaml new file mode 100644 index 0000000..fbf8234 --- /dev/null +++ b/skills/static-recomp-av-compare/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Av Compare" + short_description: "Help with Static Recomp Av Compare tasks" diff --git a/skills/static-recomp-av-compare/references/av-batch-manifest.md b/skills/static-recomp-av-compare/references/av-batch-manifest.md new file mode 100644 index 0000000..271f7ec --- /dev/null +++ b/skills/static-recomp-av-compare/references/av-batch-manifest.md @@ -0,0 +1,56 @@ +# A/V Batch Manifest (v1) + +This manifest drives `scripts/batch_compare_av.py`. + +## Schema +Top-level object: +- `schema_version` (string, required): use `v1`. +- `scenes` (array, required). + +Scene entry keys: +- `id` (string, required) +- `label` (string, optional) +- `ref` (string, required): path to reference video +- `test` (string, required): path to recompiled video +- `out_dir` (string, required): output directory +- `width` (number, optional) +- `height` (number, optional) +- `fps` (number, optional) +- `audio_rate` (number, optional) +- `offset` (number, optional) +- `trim_start` (number, optional) +- `duration` (number, optional) +- `no_vmaf` (boolean, optional) +- `thresholds` (string, optional): path to thresholds JSON + +## Thresholds JSON +Keys: +- `ssim_min` (number) +- `psnr_min` (number) +- `vmaf_min` (number) +- `audio_lufs_delta_max` (number) +- `audio_peak_delta_max` (number) + +## Example +```json +{ + "schema_version": "v1", + "scenes": [ + { + "id": "SCN-001", + "label": "boot-to-menu", + "ref": "captures/ref/boot.mp4", + "test": "captures/recomp/boot.mp4", + "out_dir": "reports/boot", + "width": 1920, + "height": 1080, + "fps": 60, + "audio_rate": 48000, + "offset": 0.25, + "trim_start": 5.0, + "duration": 30.0, + "thresholds": "thresholds/default.json" + } + ] +} +``` diff --git 
a/skills/static-recomp-av-compare/references/default-thresholds.json b/skills/static-recomp-av-compare/references/default-thresholds.json new file mode 100644 index 0000000..70c882f --- /dev/null +++ b/skills/static-recomp-av-compare/references/default-thresholds.json @@ -0,0 +1,7 @@ +{ + "ssim_min": 0.95, + "psnr_min": 35.0, + "vmaf_min": 90.0, + "audio_lufs_delta_max": 2.0, + "audio_peak_delta_max": 2.0 +} diff --git a/skills/static-recomp-av-compare/scripts/batch_compare_av.py b/skills/static-recomp-av-compare/scripts/batch_compare_av.py new file mode 100755 index 0000000..899e04c --- /dev/null +++ b/skills/static-recomp-av-compare/scripts/batch_compare_av.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 +""" +Batch A/V comparison using a manifest. + +Manifest format: JSON with a top-level "scenes" array. +Each scene entry must include ref, test, out_dir, and id. +""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path +from typing import Any, Dict, List + + +class BatchError(Exception): + pass + + +def load_manifest(path: Path) -> Dict[str, Any]: + if not path.exists(): + raise BatchError(f"Manifest not found: {path}") + return json.loads(path.read_text()) + + +def run_command(cmd: List[str]) -> int: + return subprocess.call(cmd) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Batch compare A/V scenes from a manifest.") + parser.add_argument("manifest", help="Path to manifest JSON") + parser.add_argument("--stop-on-fail", action="store_true", help="Stop after first failure") + parser.add_argument("--thresholds", help="Thresholds JSON applied to all scenes") + + args = parser.parse_args() + + manifest = load_manifest(Path(args.manifest)) + scenes = manifest.get("scenes") + if not isinstance(scenes, list) or not scenes: + raise BatchError("Manifest must include a non-empty 'scenes' array") + + base_dir = Path(args.manifest).resolve().parent + results = [] + failures = 0 
+ + for scene in scenes: + if not isinstance(scene, dict): + raise BatchError("Scene entries must be objects") + scene_id = scene.get("id") or scene.get("label") + if not scene_id: + raise BatchError("Scene entry missing 'id'") + + ref = scene.get("ref") + test = scene.get("test") + out_dir = scene.get("out_dir") + if not ref or not test or not out_dir: + raise BatchError(f"Scene {scene_id} missing ref/test/out_dir") + + ref_path = str((base_dir / ref).resolve()) if not Path(ref).is_absolute() else ref + test_path = str((base_dir / test).resolve()) if not Path(test).is_absolute() else test + out_path = str((base_dir / out_dir).resolve()) if not Path(out_dir).is_absolute() else out_dir + + compare_cmd = [ + sys.executable, + str(Path(__file__).with_name("compare_av.py")), + "--ref", + ref_path, + "--test", + test_path, + "--out-dir", + out_path, + "--label", + scene.get("label", scene_id), + ] + + for key in ("width", "height", "fps", "audio_rate", "offset", "trim_start", "duration"): + if key in scene and scene[key] is not None: + compare_cmd.extend([f"--{key.replace('_', '-')}", str(scene[key])]) + + if scene.get("no_vmaf"): + compare_cmd.append("--no-vmaf") + + status = run_command(compare_cmd) + summary_path = str(Path(out_path) / "summary.json") + + threshold_file = scene.get("thresholds") or args.thresholds + check_status = None + pass_fail_path = None + if threshold_file: + threshold_path = ( + str((base_dir / threshold_file).resolve()) + if not Path(threshold_file).is_absolute() + else threshold_file + ) + check_cmd = [ + sys.executable, + str(Path(__file__).with_name("check_summary.py")), + summary_path, + "--thresholds", + threshold_path, + ] + check_status = run_command(check_cmd) + pass_fail_path = str(Path(summary_path).with_name("pass_fail.json")) + + scene_result = { + "id": scene_id, + "compare_status": status, + "check_status": check_status, + "summary": summary_path, + "pass_fail": pass_fail_path, + } + results.append(scene_result) + + if status != 
0 or (check_status is not None and check_status != 0): + failures += 1 + if args.stop_on_fail: + break + + output = { + "manifest": str(Path(args.manifest).resolve()), + "scenes": results, + "failures": failures, + "status": "fail" if failures else "pass", + } + + output_path = Path(args.manifest).with_name("batch_summary.json") + output_path.write_text(json.dumps(output, indent=2)) + print(f"Wrote {output_path}") + + return 1 if failures else 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except BatchError as exc: + print(f"error: {exc}", file=sys.stderr) + raise SystemExit(2) diff --git a/skills/static-recomp-av-compare/scripts/check_summary.py b/skills/static-recomp-av-compare/scripts/check_summary.py new file mode 100755 index 0000000..6c7f116 --- /dev/null +++ b/skills/static-recomp-av-compare/scripts/check_summary.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +Convert an A/V comparison summary.json into pass/fail based on thresholds. +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any, Dict, Optional + + +DEFAULT_THRESHOLDS = { + "ssim_min": 0.95, + "psnr_min": 35.0, + "vmaf_min": 90.0, + "audio_lufs_delta_max": 2.0, + "audio_peak_delta_max": 2.0, +} + + +class ValidationError(Exception): + pass + + +def load_json(path: Path) -> Dict[str, Any]: + if not path.exists(): + raise ValidationError(f"File not found: {path}") + return json.loads(path.read_text()) + + +def get_metric(summary: Dict[str, Any], key_path: str) -> Optional[float]: + current: Any = summary + for key in key_path.split("."): + if not isinstance(current, dict) or key not in current: + return None + current = current[key] + if isinstance(current, (int, float)): + return float(current) + return None + + +def main() -> int: + parser = argparse.ArgumentParser(description="Check summary.json against thresholds.") + parser.add_argument("summary", help="Path to summary.json") + 
+    parser.add_argument("--thresholds", help="Path to thresholds.json")
+    parser.add_argument("--out", help="Output JSON path", default="")
+    parser.add_argument("--strict", action="store_true", help="Fail if a metric is missing")
+
+    args = parser.parse_args()
+
+    summary = load_json(Path(args.summary))
+
+    thresholds = dict(DEFAULT_THRESHOLDS)
+    if args.thresholds:
+        thresholds.update(load_json(Path(args.thresholds)))
+
+    results = []
+    failures = 0
+
+    def check_min(label: str, value: Optional[float], threshold: float) -> None:
+        nonlocal failures
+        status = "pass"
+        if value is None:
+            status = "missing"
+            if args.strict:
+                failures += 1
+        elif value < threshold:
+            status = "fail"
+            failures += 1
+        results.append({"metric": label, "value": value, "threshold": threshold, "status": status})
+
+    def check_max(label: str, value: Optional[float], threshold: float) -> None:
+        nonlocal failures
+        status = "pass"
+        if value is None:
+            status = "missing"
+            if args.strict:
+                failures += 1
+        elif value > threshold:
+            status = "fail"
+            failures += 1
+        results.append({"metric": label, "value": value, "threshold": threshold, "status": status})
+
+    ssim_avg = get_metric(summary, "video.ssim.average")
+    psnr_avg = get_metric(summary, "video.psnr.average")
+    vmaf_avg = get_metric(summary, "video.vmaf.average")
+
+    ref_lufs = get_metric(summary, "audio.reference.integrated_lufs")
+    test_lufs = get_metric(summary, "audio.test.integrated_lufs")
+    lufs_delta = None if ref_lufs is None or test_lufs is None else abs(ref_lufs - test_lufs)
+
+    ref_peak = get_metric(summary, "audio.reference.true_peak_dbtp")
+    test_peak = get_metric(summary, "audio.test.true_peak_dbtp")
+    peak_delta = None if ref_peak is None or test_peak is None else abs(ref_peak - test_peak)
+
+    check_min("ssim_avg", ssim_avg, float(thresholds["ssim_min"]))
+    check_min("psnr_avg", psnr_avg, float(thresholds["psnr_min"]))
+
+    if vmaf_avg is not None:
+        check_min("vmaf_avg", vmaf_avg, float(thresholds["vmaf_min"]))
+    else:
+        results.append({
+            "metric": "vmaf_avg",
+            "value": None,
+            "threshold": float(thresholds["vmaf_min"]),
+            "status": "missing",
+        })
+        if args.strict:
+            failures += 1
+
+    check_max("audio_lufs_delta", lufs_delta, float(thresholds["audio_lufs_delta_max"]))
+    check_max("audio_peak_delta", peak_delta, float(thresholds["audio_peak_delta_max"]))
+
+    output = {
+        "label": summary.get("label"),
+        "summary_path": str(Path(args.summary).resolve()),
+        "thresholds": thresholds,
+        "checks": results,
+        "status": "fail" if failures else "pass",
+        "failures": failures,
+    }
+
+    out_path = Path(args.out) if args.out else Path(args.summary).with_name("pass_fail.json")
+    out_path.write_text(json.dumps(output, indent=2))
+    print(f"Wrote {out_path}")
+
+    return 1 if failures else 0
+
+
+if __name__ == "__main__":
+    try:
+        raise SystemExit(main())
+    except ValidationError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        raise SystemExit(2)
diff --git a/skills/static-recomp-av-compare/scripts/compare_av.py b/skills/static-recomp-av-compare/scripts/compare_av.py
new file mode 100755
index 0000000..c8dd7f1
--- /dev/null
+++ b/skills/static-recomp-av-compare/scripts/compare_av.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python3
+"""
+Compare reference and test A/V captures and produce a summary report.
+
+Requires: ffmpeg on PATH. libvmaf is optional.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+
+# Named groups are required below (match.group("all") etc.).
+SSIM_RE = re.compile(r"All:(?P<all>[0-9.]+)")
+PSNR_RE = re.compile(r"psnr_avg:(?P<avg>(?:inf|[0-9.]+))")
+EBU_I_RE = re.compile(r"\bI:\s*(?P<i>-?\d+(?:\.\d+)?)\s*LUFS")
+EBU_PEAK_RE = re.compile(r"\bPeak:\s*(?P<peak>-?\d+(?:\.\d+)?)\s*dBTP")
+
+
+class RunError(Exception):
+    pass
+
+
+def check_ffmpeg() -> None:
+    if shutil.which("ffmpeg") is None:
+        raise RunError("ffmpeg not found in PATH. Install ffmpeg to use this script.")
+
+
+def has_libvmaf() -> bool:
+    try:
+        result = subprocess.run(
+            ["ffmpeg", "-hide_banner", "-filters"],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+    except subprocess.CalledProcessError:
+        return False
+    return "libvmaf" in result.stdout
+
+
+def run(cmd: List[str]) -> None:
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as exc:
+        raise RunError(f"Command failed: {' '.join(cmd)}") from exc
+
+
+def run_capture(cmd: List[str]) -> str:
+    try:
+        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as exc:
+        raise RunError(f"Command failed: {' '.join(cmd)}") from exc
+    return result.stderr + "\n" + result.stdout
+
+
+def parse_ssim(path: Path) -> Dict[str, Any]:
+    values: List[float] = []
+    if not path.exists():
+        return {"samples": 0, "average": None}
+    for line in path.read_text().splitlines():
+        match = SSIM_RE.search(line)
+        if match:
+            values.append(float(match.group("all")))
+    if not values:
+        return {"samples": 0, "average": None}
+    return {"samples": len(values), "average": sum(values) / len(values)}
+
+
+def parse_psnr(path: Path) -> Dict[str, Any]:
+    values: List[float] = []
+    if not path.exists():
+        return {"samples": 0, "average": None}
+    for line in path.read_text().splitlines():
+        match = PSNR_RE.search(line)
+        if match:
+            value = match.group("avg")
+            if value == "inf":
+                continue
+            values.append(float(value))
+    if not values:
+        return {"samples": 0, "average": None}
+    return {"samples": len(values), "average": sum(values) / len(values)}
+
+
+def parse_vmaf(path: Path) -> Dict[str, Any]:
+    if not path.exists():
+        return {"samples": 0, "average": None, "min": None, "max": None}
+    data = json.loads(path.read_text())
+    frames = data.get("frames", [])
+    values = [frame.get("metrics", {}).get("vmaf") for frame in frames]
+    values = [value for value in values if isinstance(value, (int, float))]
+    if not values:
+        return {"samples": 0, "average": None, "min": None, "max": None}
+    return {
+        "samples": len(values),
+        "average": sum(values) / len(values),
+        "min": min(values),
+        "max": max(values),
+    }
+
+
+def parse_ebur128(output: str) -> Dict[str, Optional[float]]:
+    integrated = None
+    true_peak = None
+    for line in output.splitlines():
+        match_i = EBU_I_RE.search(line)
+        if match_i:
+            integrated = float(match_i.group("i"))
+        match_peak = EBU_PEAK_RE.search(line)
+        if match_peak:
+            true_peak = float(match_peak.group("peak"))
+    return {"integrated_lufs": integrated, "true_peak_dbtp": true_peak}
+
+
+def build_video_filter(width: Optional[int], height: Optional[int], fps: Optional[float]) -> str:
+    parts = []
+    if width and height:
+        parts.append(f"scale={width}:{height}:flags=bicubic")
+    if fps:
+        parts.append(f"fps={fps}")
+    parts.append("setsar=1")
+    return ",".join(parts)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Compare reference vs test A/V captures.")
+    parser.add_argument("--ref", required=True, help="Reference video file")
+    parser.add_argument("--test", required=True, help="Test video file")
+    parser.add_argument("--out-dir", required=True, help="Output directory")
+    parser.add_argument("--label", default="comparison", help="Label for this run")
+    parser.add_argument("--width", type=int, help="Force video width")
+    parser.add_argument("--height", type=int, help="Force video height")
+    parser.add_argument("--fps", type=float, help="Force video fps")
+    parser.add_argument("--audio-rate", type=int, default=48000, help="Audio sample rate")
+    parser.add_argument("--offset", type=float, default=0.0, help="Offset reference by seconds")
+    parser.add_argument("--trim-start", type=float, default=0.0, help="Trim start seconds")
+    parser.add_argument("--duration", type=float, help="Duration in seconds")
+    parser.add_argument("--no-vmaf", action="store_true", help="Skip VMAF even if available")
+
+    args = parser.parse_args()
+
+    check_ffmpeg()
+
+    out_dir = Path(args.out_dir).resolve()
+    metrics_dir = out_dir / "metrics"
+    metrics_dir.mkdir(parents=True, exist_ok=True)
+
+    ssim_path = metrics_dir / "ssim.log"
+    psnr_path = metrics_dir / "psnr.log"
+    vmaf_path = metrics_dir / "vmaf.json"
+
+    filter_parts = []
+    vfilter = build_video_filter(args.width, args.height, args.fps)
+
+    filter_parts.append(f"[0:v]{vfilter}[v0]")
+    filter_parts.append(f"[1:v]{vfilter}[v1]")
+
+    # split must have exactly as many outputs as consumers; an unconnected
+    # pad ([v0c]/[v1c] without libvmaf) makes ffmpeg abort.
+    use_vmaf = (not args.no_vmaf) and has_libvmaf()
+    if use_vmaf:
+        filter_parts.append("[v0]split=3[v0a][v0b][v0c]")
+        filter_parts.append("[v1]split=3[v1a][v1b][v1c]")
+    else:
+        filter_parts.append("[v0]split=2[v0a][v0b]")
+        filter_parts.append("[v1]split=2[v1a][v1b]")
+    filter_parts.append(f"[v0a][v1a]ssim=stats_file={ssim_path}")
+    filter_parts.append(f"[v0b][v1b]psnr=stats_file={psnr_path}")
+    if use_vmaf:
+        filter_parts.append(f"[v0c][v1c]libvmaf=log_path={vmaf_path}:log_fmt=json")
+
+    filter_complex = ";".join(filter_parts)
+
+    input_opts: List[str] = []
+    if args.offset != 0.0:
+        input_opts += ["-itsoffset", str(args.offset)]
+    if args.trim_start:
+        input_opts += ["-ss", str(args.trim_start)]
+    if args.duration:
+        input_opts += ["-t", str(args.duration)]
+
+    ref_input = input_opts + ["-i", args.ref]
+
+    test_input: List[str] = []
+    if args.trim_start:
+        test_input += ["-ss", str(args.trim_start)]
+    if args.duration:
+        test_input += ["-t", str(args.duration)]
+    test_input += ["-i", args.test]
+
+    cmd = [
+        "ffmpeg",
+        "-hide_banner",
+        "-y",
+        *ref_input,
+        *test_input,
+        "-filter_complex",
+        filter_complex,
+        "-f",
+        "null",
+        "-",
+    ]
+
+    run(cmd)
+
+    ref_audio_log = metrics_dir / "ref_ebur128.log"
+    test_audio_log = metrics_dir / "test_ebur128.log"
+
+    ref_audio_cmd = [
+        "ffmpeg",
+        "-hide_banner",
+        "-y",
+        *ref_input,
+        "-filter_complex",
+        # ebur128's `framelog` option is a log level, not a file path; the
+        # loudness summary goes to stderr, which run_capture collects below.
+        f"[0:a]aresample={args.audio_rate},ebur128=peak=true",
+        "-f",
+        "null",
+        "-",
+    ]
+
+    test_audio_cmd = [
+        "ffmpeg",
+        "-hide_banner",
+        "-y",
+        *test_input,
+        "-filter_complex",
+        f"[0:a]aresample={args.audio_rate},ebur128=peak=true",
+        "-f",
+        "null",
+        "-",
+    ]
+
+    ref_audio_output = run_capture(ref_audio_cmd)
+    test_audio_output = run_capture(test_audio_cmd)
+    # Persist the captured ebur128 reports so the artifact paths below exist.
+    ref_audio_log.write_text(ref_audio_output)
+    test_audio_log.write_text(test_audio_output)
+
+    summary = {
+        "label": args.label,
+        "inputs": {
+            "reference": os.path.abspath(args.ref),
+            "test": os.path.abspath(args.test),
+        },
+        "settings": {
+            "width": args.width,
+            "height": args.height,
+            "fps": args.fps,
+            "audio_rate": args.audio_rate,
+            "offset": args.offset,
+            "trim_start": args.trim_start,
+            "duration": args.duration,
+            "vmaf": use_vmaf,
+        },
+        "video": {
+            "ssim": parse_ssim(ssim_path),
+            "psnr": parse_psnr(psnr_path),
+            "vmaf": parse_vmaf(vmaf_path) if use_vmaf else None,
+        },
+        "audio": {
+            "reference": parse_ebur128(ref_audio_output),
+            "test": parse_ebur128(test_audio_output),
+        },
+        "artifacts": {
+            "ssim_log": str(ssim_path),
+            "psnr_log": str(psnr_path),
+            "vmaf_log": str(vmaf_path) if use_vmaf else None,
+            "ref_ebur128_log": str(ref_audio_log),
+            "test_ebur128_log": str(test_audio_log),
+        },
+    }
+
+    summary_path = out_dir / "summary.json"
+    summary_path.write_text(json.dumps(summary, indent=2))
+
+    print(f"Wrote summary to {summary_path}")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except RunError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        sys.exit(1)
diff --git a/skills/static-recomp-batch-harness/SKILL.md b/skills/static-recomp-batch-harness/SKILL.md
new file mode 100644
index 0000000..48cc6ba
--- /dev/null
+++ b/skills/static-recomp-batch-harness/SKILL.md
@@ -0,0 +1,51 @@
+---
+name: static-recomp-batch-harness
+description: Design batch execution harnesses for static recompilation across many titles, including run manifests, artifact organization, and automated pass/fail gates. Use when building or improving large-scale static recompilation pipelines or catalog-wide validation.
+---
+
+# Static Recomp Batch Harness
+
+## Overview
+Scale static recompilation across many titles by standardizing inputs, outputs, and validation gates.
+
+## Workflow
+1. Build a run manifest.
+   - Track title, version, region, input availability, and status.
+   - Record required hardware, emulator versions, and runtime assumptions.
+   - Use `references/manifest-schema.md` as the canonical schema.
+2. Define the standard pipeline per title.
+   - Intake and validate inputs.
+   - Build or recompile.
+   - Run validation captures.
+   - Compare against references.
+   - Record summary metrics and artifacts.
+3. Set hard gates for automation.
+   - Fail fast on missing inputs or build failures.
+   - Mark soft failures for visual/audio mismatches that need review.
+   - Encode thresholds for performance regressions.
+4. Normalize artifact layout.
+   - Use stable, predictable paths for each title and run.
+   - Keep raw captures, derived metrics, and reports separate.
+   - Store metadata next to artifacts for reproducibility.
+   - See `references/pipeline-layout.md` for a sample directory layout.
+5. Enable safe parallelism.
+   - Bound CPU/GPU usage and I/O.
+   - Ensure per-title isolation to avoid data collisions.
+   - Use deterministic seed values for replayable tests.
+6. Provide human review hooks.
+   - Generate a compact report with top mismatches and links to artifacts.
+   - Allow manual override or acceptance for known deltas.
+
+## Outputs
+- A run manifest with status tracking.
+- A consistent artifact directory schema.
+- A summary report per batch with pass/fail and top regressions.
+
+## References
+- Manifest schema: `references/manifest-schema.md`
+- Sample pipeline layout: `references/pipeline-layout.md`
+
+## Quality bar
+- Batch runs must be reproducible with stable inputs and settings.
+- Failures must be classified with enough context to triage quickly.
+- The harness should avoid requiring proprietary assets beyond user-provided inputs.
diff --git a/skills/static-recomp-batch-harness/agents/openai.yaml b/skills/static-recomp-batch-harness/agents/openai.yaml
new file mode 100644
index 0000000..bb5a5cc
--- /dev/null
+++ b/skills/static-recomp-batch-harness/agents/openai.yaml
@@ -0,0 +1,3 @@
+interface:
+  display_name: "Static Recomp Batch Harness"
+  short_description: "Help with Static Recomp Batch Harness tasks"
diff --git a/skills/static-recomp-batch-harness/references/manifest-schema.md b/skills/static-recomp-batch-harness/references/manifest-schema.md
new file mode 100644
index 0000000..3179e50
--- /dev/null
+++ b/skills/static-recomp-batch-harness/references/manifest-schema.md
@@ -0,0 +1,113 @@
+# Batch Manifest Schema (v1)
+
+This schema is intended for catalog-scale batch runs. It records per-title inputs,
+validation targets, and status. Store as JSON or TOML; keys below are canonical.
+
+## Top-level
+- `schema_version` (string, required): Use `v1`.
+- `batch_id` (string, required): Stable identifier for the run.
+- `created_at` (string, required): ISO 8601 timestamp.
+- `toolchain` (object, required): Versions for the pipeline and tools.
+- `global_defaults` (object, optional): Shared defaults for titles.
+- `titles` (array, required): Per-title records.
+ +## toolchain +- `pipeline_version` (string) +- `runtime_version` (string) +- `ffmpeg_version` (string) +- `emulator_version` (string, optional) + +## global_defaults +- `resolution` (string, example: `1920x1080`) +- `frame_rate` (number, example: `60`) +- `audio_rate` (number, example: `48000`) +- `renderer_settings` (string) +- `metrics_thresholds` (object) + +## titles[] +- `title_id` (string, required) +- `title_name` (string, required) +- `version` (string, required) +- `region` (string, required) +- `build_id` (string, optional) +- `inputs` (object, required) +- `validation` (object, required) +- `status` (object, required) +- `artifacts` (object, optional) + +## inputs +- `provenance_record` (string, required): Path to provenance file. +- `reference_captures` (array, required): List of reference capture ids. +- `input_traces` (array, required): List of trace ids. + +## validation +- `scene_list` (array, required): Scene ids used for comparison. +- `targets` (object, required): Per-title overrides for resolution, fps, audio. +- `metrics_thresholds` (object, optional): Per-title overrides. + +## status +- `state` (string, required): `pending`, `running`, `passed`, `failed`, `needs_review`. +- `last_updated` (string, required): ISO 8601 timestamp. 
+- `notes` (string, optional) + +## artifacts +- `report_path` (string, optional) +- `metrics_dir` (string, optional) +- `captures_dir` (string, optional) + +## Example (JSON) +```json +{ + "schema_version": "v1", + "batch_id": "switch-2026-02-03", + "created_at": "2026-02-03T09:20:00Z", + "toolchain": { + "pipeline_version": "0.1.0", + "runtime_version": "0.1.0", + "ffmpeg_version": "6.1" + }, + "global_defaults": { + "resolution": "1920x1080", + "frame_rate": 60, + "audio_rate": 48000, + "renderer_settings": "default", + "metrics_thresholds": { + "ssim_min": 0.95, + "psnr_min": 35.0, + "vmaf_min": 90.0, + "audio_lufs_delta_max": 2.0 + } + }, + "titles": [ + { + "title_id": "TID-0001", + "title_name": "Example Title", + "version": "1.0.0", + "region": "US", + "build_id": "ABCD1234", + "inputs": { + "provenance_record": "provenance/TID-0001.toml", + "reference_captures": ["REF-001"], + "input_traces": ["TRACE-001"] + }, + "validation": { + "scene_list": ["SCN-001", "SCN-002"], + "targets": { + "resolution": "1920x1080", + "frame_rate": 60, + "audio_rate": 48000 + } + }, + "status": { + "state": "pending", + "last_updated": "2026-02-03T09:20:00Z" + }, + "artifacts": { + "report_path": "reports/TID-0001/summary.json", + "metrics_dir": "reports/TID-0001/metrics", + "captures_dir": "captures/TID-0001" + } + } + ] +} +``` diff --git a/skills/static-recomp-batch-harness/references/pipeline-layout.md b/skills/static-recomp-batch-harness/references/pipeline-layout.md new file mode 100644 index 0000000..a90ea2f --- /dev/null +++ b/skills/static-recomp-batch-harness/references/pipeline-layout.md @@ -0,0 +1,55 @@ +# Sample Pipeline Layout + +This is a recommended directory layout for batch execution. Adjust names to match +local conventions, but keep the separation of inputs, captures, metrics, and reports. 
+ +``` +workspace/ + manifests/ + batch-2026-02-03.json + inputs/ + provenance/ + TID-0001.toml + traces/ + TRACE-001.json + references/ + REF-001.mp4 + builds/ + TID-0001/ + recomp/ + Cargo.toml + logs/ + build.log + runs/ + TID-0001/ + captures/ + recomp.mp4 + metrics/ + ssim.log + psnr.log + vmaf.json + ref_ebur128.log + test_ebur128.log + reports/ + summary.json + summary.txt + perf/ + frame_times.csv + gpu_stats.json +``` + +## Per-title pipeline stages +1. Intake + - Validate provenance file. + - Verify required reference captures and input traces exist. +2. Build + - Run recompilation and capture build logs. +3. Run + capture + - Execute with deterministic input trace. + - Store capture video/audio and raw logs. +4. Compare + - Run A/V metrics and produce summary. +5. Performance profile + - Capture frame-time and resource metrics. +6. Report + - Emit per-title summary and update manifest status. diff --git a/skills/static-recomp-input-replay/SKILL.md b/skills/static-recomp-input-replay/SKILL.md new file mode 100644 index 0000000..9304be8 --- /dev/null +++ b/skills/static-recomp-input-replay/SKILL.md @@ -0,0 +1,39 @@ +--- +name: static-recomp-input-replay +description: Design deterministic input capture and replay harnesses for static recompilation validation. Use when recording controller input traces, building replay systems, or aligning emulator and recompiled runs. +--- + +# Static Recomp Input Replay + +## Overview +Record and replay deterministic input traces so emulator and recompiled builds can be compared using the same stimuli. + +## Workflow +1. Define the input trace format. + - Use time-ordered events with timestamps or frame indices. + - Record device type, buttons, axes, and analog ranges. +2. Select a stable time base. + - Prefer frame-indexed ticks if frame rate is locked. + - Otherwise record high-resolution timestamps with explicit units. +3. Capture input traces. + - Record from a controlled session with known settings. 
+ - Save seed values for RNG and time sources when possible. +4. Normalize and validate traces. + - Ensure monotonic time and no dropped events. + - Quantize to fixed tick rate if needed. +5. Build a replay harness. + - Inject inputs into emulator and recompiled runtime. + - Log applied inputs and any rejected events. +6. Validate determinism. + - Replay the same trace twice and compare hashes or state summaries. + - Flag nondeterministic outcomes as blocking issues. + +## Outputs +- Input trace files with stable time bases. +- A replay harness config describing injection method and target build. +- Determinism logs and hashes per run. + +## Quality bar +- Input traces must be replayable without manual intervention. +- Determinism must be tested and documented for each target build. +- Time bases and units must be explicit and consistent. diff --git a/skills/static-recomp-input-replay/agents/openai.yaml b/skills/static-recomp-input-replay/agents/openai.yaml new file mode 100644 index 0000000..729cc6f --- /dev/null +++ b/skills/static-recomp-input-replay/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Input Replay" + short_description: "Help with Static Recomp Input Replay tasks" diff --git a/skills/static-recomp-perf-profile/SKILL.md b/skills/static-recomp-perf-profile/SKILL.md new file mode 100644 index 0000000..2957f63 --- /dev/null +++ b/skills/static-recomp-perf-profile/SKILL.md @@ -0,0 +1,38 @@ +--- +name: static-recomp-perf-profile +description: Profile and compare performance of statically recompiled builds against reference runs, including frame time, CPU/GPU utilization, memory, and load times. Use when measuring performance regressions or tuning runtime behavior. +--- + +# Static Recomp Performance Profile + +## Overview +Measure performance with repeatable inputs and produce regression reports that map to specific subsystems. + +## Workflow +1. Define performance targets. + - Frame time budget and variance. 
+ - Load times for boot and scene transitions. + - CPU/GPU utilization ranges. +2. Use deterministic inputs. + - Reuse input traces for comparable runs. + - Ensure identical settings and hardware. +3. Capture metrics. + - Collect frame times and present 1% low, 0.1% low, and average. + - Record CPU, GPU, and memory usage. + - Track shader compilation stutter or asset streaming spikes. +4. Compare against baseline. + - Use the same scene list as A/V validation. + - Flag regressions above threshold. +5. Attribute causes. + - Map spikes to pipeline stages (CPU translation, GPU command handling, audio mixing). + - Collect logs or flamegraphs if needed. + +## Outputs +- A performance report with per-scene metrics. +- A regression summary with thresholds and suspected causes. +- A list of profiles or traces attached to the report. + +## Quality bar +- Results must be gathered under consistent settings and hardware. +- Each regression must include scene context and a suspected subsystem. +- Performance changes must be reproducible with the same input trace. diff --git a/skills/static-recomp-perf-profile/agents/openai.yaml b/skills/static-recomp-perf-profile/agents/openai.yaml new file mode 100644 index 0000000..da63e24 --- /dev/null +++ b/skills/static-recomp-perf-profile/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Perf Profile" + short_description: "Help with Static Recomp Perf Profile tasks" diff --git a/skills/static-recomp-reference-capture/SKILL.md b/skills/static-recomp-reference-capture/SKILL.md new file mode 100644 index 0000000..bb78387 --- /dev/null +++ b/skills/static-recomp-reference-capture/SKILL.md @@ -0,0 +1,43 @@ +--- +name: static-recomp-reference-capture +description: Collect and normalize reference video/audio and metadata for validating static recompilation outputs. Use when creating capture pipelines, curating reference footage, or standardizing A/V inputs for comparison. 
+--- + +# Static Recomp Reference Capture + +## Overview +Gather clean, legally obtained reference captures and normalize them into a consistent format for automated comparison. + +## Workflow +1. Confirm legal capture sources. + - Prefer user-provided hardware captures or authorized recordings. + - If using emulator footage, record emulator version and settings. + - Do not ingest proprietary binaries or keys. +2. Standardize capture settings. + - Lock resolution, aspect ratio, and frame rate. + - Disable dynamic resolution and variable frame pacing when possible. + - Set audio sample rate (example: 48 kHz) and channel layout. +3. Capture anchor scenes. + - Identify scenes that stress rendering, audio, UI, and gameplay. + - Capture from boot to first interactive state. + - Include a repeatable gameplay loop segment. +4. Normalize formats. + - Re-encode to a common container and codec. + - Trim to exact segments with timestamps. + - Preserve original captures as raw archives. +5. Produce metadata. + - Capture start time, duration, settings, and source details. + - Record any known differences (patches, mods, settings changes). +6. Validate capture quality. + - Check for dropped frames and audio desync. + - Ensure overlays, watermarks, or UI from capture tools are absent. + +## Outputs +- Normalized reference captures (video and audio). +- A metadata file per capture with settings and source details. +- A scene list with timestamps for automated comparison. + +## Quality bar +- Reference captures must be stable, reproducible, and legally obtained. +- Metadata must be sufficient to reproduce the capture. +- Segment selection must cover core rendering, audio, and gameplay behaviors. 
diff --git a/skills/static-recomp-reference-capture/agents/openai.yaml b/skills/static-recomp-reference-capture/agents/openai.yaml new file mode 100644 index 0000000..505b5d8 --- /dev/null +++ b/skills/static-recomp-reference-capture/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Reference Capture" + short_description: "Help with Static Recomp Reference Capture tasks" diff --git a/skills/static-recomp-regression-triage/SKILL.md b/skills/static-recomp-regression-triage/SKILL.md new file mode 100644 index 0000000..d26e480 --- /dev/null +++ b/skills/static-recomp-regression-triage/SKILL.md @@ -0,0 +1,43 @@ +--- +name: static-recomp-regression-triage +description: Triage visual, audio, input, and performance regressions in static recompilation results by mapping symptoms to pipeline components and proposing next debugging steps. Use when investigation or root-cause analysis is needed. +--- + +# Static Recomp Regression Triage + +## Overview +Classify regressions and map them to likely pipeline components so fixes can be prioritized quickly. + +## Workflow +1. Classify the failure. + - Crash, hang, or boot failure. + - Visual mismatch or missing draw calls. + - Audio artifacts, drift, or missing channels. + - Input latency or incorrect mapping. + - Performance regression or stutter. +2. Narrow to the smallest reproducible scene. + - Use the existing input trace and scene list. + - Trim captures to the shortest failing segment. +3. Map to pipeline components. + - Loader and relocation issues. + - CPU instruction semantics or ABI mismatches. + - OS or service stubs. + - GPU command translation and shader issues. + - Audio mixing and timing. +4. Collect targeted evidence. + - Logs, trace snippets, and minimal repro data. + - Side-by-side captures and metric deltas. +5. Propose next steps. + - Candidate instrumentation to add. + - Specific unit tests or microbenchmarks to create. + - Suggested code areas to inspect. 
+ +## Outputs +- A triage report with a root-cause hypothesis. +- A prioritized fix list with evidence links. +- A minimal reproduction recipe. + +## Quality bar +- Each hypothesis must be backed by concrete evidence. +- Proposed fixes must be testable with the same validation harness. +- The report must enable another engineer to reproduce the issue quickly. diff --git a/skills/static-recomp-regression-triage/agents/openai.yaml b/skills/static-recomp-regression-triage/agents/openai.yaml new file mode 100644 index 0000000..10c6f70 --- /dev/null +++ b/skills/static-recomp-regression-triage/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Regression Triage" + short_description: "Help with Static Recomp Regression Triage tasks" diff --git a/skills/static-recomp-scope-plan/SKILL.md b/skills/static-recomp-scope-plan/SKILL.md new file mode 100644 index 0000000..277f2cc --- /dev/null +++ b/skills/static-recomp-scope-plan/SKILL.md @@ -0,0 +1,49 @@ +--- +name: static-recomp-scope-plan +description: Plan and scope a static recompilation effort with clear legal input boundaries, behavioral targets, and validation acceptance criteria. Use when starting or rebooting a static recompilation project, or when a user asks for a validation plan, scope definition, or success criteria. +--- + +# Static Recomp Scope Plan + +## Overview +Define what "correct" means for a static recompilation and produce a concrete validation plan that can scale across many titles. + +## Workflow +1. Confirm legal and preservation boundaries. + - Require user-provided, legally obtained inputs. + - Do not request or store proprietary binaries, keys, or assets. + - Keep outputs and metadata separated from inputs. +2. Identify target build and execution envelope. + - Capture title, version, region, build ID, and platform assumptions. + - State target runtime environment (OS, GPU, audio stack, input devices). + - Set frame rate and resolution targets up front. +3. 
Define the behavioral surface area. + - Boot and menu flow. + - Core gameplay loops and scene transitions. + - Input handling and response timing. + - Audio, rendering, and loading behavior. +4. Build a validation matrix. + - Rows: features or scenes. + - Columns: video, audio, input, performance, stability. + - Mark each cell with acceptance criteria and a verification method. +5. Select reference sources. + - Prefer legal captures from hardware or user-supplied recordings. + - If using emulator footage, record emulator version and settings. + - Pick multiple anchor scenes that cover rendering, UI, audio, and gameplay. +6. Decide instrumentation and artifacts. + - Capture raw video/audio, logs, and performance counters. + - Ensure time sources are explicit and stable. + - Plan per-scene capture durations and naming conventions. +7. Define exit criteria. + - Set numeric thresholds (example: average VMAF >= 90, max audio drift < 20 ms). + - Require zero crashes and stable progression through anchor scenes. + +## Outputs +- A validation plan with acceptance criteria and a scene checklist. +- A reference capture plan describing sources, settings, and formats. +- A per-title run sheet describing artifacts to collect. + +## Quality bar +- Criteria must be measurable and repeatable. +- The plan must be runnable without proprietary assets beyond user-provided inputs. +- The plan should be optimized for batch execution across many titles. diff --git a/skills/static-recomp-scope-plan/agents/openai.yaml b/skills/static-recomp-scope-plan/agents/openai.yaml new file mode 100644 index 0000000..da69c41 --- /dev/null +++ b/skills/static-recomp-scope-plan/agents/openai.yaml @@ -0,0 +1,3 @@ +interface: + display_name: "Static Recomp Scope Plan" + short_description: "Help with Static Recomp Scope Plan tasks"