diff --git a/Cargo.lock b/Cargo.lock
index 824930b..4dfba6b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -52,6 +52,12 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
 [[package]]
 name = "bitflags"
 version = "2.10.0"
@@ -301,6 +307,12 @@ version = "0.1.0"
 dependencies = [
  "clap",
  "recomp-pipeline",
+ "recomp-validation",
+ "serde",
+ "serde_json",
+ "sha2",
+ "tempfile",
+ "toml",
 ]
 
 [[package]]
@@ -321,6 +333,7 @@ dependencies = [
 name = "recomp-pipeline"
 version = "0.1.0"
 dependencies = [
+ "base64",
  "lz4_flex",
  "pathdiff",
  "serde",
@@ -341,6 +354,7 @@ dependencies = [
  "serde",
  "serde_json",
  "thiserror",
+ "toml",
 ]
 
 [[package]]
@@ -366,7 +380,9 @@ dependencies = [
  "recomp-runtime",
  "serde",
  "serde_json",
+ "sha2",
  "tempfile",
+ "toml",
 ]
 
 [[package]]
diff --git a/PLANS.md b/PLANS.md
index 9bfdd74..26cbfd0 100644
--- a/PLANS.md
+++ b/PLANS.md
@@ -23,6 +23,10 @@ This file tracks implementation work derived from specs that do not yet have a c
 - SPEC-180 XCI Title Intake
 - SPEC-190 Video-Based Validation
 - SPEC-200 DKCR HD First-Level Milestone (macOS/aarch64)
+- SPEC-210 Automated Recompilation Loop
+- SPEC-220 Input Replay and Interaction Scripts
+- SPEC-230 Reference Media Normalization
+- SPEC-240 Validation Orchestration and Triage
 
 ## SPEC-000: Project Charter and Ethics
 Outcome
@@ -85,10 +89,10 @@ Outcome
 - Runtime memory layout is configurable via `title.toml` while preserving a safe default.
 
 Work items
-- [ ] Extend `title.toml` schema to include `runtime.memory_layout` regions.
-- [ ] Validate region overlap, zero sizes, and overflow errors.
-- [ ] Emit configured memory layout in `manifest.json` and generated runtime init.
-- [ ] Add tests for default layout and custom layout parsing.
+- [x] Extend `title.toml` schema to include `runtime.memory_layout` regions.
+- [x] Validate region overlap, zero sizes, and overflow errors.
+- [x] Emit configured memory layout in `manifest.json` and generated runtime init.
+- [x] Add tests for default layout and custom layout parsing.
 
 Exit criteria (from SPEC-046)
 - Custom memory layout in `title.toml` is parsed and emitted in `manifest.json`.
@@ -100,10 +104,10 @@ Outcome
 - Runtime memory is initialized from module segment metadata (code/rodata/data/bss).
 
 Work items
-- [ ] Define segment descriptor schema and carry it through pipeline output metadata.
-- [ ] Populate runtime memory regions with initial segment bytes and zeroed bss.
-- [ ] Validate init sizes and bounds during initialization.
-- [ ] Add tests covering initialized load/store behavior and error paths.
+- [x] Define segment descriptor schema and carry it through pipeline output metadata.
+- [x] Populate runtime memory regions with initial segment bytes and zeroed bss.
+- [x] Validate init sizes and bounds during initialization.
+- [x] Add tests covering initialized load/store behavior and error paths.
 
 Exit criteria (from SPEC-047)
 - A sample module with init bytes executes a load/store path against initialized memory.
@@ -280,11 +284,11 @@ Outcome
 - Intake XCI inputs with user-supplied keys and extract code/assets deterministically.
 
 Work items
-- [ ] Define the XCI intake CLI path and config schema extensions.
-- [ ] Integrate keyset validation and explicit Program NCA selection.
-- [ ] Extract ExeFS/NSO into deterministic segment blobs with hashes recorded.
-- [ ] Emit RomFS assets to a separate asset output root and record in manifest.
-- [ ] Add non-proprietary tests for intake validation and asset separation rules.
+- [x] Define the XCI intake CLI path and config schema extensions.
+- [x] Integrate keyset validation and explicit Program NCA selection.
+- [x] Extract ExeFS/NSO into deterministic segment blobs with hashes recorded.
+- [x] Emit RomFS assets to a separate asset output root and record in manifest.
+- [x] Add non-proprietary tests for intake validation and asset separation rules.
 
 Exit criteria (from SPEC-180)
 - XCI intake emits deterministic ExeFS/NSO outputs and a manifest with hashes.
@@ -295,12 +299,19 @@ Exit criteria (from SPEC-180)
 Outcome
 - Validate the recompiled output against a reference gameplay video without emulator traces.
 
+Note
+- DKCR validation is paused until the automation loop, input replay, and normalization specs land (SPEC-210/220/230/240).
+
 Work items
-- [ ] Define a reference timeline for the first level and store it in `reference_video.toml`.
-- [ ] Implement a capture workflow for macOS/aarch64 runtime output.
-- [ ] Add a comparison step that computes video and audio similarity metrics.
-- [ ] Generate a `validation-report.json` with pass/fail and drift summaries.
-- [ ] Document manual review steps for mismatches.
+- [x] Define a reference timeline for the first level and store it in `reference_video.toml`.
+- [x] Implement a capture workflow for macOS/aarch64 runtime output.
+- [x] Add a comparison step that computes video and audio similarity metrics.
+- [x] Generate a `validation-report.json` with pass/fail and drift summaries.
+- [x] Document manual review steps for mismatches.
+
+External prerequisites (see `docs/dkcr-validation-prereqs.md`)
+- Absolute paths to reference and capture artifacts (video or hashes).
+- Confirmed first-level start and end timecodes.
 
 Exit criteria (from SPEC-190)
 - A single run produces a validation report for the first level.
@@ -311,14 +322,80 @@ Exit criteria (from SPEC-190)
 Outcome
 - Produce a macOS/aarch64 static recompilation of DKCR HD that reaches and plays the first level.
 
+Note
+- DKCR validation is paused until SPEC-210/220/230/240 are implemented.
+
 Work items
-- [ ] Complete XCI intake for the DKCR HD title (SPEC-180 inputs and outputs).
-- [ ] Identify required OS services and implement or stub them in the runtime.
-- [ ] Implement the minimal GPU translation path needed for the first level.
-- [ ] Create a per-title config and patch set for DKCR HD.
-- [ ] Run video-based validation against the first level (SPEC-190).
+- [x] Complete XCI intake for the DKCR HD title (SPEC-180 inputs and outputs).
+- [x] Identify required OS services and implement or stub them in the runtime.
+- [x] Implement the minimal GPU translation path needed for the first level.
+- [x] Create a per-title config and patch set for DKCR HD.
+- [x] Run video-based validation against the first level (SPEC-190).
+
+External prerequisites (see `docs/dkcr-validation-prereqs.md`)
+- Absolute paths to DKCR reference and capture artifacts.
+- Confirmed first-level start and end timecodes.
 
 Exit criteria (from SPEC-200)
 - The macOS/aarch64 build boots and reaches the first playable level.
 - First-level gameplay matches the reference video within defined tolerances.
 - No proprietary assets or keys are stored in the repo or build outputs.
+
+## SPEC-210: Automated Recompilation Loop
+Outcome
+- Provide a one-command automation loop for intake, build, capture, and validation.
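+
+As a sketch, a minimal `automation.toml` for the `mode = "lifted"` case mirrors the
+fixture used by the automation integration test (paths are shortened here for
+illustration; relative paths are resolved against the config file's directory):
+
+```toml
+schema_version = "1"
+
+[inputs]
+mode = "lifted"
+module_json = "samples/minimal/module.json"
+provenance = "samples/minimal/provenance.toml"
+config = "samples/minimal/title.toml"
+
+[outputs]
+work_root = "work"
+
+[reference]
+reference_video_toml = "reference_video.toml"
+capture_video_toml = "capture_video.toml"
+
+[capture]
+video_path = "capture/capture.mp4"
+frames_dir = "capture/frames"
+
+[commands]
+build = ["/usr/bin/true"]
+run = ["/usr/bin/true"]
+capture = ["/usr/bin/true"]
+extract_frames = ["/usr/bin/true"]
+```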
+
+Work items
+- [x] Define `automation.toml` schema and validator.
+- [x] Implement an orchestrator CLI that runs intake -> lift -> build -> run -> capture -> validate.
+- [x] Emit a deterministic `run-manifest.json` with step timings and artifact hashes.
+- [x] Add resume/caching logic keyed by input hashes.
+- [x] Add integration tests using non-proprietary fixtures.
+
+Exit criteria (from SPEC-210)
+- One command runs the full loop and produces a run manifest and validation report.
+- Re-running with identical inputs yields identical artifacts.
+- Proprietary assets remain external.
+
+## SPEC-220: Input Replay and Interaction Scripts
+Outcome
+- Deterministic input playback aligned to reference timelines.
+
+Work items
+- [x] Define `input_script.toml` schema with events and markers.
+- [x] Implement input script loader and runtime playback module.
+- [x] Add tools/tests for deterministic playback and alignment.
+- [x] Document authoring and replay workflows.
+
+Exit criteria (from SPEC-220)
+- Input scripts replay deterministically across two runs.
+- Playback order is stable for simultaneous events.
+- Markers align to reference timecodes.
+
+## SPEC-230: Reference Media Normalization
+Outcome
+- Normalize reference video/audio into a canonical, comparable format.
+
+Work items
+- [x] Define canonical reference profile (resolution, fps, audio).
+- [x] Implement normalization workflow and metadata capture.
+- [x] Update `reference_video.toml` schema to record normalization details.
+- [x] Add hash generation tests for normalized outputs.
+
+Exit criteria (from SPEC-230)
+- Reference media can be normalized deterministically.
+- Hashes for normalized outputs are stable across runs.
+
+## SPEC-240: Validation Orchestration and Triage
+Outcome
+- Automated validation with structured reports and triage summaries.
+
+Work items
+- [x] Define `validation-config.toml` and report schema extensions.
+- [x] Implement triage summary generation (drift, likely causes).
+- [x] Integrate validation orchestration into the automation loop.
+- [x] Add tests for report determinism and failure summaries.
+
+Exit criteria (from SPEC-240)
+- Validation runs emit deterministic reports and triage summaries.
+- Failures include actionable context and artifact references.
diff --git a/RESEARCH.md b/RESEARCH.md
index 7727af1..165452a 100644
--- a/RESEARCH.md
+++ b/RESEARCH.md
@@ -66,6 +66,17 @@ Needed research:
 - Jurisdiction-specific rules affecting preservation.
 - Best practices for open source preservation tooling.
 
+### 8) Automation, Input Replay, and Media Normalization
+- Define a stable automation loop for intake, build, capture, and validation.
+- Normalize reference media and capture outputs into comparable artifacts.
+- Model deterministic input replay aligned to reference timelines.
+
+Needed research:
+- Capture tooling behavior and determinism guarantees.
+- Input timing and latency characteristics for Switch titles.
+- Video/audio similarity metrics and drift analysis.
+- Operational checklist for DKCR validation artifacts (see `docs/dkcr-validation-prereqs.md`).
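+
+As a starting point for the input-replay modeling above, a hypothetical
+`input_script.toml` shape for the SPEC-220 event/marker model might look like the
+following (every field name here is illustrative, not a settled schema):
+
+```toml
+schema_version = "1"
+
+# Markers align playback to reference timecodes.
+[[markers]]
+name = "level_start"
+timecode = "00:00:05.000"
+
+# Events fire at fixed timecodes; equal timecodes replay in listed order.
+[[events]]
+at = "00:00:05.500"
+control = "button_a"
+action = "press"
+
+[[events]]
+at = "00:00:06.000"
+control = "button_a"
+action = "release"
+```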
+
 ## Seed Resources (Reviewed)
 - Jamulator write-up on static recompilation pitfalls and concurrency: https://andrewkelley.me/post/jamulator.html
 - N64Recomp repository for pipeline patterns: https://github.com/N64Recomp/N64Recomp
@@ -81,6 +92,9 @@ Needed research:
 - hactool (XCI/NCA extraction and keyset handling): https://github.com/SciresM/hactool
 - hactoolnet (XCI/NCA extraction with user keys): https://github.com/Thealexbarney/hactoolnet
 - nstool (XCI/NCA/NSO extraction): https://github.com/jakcron/nstool
+- Switch HID input services: https://switchbrew.org/wiki/HID
+- FFmpeg documentation (capture, formats, filters): https://ffmpeg.org/ffmpeg.html
+- FFmpeg filters (SSIM/PSNR references): https://ffmpeg.org/ffmpeg-filters.html
 - Ghidra SLEIGH language reference (p-code semantics): https://github.com/NationalSecurityAgency/ghidra/blob/master/GhidraDocs/languages/html/sleigh.html
 - sleigh library (p-code lifting implementation): https://github.com/lifting-bits/sleigh
 - FFmpeg filter reference for SSIM/PSNR/EBU R128 audio analysis: https://manpages.debian.org/bookworm/ffmpeg/ffmpeg-filters.1.en.html
@@ -97,3 +111,4 @@ Needed research:
 - Which OS services are required to reach a game loop without patches?
 - What is the simplest graphics path that still produces correct output?
 - How can we generate reference traces without distributing proprietary content?
+- What is the minimal input script fidelity needed for stable validation?
diff --git a/ROADMAP.md b/ROADMAP.md
index 4661d75..fcc9280 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -34,6 +34,7 @@ Exit criteria:
 - Implement a basic GPU command path or a thin translation layer.
 - Render a test scene from recompiled code.
 - Add graphics conformance tests.
+- Define the automation loop inputs/outputs needed for validation.
 
 Exit criteria:
 - A test scene renders deterministically.
@@ -43,10 +44,12 @@ Exit criteria:
 - Select a preservation-safe title and provide a public build pipeline.
 - Expand instruction coverage to what the title needs.
 - Document limitations and required assets.
+- Stand up the automated recompilation loop with input replay and video validation.
 
 Exit criteria:
 - Title boots and reaches gameplay.
 - Performance targets met on baseline host.
+- Automated validation produces a report with stable metrics.
 
 ## Phase 5: Stabilization
 - Harden tooling, improve diagnostics, and expand coverage.
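
A hedged sketch of the canonical reference profile that SPEC-230 records in
`reference_video.toml`: the `[video]`, `[timeline]`, and `[hashes.frames]` tables match
the test fixtures in this change, while the `[normalization]` table is illustrative only.

```toml
schema_version = "2"

[video]
path = "reference.mp4"
width = 1280
height = 720
fps = 30.0

[timeline]
start = "00:00:00.000"
end = "00:00:00.067"

[hashes.frames]
format = "list"
path = "reference_frames.hashes"

# Illustrative only: SPEC-230 adds normalization metadata; exact keys are not final.
[normalization]
tool = "ffmpeg"
profile = "1280x720@30"
```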
diff --git a/crates/recomp-cli/Cargo.toml b/crates/recomp-cli/Cargo.toml
index 335e78c..eab9bde 100644
--- a/crates/recomp-cli/Cargo.toml
+++ b/crates/recomp-cli/Cargo.toml
@@ -7,6 +7,14 @@ license = "MIT OR Apache-2.0"
 [dependencies]
 clap = { version = "4.5", features = ["derive"] }
 recomp-pipeline = { path = "../recomp-pipeline" }
+recomp-validation = { path = "../recomp-validation" }
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+sha2 = "0.10"
+toml = "0.8"
+
+[dev-dependencies]
+tempfile = "3.10"
 
 [[bin]]
 name = "recomp"
diff --git a/crates/recomp-cli/src/automation.rs b/crates/recomp-cli/src/automation.rs
new file mode 100644
index 0000000..f439d8b
--- /dev/null
+++ b/crates/recomp-cli/src/automation.rs
@@ -0,0 +1,1445 @@
+use recomp_pipeline::homebrew::{
+    intake_homebrew, lift_homebrew, IntakeOptions, LiftMode, LiftOptions,
+};
+use recomp_pipeline::xci::{intake_xci, XciIntakeOptions, XciToolPreference};
+use recomp_pipeline::{run_pipeline, PipelineOptions};
+use recomp_validation::{
+    hash_audio_file, hash_frames_dir, run_video_suite, write_hash_list, CaptureVideoConfig,
+    HashFormat,
+};
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+use std::collections::{BTreeMap, HashMap};
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::time::Instant;
+
+const AUTOMATION_SCHEMA_VERSION: &str = "1";
+const RUN_MANIFEST_SCHEMA_VERSION: &str = "1";
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct AutomationConfig {
+    pub schema_version: String,
+    pub inputs: InputsConfig,
+    pub outputs: OutputsConfig,
+    pub reference: ReferenceConfig,
+    pub capture: CaptureConfig,
+    pub commands: CommandConfig,
+    #[serde(default)]
+    pub tools: ToolsConfig,
+    #[serde(default)]
+    pub run: RunConfig,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct InputsConfig {
+    pub mode: InputMode,
+    #[serde(default)]
+    pub module_json: Option<PathBuf>,
+    #[serde(default)]
+    pub nro: Option<PathBuf>,
+    #[serde(default)]
+    pub nso: Vec<PathBuf>,
+    #[serde(default)]
+    pub xci: Option<PathBuf>,
+    #[serde(default)]
+    pub keys: Option<PathBuf>,
+    pub provenance: PathBuf,
+    pub config: PathBuf,
+    #[serde(default)]
+    pub runtime_path: Option<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum InputMode {
+    Homebrew,
+    Xci,
+    Lifted,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct OutputsConfig {
+    pub work_root: PathBuf,
+    #[serde(default)]
+    pub intake_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub lift_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub build_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub assets_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub validation_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub log_dir: Option<PathBuf>,
+    #[serde(default)]
+    pub run_manifest: Option<PathBuf>,
+    #[serde(default)]
+    pub lifted_module_json: Option<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct ReferenceConfig {
+    pub reference_video_toml: PathBuf,
+    pub capture_video_toml: PathBuf,
+    #[serde(default)]
+    pub validation_config_toml: Option<PathBuf>,
+    #[serde(default)]
+    pub input_script_toml: Option<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct CaptureConfig {
+    pub video_path: PathBuf,
+    pub frames_dir: PathBuf,
+    #[serde(default)]
+    pub audio_file: Option<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+pub struct CommandConfig {
+    pub build: Vec<String>,
+    pub run: Vec<String>,
+    pub capture: Vec<String>,
+    pub extract_frames: Vec<String>,
+    #[serde(default)]
+    pub extract_audio: Option<Vec<String>>,
+    #[serde(default)]
+    pub lift: Option<Vec<String>>,
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct ToolsConfig {
+    #[serde(default)]
+    pub xci_tool: Option<AutomationXciTool>,
+    #[serde(default)]
+    pub xci_tool_path: Option<PathBuf>,
+    #[serde(default)]
+    pub ffmpeg_path: Option<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Clone, Copy)]
+#[serde(rename_all = "snake_case")]
+pub enum AutomationXciTool {
+    Auto,
+    Hactool,
+    Hactoolnet,
+    Mock,
+}
+
+impl From<AutomationXciTool> for XciToolPreference {
+    fn from(value: AutomationXciTool) -> Self {
+        match value {
+            AutomationXciTool::Auto => XciToolPreference::Auto,
+            AutomationXciTool::Hactool => XciToolPreference::Hactool,
+            AutomationXciTool::Hactoolnet => XciToolPreference::Hactoolnet,
+            AutomationXciTool::Mock => XciToolPreference::Mock,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize, Clone, Default)]
+pub struct RunConfig {
+    #[serde(default = "default_resume")]
+    pub resume: bool,
+    #[serde(default)]
+    pub lift_entry: Option<String>,
+    #[serde(default)]
+    pub lift_mode: Option<LiftModeConfig>,
+}
+
+#[derive(Debug, Deserialize, Clone, Copy)]
+#[serde(rename_all = "snake_case")]
+pub enum LiftModeConfig {
+    Stub,
+    Decode,
+}
+
+impl From<LiftModeConfig> for LiftMode {
+    fn from(value: LiftModeConfig) -> Self {
+        match value {
+            LiftModeConfig::Stub => LiftMode::Stub,
+            LiftModeConfig::Decode => LiftMode::Decode,
+        }
+    }
+}
+
+fn default_resume() -> bool {
+    true
+}
+
+#[derive(Debug)]
+struct ResolvedPaths {
+    repo_root: PathBuf,
+    config_dir: PathBuf,
+    work_root: PathBuf,
+    intake_dir: PathBuf,
+    lift_dir: PathBuf,
+    build_dir: PathBuf,
+    assets_dir: PathBuf,
+    validation_dir: PathBuf,
+    log_dir: PathBuf,
+    run_manifest: PathBuf,
+    lifted_module_json: PathBuf,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RunManifest {
+    pub schema_version: String,
+    pub input_fingerprint: String,
+    pub inputs: Vec<RunInput>,
+    pub steps: Vec<RunStep>,
+    pub artifacts: Vec<RunArtifact>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub validation_report: Option<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RunInput {
+    pub name: String,
+    pub path: String,
+    pub sha256: String,
+    pub size: u64,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RunStep {
+    pub name: String,
+    pub status: StepStatus,
+    pub duration_ms: u128,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub command: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stdout_path: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stderr_path: Option<String>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub outputs: Vec<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub notes: Option<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Succeeded,
+    Failed,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RunArtifact {
+    pub path: String,
+    pub sha256: String,
+    pub size: u64,
+    pub role: String,
+}
+
+#[derive(Debug)]
+struct RunState {
+    manifest: RunManifest,
+    artifacts: BTreeMap<String, RunArtifact>,
+    previous_steps: HashMap<String, RunStep>,
+    cache_valid: bool,
+}
+
+pub fn run_automation(config_path: &Path) -> Result<RunManifest, String> {
+    let config_path = fs::canonicalize(config_path)
+        .map_err(|err| format!("resolve automation config {}: {err}", config_path.display()))?;
+    let config_src = fs::read_to_string(&config_path)
+        .map_err(|err| format!("read automation config {}: {err}", config_path.display()))?;
+    let mut config: AutomationConfig =
+        toml::from_str(&config_src).map_err(|err| format!("invalid automation config: {err}"))?;
+    let config_dir = config_path
+        .parent()
+        .unwrap_or_else(|| Path::new("."))
+        .to_path_buf();
+    config.resolve_paths(&config_dir);
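+    // Relative paths in automation.toml are rebased onto the directory containing
+    // the config file; validate() below therefore checks fully resolved paths.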
+    config.validate()?;
+
+    let paths = ResolvedPaths::new(&config, config_dir.clone())?;
+    fs::create_dir_all(&paths.work_root)
+        .map_err(|err| format!("create work root {}: {err}", paths.work_root.display()))?;
+    fs::create_dir_all(&paths.log_dir)
+        .map_err(|err| format!("create log dir {}: {err}", paths.log_dir.display()))?;
+    fs::create_dir_all(&paths.validation_dir).map_err(|err| {
+        format!(
+            "create validation dir {}: {err}",
+            paths.validation_dir.display()
+        )
+    })?;
+
+    let inputs = gather_inputs(&config, &config_path, &paths)?;
+    let input_fingerprint = fingerprint_inputs(&inputs);
+
+    let previous_manifest = if config.run.resume && paths.run_manifest.exists() {
+        Some(load_run_manifest(&paths.run_manifest)?)
+    } else {
+        None
+    };
+
+    if let Some(previous) = &previous_manifest {
+        if previous.input_fingerprint == input_fingerprint
+            && previous
+                .steps
+                .iter()
+                .all(|step| step.status == StepStatus::Succeeded)
+            && manifest_outputs_exist(&paths, previous)
+        {
+            return Ok(previous.clone());
+        }
+    }
+
+    let mut artifacts = BTreeMap::new();
+    let mut previous_steps = HashMap::new();
+    if let Some(previous) = &previous_manifest {
+        if previous.input_fingerprint == input_fingerprint {
+            for artifact in &previous.artifacts {
+                artifacts.insert(artifact.path.clone(), artifact.clone());
+            }
+            for step in &previous.steps {
+                previous_steps.insert(step.name.clone(), step.clone());
+            }
+        }
+    }
+
+    let mut state = RunState {
+        manifest: RunManifest {
+            schema_version: RUN_MANIFEST_SCHEMA_VERSION.to_string(),
+            input_fingerprint: input_fingerprint.clone(),
+            inputs,
+            steps: Vec::new(),
+            artifacts: Vec::new(),
+            validation_report: None,
+        },
+        artifacts,
+        previous_steps,
+        cache_valid: config.run.resume,
+    };
+
+    let mut module_json_path = match config.inputs.mode {
+        InputMode::Lifted => config
+            .inputs
+            .module_json
+            .clone()
+            .ok_or_else(|| "inputs.module_json is required for mode=lifted".to_string())?,
+        _ => paths.intake_dir.join("module.json"),
+    };
+
+    if matches!(config.inputs.mode, InputMode::Homebrew | InputMode::Xci) {
+        run_cached_step("intake", &paths, &config, &mut state, None, |state| {
+            let outcome = match config.inputs.mode {
+                InputMode::Homebrew => {
+                    let report = intake_homebrew(IntakeOptions {
+                        module_path: config.inputs.nro.clone().ok_or_else(|| {
+                            "inputs.nro is required for mode=homebrew".to_string()
+                        })?,
+                        nso_paths: config.inputs.nso.clone(),
+                        provenance_path: config.inputs.provenance.clone(),
+                        out_dir: paths.intake_dir.clone(),
+                    })
+                    .map_err(|err| format!("homebrew intake failed: {err}"))?;
+                    module_json_path = report.module_json_path.clone();
+                    let mut outputs = Vec::new();
+                    for path in report.files_written {
+                        outputs.push(record_artifact(state, &paths, &path, "intake_output")?);
+                    }
+                    StepOutcome {
+                        status: StepStatus::Succeeded,
+                        stdout: format!("homebrew intake wrote {} files", outputs.len()),
+                        stderr: String::new(),
+                        outputs,
+                    }
+                }
+                InputMode::Xci => {
+                    let report = intake_xci(XciIntakeOptions {
+                        xci_path: config.inputs.xci.clone().ok_or_else(|| {
+                            "inputs.xci is required for mode=xci".to_string()
+                        })?,
+                        keys_path: config.inputs.keys.clone().ok_or_else(|| {
+                            "inputs.keys is required for mode=xci".to_string()
+                        })?,
+                        config_path: None,
+                        provenance_path: config.inputs.provenance.clone(),
+                        out_dir: paths.intake_dir.clone(),
+                        assets_dir: paths.assets_dir.clone(),
+                        tool_preference: config
+                            .tools
+                            .xci_tool
+                            .unwrap_or(AutomationXciTool::Auto)
+                            .into(),
+                        tool_path: config.tools.xci_tool_path.clone(),
+                    })
.map_err(|err| format!("xci intake failed: {err}"))?; + module_json_path = report.module_json_path.clone(); + let mut outputs = Vec::new(); + for path in report.files_written { + outputs.push(record_artifact(state, &paths, &path, "intake_output")?); + } + StepOutcome { + status: StepStatus::Succeeded, + stdout: format!("xci intake wrote {} files", outputs.len()), + stderr: String::new(), + outputs, + } + } + InputMode::Lifted => { + return Err("intake step not valid for mode=lifted".to_string()); + } + }; + Ok(outcome) + })?; + } + + if matches!(config.inputs.mode, InputMode::Homebrew | InputMode::Xci) { + run_cached_step( + "lift", + &paths, + &config, + &mut state, + None, + |state| match config.inputs.mode { + InputMode::Homebrew => { + let report = lift_homebrew(LiftOptions { + module_json_path: module_json_path.clone(), + out_dir: paths.lift_dir.clone(), + entry_name: config + .run + .lift_entry + .clone() + .unwrap_or_else(|| "entry".to_string()), + mode: config + .run + .lift_mode + .unwrap_or(LiftModeConfig::Decode) + .into(), + }) + .map_err(|err| format!("homebrew lift failed: {err}"))?; + module_json_path = report.module_json_path.clone(); + let output = + record_artifact(state, &paths, &report.module_json_path, "lifted_module")?; + Ok(StepOutcome { + status: StepStatus::Succeeded, + stdout: format!( + "lifted module emitted {} functions", + report.functions_emitted + ), + stderr: report.warnings.join("\n"), + outputs: vec![output], + }) + } + InputMode::Xci => { + let lift_command = config + .commands + .lift + .clone() + .ok_or_else(|| "commands.lift is required for mode=xci".to_string())?; + let (stdout, stderr) = run_command(&lift_command, &paths, &config)?; + let output_path = paths.lifted_module_json.clone(); + if !output_path.exists() { + return Err(format!( + "lifted module not found at {}", + output_path.display() + )); + } + module_json_path = output_path.clone(); + let output = record_artifact(state, &paths, &output_path, "lifted_module")?; + Ok(StepOutcome { + status: StepStatus::Succeeded, + stdout, + stderr, + outputs: vec![output], + }) + } + InputMode::Lifted => unreachable!(), + }, + )?; + } + + run_cached_step("pipeline", &paths, &config, &mut state, None, |state| { + let runtime_path = config + .inputs + .runtime_path + .clone() + .unwrap_or_else(|| paths.repo_root.join("crates/recomp-runtime")); + let report = run_pipeline(PipelineOptions { + module_path: module_json_path.clone(), + config_path: config.inputs.config.clone(), + provenance_path: config.inputs.provenance.clone(), + out_dir: paths.build_dir.clone(), + runtime_path, + }) + .map_err(|err| format!("pipeline failed: {err}"))?; + let mut outputs = Vec::new(); + for path in report.files_written { + outputs.push(record_artifact(state, &paths, &path, "pipeline_output")?); + } + Ok(StepOutcome { + status: StepStatus::Succeeded, + stdout: format!("pipeline wrote {} files", outputs.len()), + stderr: String::new(), + outputs, + }) + })?; + + run_cached_step( + "build", + &paths, + &config, + &mut state, + Some(config.commands.build.clone()), + |_state| { + let (stdout, stderr) = run_command(&config.commands.build, &paths, &config)?; + Ok(StepOutcome { + status: StepStatus::Succeeded, + stdout, + stderr, + outputs: Vec::new(), + }) + }, + )?; + + run_cached_step( + "run", + &paths, + &config, + &mut state, + Some(config.commands.run.clone()), + |_state| { + let (stdout, stderr) = run_command(&config.commands.run, &paths, &config)?; + Ok(StepOutcome { + status: StepStatus::Succeeded, + stdout, + stderr, + 
+                outputs: Vec::new(),
+            })
+        },
+    )?;
+
+    run_cached_step(
+        "capture",
+        &paths,
+        &config,
+        &mut state,
+        Some(config.commands.capture.clone()),
+        |state| {
+            let (stdout, stderr) = run_command(&config.commands.capture, &paths, &config)?;
+            let mut outputs = Vec::new();
+            if config.capture.video_path.exists() {
+                outputs.push(record_artifact(
+                    state,
+                    &paths,
+                    &config.capture.video_path,
+                    "capture_video",
+                )?);
+            }
+            Ok(StepOutcome {
+                status: StepStatus::Succeeded,
+                stdout,
+                stderr,
+                outputs,
+            })
+        },
+    )?;
+
+    run_cached_step(
+        "extract_frames",
+        &paths,
+        &config,
+        &mut state,
+        Some(config.commands.extract_frames.clone()),
+        |_state| {
+            let (stdout, stderr) = run_command(&config.commands.extract_frames, &paths, &config)?;
+            Ok(StepOutcome {
+                status: StepStatus::Succeeded,
+                stdout,
+                stderr,
+                outputs: Vec::new(),
+            })
+        },
+    )?;
+
+    if let Some(audio_file) = config.capture.audio_file.clone() {
+        let command = config.commands.extract_audio.clone().ok_or_else(|| {
+            "commands.extract_audio is required when capture.audio_file is set".to_string()
+        })?;
+        run_cached_step(
+            "extract_audio",
+            &paths,
+            &config,
+            &mut state,
+            Some(command.clone()),
+            |state| {
+                let (stdout, stderr) = run_command(&command, &paths, &config)?;
+                let mut outputs = Vec::new();
+                if audio_file.exists() {
+                    outputs.push(record_artifact(
+                        state,
+                        &paths,
+                        &audio_file,
+                        "capture_audio",
+                    )?);
+                }
+                Ok(StepOutcome {
+                    status: StepStatus::Succeeded,
+                    stdout,
+                    stderr,
+                    outputs,
+                })
+            },
+        )?;
+    }
+
+    let capture_config_src =
+        fs::read_to_string(&config.reference.capture_video_toml).map_err(|err| {
+            format!(
+                "read capture config {}: {err}",
+                config.reference.capture_video_toml.display()
+            )
+        })?;
+    let capture_config: CaptureVideoConfig = toml::from_str(&capture_config_src)
+        .map_err(|err| format!("invalid capture config: {err}"))?;
+    let capture_config_dir = config
+        .reference
+        .capture_video_toml
+        .parent()
+        .unwrap_or_else(|| Path::new("."));
+    let capture_video_path = resolve_path(capture_config_dir, &capture_config.video.path);
+    if capture_video_path != config.capture.video_path {
+        return Err(format!(
+            "capture video path mismatch: config {}, capture_video.toml {}",
+            config.capture.video_path.display(),
+            capture_video_path.display()
+        ));
+    }
+
+    if capture_config.hashes.frames.format != HashFormat::List {
+        return Err("capture hashes.frames must use format=list".to_string());
+    }
+    let frames_hash_path = resolve_path(capture_config_dir, &capture_config.hashes.frames.path);
+    run_cached_step("hash_frames", &paths, &config, &mut state, None, |state| {
+        let hashes = hash_frames_dir(&config.capture.frames_dir)
+            .map_err(|err| format!("hash frames failed: {err}"))?;
+        write_hash_list(&frames_hash_path, &hashes)
+            .map_err(|err| format!("write frame hashes: {err}"))?;
+        let output = record_artifact(state, &paths, &frames_hash_path, "frame_hashes")?;
+        Ok(StepOutcome {
+            status: StepStatus::Succeeded,
+            stdout: format!("frame hashes written ({})", hashes.len()),
+            stderr: String::new(),
+            outputs: vec![output],
+        })
+    })?;
+
+    if let Some(audio_hash) = &capture_config.hashes.audio {
+        if audio_hash.format != HashFormat::List {
+            return Err("capture hashes.audio must use format=list".to_string());
+        }
+        let audio_file = config
+            .capture
+            .audio_file
+            .clone()
+            .ok_or_else(|| "capture.audio_file is required for audio hashing".to_string())?;
+        let audio_hash_path = resolve_path(capture_config_dir, &audio_hash.path);
+        run_cached_step("hash_audio", &paths, &config, &mut state, None, |state| {
+            let hashes =
+                hash_audio_file(&audio_file).map_err(|err| format!("hash audio failed: {err}"))?;
+            write_hash_list(&audio_hash_path, &hashes)
+                .map_err(|err| format!("write audio hashes: {err}"))?;
+            let output = record_artifact(state, &paths, &audio_hash_path, "audio_hashes")?;
+            Ok(StepOutcome {
+                status: StepStatus::Succeeded,
+                stdout: format!("audio hashes written ({})", hashes.len()),
+                stderr: String::new(),
+                outputs: vec![output],
+            })
+        })?;
+    }
+
+    run_cached_step("validate", &paths, &config, &mut state, None, |state| {
+        let report = run_video_suite(
+            &config.reference.reference_video_toml,
+            &config.reference.capture_video_toml,
+            config.reference.validation_config_toml.as_deref(),
+        );
+        let report_dir = &paths.validation_dir;
+        recomp_validation::write_report(report_dir, &report)
+            .map_err(|err| format!("write validation report: {err}"))?;
+        let report_path = report_dir.join("validation-report.json");
+        let output = record_artifact(state, &paths, &report_path, "validation_report")?;
+        state.manifest.validation_report = Some(output.clone());
+        let status = if report.failed > 0 {
+            StepStatus::Failed
+        } else {
+            StepStatus::Succeeded
+        };
+        Ok(StepOutcome {
+            status,
+            stdout: format!(
+                "validation status: {}",
+                if report.failed > 0 {
+                    "failed"
+                } else {
+                    "passed"
+                }
+            ),
+            stderr: if report.failed > 0 {
+                format!("validation failed: {} cases", report.failed)
+            } else {
+                String::new()
+            },
+            outputs: vec![output],
+        })
+    })?;
+
+    finalize_manifest(&mut state);
+    write_run_manifest(&paths.run_manifest, &state.manifest)?;
+
+    Ok(state.manifest)
+}
+
+impl AutomationConfig {
+    fn resolve_paths(&mut self, base_dir: &Path) {
+        self.inputs.provenance = resolve_path(base_dir, &self.inputs.provenance);
+        self.inputs.config = resolve_path(base_dir, &self.inputs.config);
+        if let Some(path) = &self.inputs.module_json {
+            self.inputs.module_json = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.inputs.nro {
+            self.inputs.nro = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.inputs.xci {
+            self.inputs.xci = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.inputs.keys {
+            self.inputs.keys = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.inputs.runtime_path {
+            self.inputs.runtime_path = Some(resolve_path(base_dir, path));
+        }
+        for path in &mut self.inputs.nso {
+            *path = resolve_path(base_dir, path);
+        }
+
+        self.outputs.work_root = resolve_path(base_dir, &self.outputs.work_root);
+        if let Some(path) = &self.outputs.intake_dir {
+            self.outputs.intake_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.lift_dir {
+            self.outputs.lift_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.build_dir {
+            self.outputs.build_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.assets_dir {
+            self.outputs.assets_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.validation_dir {
+            self.outputs.validation_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.log_dir {
+            self.outputs.log_dir = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.run_manifest {
+            self.outputs.run_manifest = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.outputs.lifted_module_json {
+            self.outputs.lifted_module_json = Some(resolve_path(base_dir, path));
+        }
+
+        self.reference.reference_video_toml =
+            resolve_path(base_dir, &self.reference.reference_video_toml);
+        self.reference.capture_video_toml =
+            resolve_path(base_dir, &self.reference.capture_video_toml);
+        if let Some(path) = &self.reference.validation_config_toml {
+            self.reference.validation_config_toml = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.reference.input_script_toml {
+            self.reference.input_script_toml = Some(resolve_path(base_dir, path));
+        }
+
+        self.capture.video_path = resolve_path(base_dir, &self.capture.video_path);
+        self.capture.frames_dir = resolve_path(base_dir, &self.capture.frames_dir);
+        if let Some(path) = &self.capture.audio_file {
+            self.capture.audio_file = Some(resolve_path(base_dir, path));
+        }
+
+        if let Some(path) = &self.tools.xci_tool_path {
+            self.tools.xci_tool_path = Some(resolve_path(base_dir, path));
+        }
+        if let Some(path) = &self.tools.ffmpeg_path {
+            self.tools.ffmpeg_path = Some(resolve_path(base_dir, path));
+        }
+    }
+
+    fn validate(&self) -> Result<(), String> {
+        if self.schema_version != AUTOMATION_SCHEMA_VERSION {
+            return Err(format!(
+                "unsupported automation schema version: {}",
+                self.schema_version
+            ));
+        }
+        if self.commands.build.is_empty()
+            || self.commands.run.is_empty()
+            || self.commands.capture.is_empty()
+            || self.commands.extract_frames.is_empty()
+        {
+            return Err("commands.build/run/capture/extract_frames must be non-empty".to_string());
+        }
+        if !self.inputs.provenance.exists() {
+            return Err(format!(
+                "provenance path not found: {}",
+                self.inputs.provenance.display()
+            ));
+        }
+        if !self.inputs.config.exists() {
+            return Err(format!(
+                "config path not found: {}",
+                self.inputs.config.display()
+            ));
+        }
+        match self.inputs.mode {
+            InputMode::Homebrew => {
+                let Some(nro) = &self.inputs.nro else {
+                    return Err("inputs.nro is required for mode=homebrew".to_string());
+                };
+                if !nro.exists() {
+                    return Err(format!("homebrew NRO not found: {}", nro.display()));
+                }
+                for path in &self.inputs.nso {
+                    if !path.exists() {
+                        return Err(format!("homebrew NSO not found: {}", path.display()));
+                    }
+                }
+            }
+            InputMode::Xci => {
+                let Some(xci) = &self.inputs.xci else {
+                    return Err("inputs.xci is required for mode=xci".to_string());
+                };
+                if !xci.exists() {
+                    return Err(format!("xci not found: {}", xci.display()));
+                }
+                let Some(keys) = &self.inputs.keys else {
+                    return Err("inputs.keys is required for mode=xci".to_string());
+                };
+                if !keys.exists() {
+                    return Err(format!("keys not found: {}", keys.display()));
+                }
+                if self.commands.lift.is_none() {
+                    return Err("commands.lift is required for mode=xci".to_string());
+                }
+            }
+            InputMode::Lifted => {
+                let Some(module_json) = &self.inputs.module_json else {
+                    return Err("inputs.module_json is required for mode=lifted".to_string());
+                };
+                if !module_json.exists() {
+                    return Err(format!("module.json not found: {}", module_json.display()));
+                }
+            }
+        }
+        if !self.reference.reference_video_toml.exists() {
+            return Err(format!(
+                "reference video config not found: {}",
+                self.reference.reference_video_toml.display()
+            ));
+        }
+        if !self.reference.capture_video_toml.exists() {
+            return Err(format!(
+                "capture video config not found: {}",
+                self.reference.capture_video_toml.display()
+            ));
+        }
+        if let Some(path) = &self.reference.validation_config_toml {
+            if !path.exists() {
+                return Err(format!("validation config not found: {}", path.display()));
+            }
+        }
+        if let Some(path) = &self.reference.input_script_toml {
+            if !path.exists() {
+                return Err(format!("input script not found: {}", path.display()));
+            }
+        }
+        if let Some(runtime_path) = &self.inputs.runtime_path {
+            if !runtime_path.exists() {
+                return Err(format!(
+                    "runtime path not found: {}",
+                    runtime_path.display()
+                ));
+            }
+        }
+        if self.capture.audio_file.is_some() && self.commands.extract_audio.is_none() {
+            return Err(
+                "commands.extract_audio is required when capture.audio_file is set".to_string(),
+            );
+        }
+        Ok(())
+    }
+}
+
+impl ResolvedPaths {
+    fn new(config: &AutomationConfig, config_dir: PathBuf) -> Result<Self, String> {
+        let repo_root = repo_root();
+        let work_root = config.outputs.work_root.clone();
+        let intake_dir = config
+            .outputs
+            .intake_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("intake"));
+        let lift_dir = config
+            .outputs
+            .lift_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("lift"));
+        let build_dir = config
+            .outputs
+            .build_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("build"));
+        let assets_dir = config
+            .outputs
+            .assets_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("assets"));
+        let validation_dir = config
+            .outputs
+            .validation_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("validation"));
+        let log_dir = config
+            .outputs
+            .log_dir
+            .clone()
+            .unwrap_or_else(|| work_root.join("logs"));
+        let run_manifest = config
+            .outputs
+            .run_manifest
+            .clone()
+            .unwrap_or_else(|| work_root.join("run-manifest.json"));
+        let lifted_module_json = config
+            .outputs
+            .lifted_module_json
+            .clone()
+            .unwrap_or_else(|| lift_dir.join("module.json"));
+
+        Ok(Self {
+            repo_root,
+            config_dir,
+            work_root,
+            intake_dir,
+            lift_dir,
+            build_dir,
+            assets_dir,
+            validation_dir,
+            log_dir,
+            run_manifest,
+            lifted_module_json,
+        })
+    }
+}
+
+fn run_cached_step<F>(
+    name: &str,
+    paths: &ResolvedPaths,
+    _config: &AutomationConfig,
+    state: &mut RunState,
+    command: Option<Vec<String>>,
+    action: F,
+) -> Result<(), String>
+where
+    F: FnOnce(&mut RunState) -> Result<StepOutcome, String>,
+{
+    if state.cache_valid {
+        if let Some(previous) = state.previous_steps.get(name) {
+            if previous.status == StepStatus::Succeeded && outputs_exist(paths, previous) {
+                state.manifest.steps.push(previous.clone());
+                return Ok(());
+            }
+        }
+        state.cache_valid = false;
+    }
+
+    let start = Instant::now();
+    let outcome = action(state);
+    let duration_ms = start.elapsed().as_millis();
+
+    match outcome {
+        Ok(outcome) => {
+            let (stdout_path, stderr_path) =
+                write_step_logs(paths, name, &outcome.stdout, &outcome.stderr)?;
+            let mut outputs = outcome.outputs;
+            if let Some(stdout) = &stdout_path {
+                outputs.push(record_artifact(state, paths, stdout, "log_stdout")?);
+            }
+            if let Some(stderr) = &stderr_path {
+                outputs.push(record_artifact(state, paths, stderr, "log_stderr")?);
+            }
+            let step = RunStep {
+                name: name.to_string(),
+                status: outcome.status,
+                duration_ms,
+                command,
+                stdout_path: stdout_path.map(|path| format_path(paths, &path)),
+                stderr_path: stderr_path.map(|path| format_path(paths, &path)),
+                outputs,
+                notes: if outcome.status == StepStatus::Failed {
+                    Some(outcome.stderr.clone())
+                } else {
+                    None
+                },
+            };
+            state.manifest.steps.push(step);
+            finalize_manifest(state);
+            write_run_manifest(&paths.run_manifest, &state.manifest)?;
+            if outcome.status == StepStatus::Failed {
+                Err(outcome.stderr)
+            } else {
+                Ok(())
+            }
+        }
+        Err(err) => {
+            let (stdout_path, stderr_path) = write_step_logs(paths, name, "", &err)?;
+            let mut outputs = Vec::new();
+            if let Some(stdout) = &stdout_path {
+                outputs.push(record_artifact(state, paths, stdout, "log_stdout")?);
+            }
+            if let Some(stderr) = &stderr_path {
+                outputs.push(record_artifact(state, paths, stderr, "log_stderr")?);
+            }
+            let step = RunStep {
+                name: name.to_string(),
+                status: StepStatus::Failed,
+                duration_ms,
+                command,
+                stdout_path: stdout_path.map(|path| format_path(paths, &path)),
+                stderr_path: stderr_path.map(|path| format_path(paths, &path)),
+                outputs,
+                notes: Some(err.clone()),
+            };
+            state.manifest.steps.push(step);
+            finalize_manifest(state);
+            write_run_manifest(&paths.run_manifest, &state.manifest)?;
+            Err(err)
+        }
+    }
+}
+
+struct StepOutcome {
+    status: StepStatus,
+    stdout: String,
+    stderr: String,
+    outputs: Vec<String>,
+}
+
+fn run_command(
+    argv: &[String],
+    paths: &ResolvedPaths,
+    config: &AutomationConfig,
+) -> Result<(String, String), String> {
+    let (program, args) = argv
+        .split_first()
+        .ok_or_else(|| "command argv is empty".to_string())?;
+    let mut cmd = Command::new(program);
+    cmd.args(args);
+    cmd.current_dir(&paths.repo_root);
+    for (key, value) in command_env(paths, config) {
+        cmd.env(key, value);
+    }
+    let output = cmd
+        .output()
+        .map_err(|err| format!("run command failed: {err}"))?;
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    if output.status.success() {
+        Ok((stdout, stderr))
+    } else {
+        Err(format!(
+            "command failed ({}): {}",
+            output.status.code().unwrap_or(-1),
+            stderr.trim()
+        ))
+    }
+}
+
+fn command_env(paths: &ResolvedPaths, config: &AutomationConfig) -> BTreeMap<String, String> {
+    let mut env = BTreeMap::new();
+    env.insert(
+        "RECOMP_WORK_ROOT".to_string(),
+        paths.work_root.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_INTAKE_DIR".to_string(),
+        paths.intake_dir.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_LIFT_DIR".to_string(),
+        paths.lift_dir.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_BUILD_DIR".to_string(),
+        paths.build_dir.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_ASSETS_DIR".to_string(),
+        paths.assets_dir.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_REFERENCE_VIDEO_TOML".to_string(),
+        config.reference.reference_video_toml.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_CAPTURE_VIDEO_TOML".to_string(),
+        config.reference.capture_video_toml.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_CAPTURE_VIDEO".to_string(),
+        config.capture.video_path.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_CAPTURE_FRAMES_DIR".to_string(),
+        config.capture.frames_dir.display().to_string(),
+    );
+    if let Some(audio_file) = &config.capture.audio_file {
+        env.insert(
+            "RECOMP_CAPTURE_AUDIO_FILE".to_string(),
+            audio_file.display().to_string(),
+        );
+    }
+    env.insert(
+        "RECOMP_VALIDATION_DIR".to_string(),
+        paths.validation_dir.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_RUN_MANIFEST".to_string(),
+        paths.run_manifest.display().to_string(),
+    );
+    env.insert(
+        "RECOMP_LIFTED_MODULE_JSON".to_string(),
+        paths.lifted_module_json.display().to_string(),
+    );
+    if let Some(validation) = &config.reference.validation_config_toml {
+        env.insert(
+            "RECOMP_VALIDATION_CONFIG_TOML".to_string(),
+            validation.display().to_string(),
+        );
+    }
+    if let Some(input_script) = &config.reference.input_script_toml {
+        env.insert(
+            "RECOMP_INPUT_SCRIPT_TOML".to_string(),
+            input_script.display().to_string(),
+        );
+    }
+    env
+}
+
+fn write_step_logs(
+    paths: &ResolvedPaths,
+    name: &str,
+    stdout: &str,
+    stderr: &str,
+) -> Result<(Option<PathBuf>, Option<PathBuf>), String> {
+    let stdout_path = paths.log_dir.join(format!("{name}.stdout.log"));
+    let stderr_path = paths.log_dir.join(format!("{name}.stderr.log"));
+    fs::write(&stdout_path, stdout)
+        .map_err(|err| format!("write stdout log {}: {err}", stdout_path.display()))?;
+    fs::write(&stderr_path, stderr)
+        .map_err(|err| format!("write stderr log {}: {err}", stderr_path.display()))?;
+    Ok((Some(stdout_path), Some(stderr_path)))
+}
+
+fn record_artifact(
+    state: &mut RunState,
+    paths: &ResolvedPaths,
+    path: &Path,
+    role: &str,
+) -> Result<String, String> {
+    let (sha256, size) = hash_file(path)?;
+    let stored_path = format_path(paths, path);
+    state.artifacts.insert(
+        stored_path.clone(),
+        RunArtifact {
+            path: stored_path.clone(),
+            sha256,
+            size,
+            role: role.to_string(),
+        },
+    );
+    Ok(stored_path)
+}
+
+fn finalize_manifest(state: &mut RunState) {
+    state.manifest.artifacts = state
+        .artifacts
+        .values()
+        .cloned()
+        .collect::<Vec<_>>();
+    state.manifest.artifacts.sort_by(|a, b| a.path.cmp(&b.path));
+}
+
+fn format_path(paths: &ResolvedPaths, path: &Path) -> String {
+    if let Ok(relative) = path.strip_prefix(&paths.config_dir) {
+        return relative.to_string_lossy().to_string();
+    }
+    path.to_string_lossy().to_string()
+}
+
+fn outputs_exist(paths: &ResolvedPaths, step: &RunStep) -> bool {
+    if step.outputs.is_empty() {
+        return true;
+    }
+    step.outputs.iter().all(|stored| {
+        let path = resolve_path(&paths.config_dir, Path::new(stored));
+        path.exists()
+    })
+}
+
+fn manifest_outputs_exist(paths: &ResolvedPaths, manifest: &RunManifest) -> bool {
+    manifest.artifacts.iter().all(|artifact| {
+        let path = resolve_path(&paths.config_dir, Path::new(&artifact.path));
+        path.exists()
+    })
+}
+
+fn write_run_manifest(path: &Path, manifest: &RunManifest) -> Result<(), String> {
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent)
+            .map_err(|err| format!("create manifest dir {}: {err}", parent.display()))?;
+    }
+    let json = serde_json::to_string_pretty(manifest).map_err(|err| err.to_string())?;
+    fs::write(path, json).map_err(|err| format!("write run manifest {}: {err}", path.display()))?;
+    Ok(())
+}
+
+fn load_run_manifest(path: &Path) -> Result<RunManifest, String> {
+    let src = fs::read_to_string(path)
+        .map_err(|err| format!("read run manifest {}: {err}", path.display()))?;
+    serde_json::from_str(&src).map_err(|err| format!("invalid run manifest: {err}"))
+}
+
+fn gather_inputs(
+    config: &AutomationConfig,
+    config_path: &Path,
+    paths: &ResolvedPaths,
+) -> Result<Vec<RunInput>, String> {
+    let mut inputs = vec![
+        run_input("automation_config", config_path)?,
+        run_input("provenance", &config.inputs.provenance)?,
+        run_input("title_config", &config.inputs.config)?,
+        run_input("reference_video", &config.reference.reference_video_toml)?,
+        run_input("capture_video", &config.reference.capture_video_toml)?,
+    ];
+    if let Some(validation) = &config.reference.validation_config_toml {
+        inputs.push(run_input("validation_config", validation)?);
+    }
+    if let Some(input_script) = &config.reference.input_script_toml {
+        inputs.push(run_input("input_script", input_script)?);
+    }
+    if let Some(path) = &config.inputs.module_json {
+        inputs.push(run_input("module_json", path)?);
+    }
+    if let Some(path) = &config.inputs.nro {
+        inputs.push(run_input("homebrew_nro", path)?);
+    }
+    if let Some(path) = &config.inputs.xci {
+        inputs.push(run_input("xci", path)?);
+    }
+    if let Some(path) = &config.inputs.keys {
+        inputs.push(run_input("keyset", path)?);
+    }
+    for (index, path) in config.inputs.nso.iter().enumerate() {
+        inputs.push(run_input(&format!("homebrew_nso_{index}"), path)?);
+    }
+    if let Some(runtime_path) = &config.inputs.runtime_path {
+        let cargo_toml = runtime_path.join("Cargo.toml");
+        if cargo_toml.exists() {
+            inputs.push(run_input("runtime_cargo", &cargo_toml)?);
+        }
+    } else {
+        let default_runtime = paths.repo_root.join("crates/recomp-runtime/Cargo.toml");
+        if default_runtime.exists() {
+            inputs.push(run_input("runtime_cargo", &default_runtime)?);
+        }
+    }
+    inputs.sort_by(|a, b| a.name.cmp(&b.name));
+    Ok(inputs)
+}
+
+fn run_input(name: &str, path: &Path) -> Result<RunInput, String> {
+    let (sha256, size) = hash_file(path)?;
+    Ok(RunInput {
+        name: name.to_string(),
+        path: path.to_string_lossy().to_string(),
+        sha256,
+        size,
+    })
+}
+
+fn hash_file(path: &Path) -> Result<(String, u64), String> {
+    let bytes = fs::read(path).map_err(|err| format!("read {}: {err}", path.display()))?;
+    let size = bytes.len() as u64;
+    let mut hasher = Sha256::new();
+    hasher.update(&bytes);
+    let digest = hasher.finalize();
+    Ok((format!("{:x}", digest), size))
+}
+
+fn fingerprint_inputs(inputs: &[RunInput]) -> String {
+    let mut hasher = Sha256::new();
+    for input in inputs {
+        hasher.update(input.name.as_bytes());
+        hasher.update(b":");
+        hasher.update(input.sha256.as_bytes());
+        hasher.update(b":");
+        hasher.update(input.size.to_string().as_bytes());
+        hasher.update(b"\n");
+    }
+    let digest = hasher.finalize();
+    format!("{:x}", digest)
+}
+
+fn resolve_path(base_dir: &Path, path: &Path) -> PathBuf {
+    if path.is_absolute() {
+        path.to_path_buf()
+    } else {
+        base_dir.join(path)
+    }
+}
+
+fn repo_root() -> PathBuf {
+    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    manifest_dir
+        .parent()
+        .and_then(|path| path.parent())
+        .unwrap_or(&manifest_dir)
+        .to_path_buf()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn automation_runs_with_lifted_module() {
+        let repo_root = repo_root();
+        let temp = tempdir().expect("tempdir");
+        let work_root = temp.path().join("work");
+        let capture_dir = temp.path().join("capture");
+        let frames_dir = capture_dir.join("frames");
+        fs::create_dir_all(&frames_dir).expect("frames dir");
+
+        let frame_a = frames_dir.join("00000001.png");
+        let frame_b = frames_dir.join("00000002.png");
+        fs::write(&frame_a, b"frame-one").expect("write frame a");
+        fs::write(&frame_b, b"frame-two").expect("write frame b");
+
+        let reference_hashes = hash_frames_dir(&frames_dir).expect("hash frames");
+        let reference_hash_path = temp.path().join("reference_frames.hashes");
+        write_hash_list(&reference_hash_path, &reference_hashes).expect("write ref hashes");
+
+        let capture_hash_path = capture_dir.join("frames.hashes");
+        let capture_video_path = capture_dir.join("capture.mp4");
+        fs::write(&capture_video_path, b"").expect("write capture video");
+
+        let reference_toml = format!(
+            r#"schema_version = "2"
+
+[video]
+path = "reference.mp4"
+width = 1280
+height = 720
+fps = 30.0
+
+[timeline]
+start = "00:00:00.000"
+end = "00:00:00.067"
+
+[hashes.frames]
+format = "list"
+path = "{}"
+"#,
+            reference_hash_path.display()
+        );
+        let capture_toml = format!(
+            r#"schema_version = "1"
+
+[video]
+path = "{}"
+width = 1280
+height = 720
+fps = 30.0
+
+[hashes.frames]
+format = "list"
+path = "{}"
+"#,
+            capture_video_path.display(),
+            capture_hash_path.display()
+        );
+        let reference_path = temp.path().join("reference_video.toml");
+        let capture_path = temp.path().join("capture_video.toml");
+        fs::write(&reference_path, reference_toml).expect("write reference config");
+        fs::write(&capture_path, capture_toml).expect("write capture config");
+
+        let automation_path = temp.path().join("automation.toml");
+        let automation_toml = format!(
+            r#"schema_version = "1"
+
+[inputs]
+mode = "lifted"
+module_json = "{}"
+provenance = "{}"
+config = "{}"
+runtime_path = "{}"
+
+[outputs]
+work_root = "{}"
+
+[reference]
+reference_video_toml = "{}"
+capture_video_toml = "{}"
+
+[capture]
+video_path = "{}"
+frames_dir = "{}"
+
+[commands]
+build = ["/usr/bin/true"]
+run = ["/usr/bin/true"]
+capture = ["/usr/bin/true"]
+extract_frames = ["/usr/bin/true"]
+"#,
+            repo_root.join("samples/minimal/module.json").display(),
+            repo_root.join("samples/minimal/provenance.toml").display(),
+            repo_root.join("samples/minimal/title.toml").display(),
+            repo_root.join("crates/recomp-runtime").display(),
+            work_root.display(),
+            reference_path.display(),
+            capture_path.display(),
+            capture_video_path.display(),
+            frames_dir.display()
+        );
+        fs::write(&automation_path, automation_toml).expect("write automation config");
+
+        let manifest = run_automation(&automation_path).expect("run automation");
+        assert_eq!(manifest.input_fingerprint.len(), 64);
+        assert!(manifest.steps.iter().any(|step| step.name == "pipeline"));
+        assert!(paths_exist(&manifest, temp.path()));
+
+        let manifest_again = run_automation(&automation_path).expect("run automation again");
+        assert_eq!(manifest.input_fingerprint, manifest_again.input_fingerprint);
+    }
+
+    fn paths_exist(manifest: &RunManifest, base: &Path) -> bool {
+        for artifact in &manifest.artifacts {
+            let path = resolve_path(base, Path::new(&artifact.path));
+            if !path.exists() {
+                return false;
+            }
+        }
+        true
+    }
+}
diff --git a/crates/recomp-cli/src/main.rs b/crates/recomp-cli/src/main.rs
index 4823e38..7a65aef 100644
--- a/crates/recomp-cli/src/main.rs
+++ b/crates/recomp-cli/src/main.rs
@@ -1,8 +1,11 @@
 use clap::{Parser, Subcommand, ValueEnum};
+mod automation;
+use automation::run_automation;
 use recomp_pipeline::bundle::{package_bundle, PackageOptions};
 use recomp_pipeline::homebrew::{
     intake_homebrew, lift_homebrew, IntakeOptions, LiftMode, LiftOptions,
 };
+use recomp_pipeline::xci::{intake_xci, XciIntakeOptions, XciToolPreference};
 use recomp_pipeline::{run_pipeline, PipelineOptions};
 use std::path::PathBuf;
 
@@ -19,6 +22,8 @@ enum Command {
     Package(PackageArgs),
     HomebrewIntake(HomebrewIntakeArgs),
     HomebrewLift(HomebrewLiftArgs),
+    XciIntake(XciIntakeArgs),
+    Automate(AutomateArgs),
 }
 
 #[derive(Parser, Debug)]
@@ -71,6 +76,51 @@ struct HomebrewLiftArgs {
     mode: HomebrewLiftMode,
 }
 
+#[derive(Parser, Debug)]
+struct XciIntakeArgs {
+    #[arg(long)]
+    xci: PathBuf,
+    #[arg(long)]
+    keys: PathBuf,
+    #[arg(long)]
+    provenance: PathBuf,
+    #[arg(long)]
+    out_dir: PathBuf,
+    #[arg(long)]
+    assets_dir: PathBuf,
+    #[arg(long)]
+    config: Option<PathBuf>,
+    #[arg(long, value_enum, default_value = "auto")]
+    xci_tool: XciToolMode,
+    #[arg(long)]
+    xci_tool_path: Option<PathBuf>,
+}
+
+#[derive(Parser, Debug)]
+struct AutomateArgs {
+    #[arg(long)]
+    config: PathBuf,
+}
+
+#[derive(ValueEnum, Debug, Clone)]
+enum XciToolMode {
+    Auto,
+    Hactool,
+    Hactoolnet,
+    Mock,
+}
+
+impl From<XciToolMode> for XciToolPreference {
+    fn from(value: XciToolMode) -> Self {
+        match value {
+            XciToolMode::Auto => XciToolPreference::Auto,
+            XciToolMode::Hactool => XciToolPreference::Hactool,
+            XciToolMode::Hactoolnet => XciToolPreference::Hactoolnet,
+            XciToolMode::Mock => XciToolPreference::Mock,
+        }
+    }
+}
+
 #[derive(ValueEnum, Debug, Clone)]
 enum HomebrewLiftMode {
     Stub,
@@ -194,5 +244,42 @@ fn main() {
                 }
             }
         }
+        Command::XciIntake(intake) => {
+            let options = XciIntakeOptions {
+                xci_path: intake.xci,
+                keys_path: intake.keys,
+                config_path: intake.config,
+                provenance_path: intake.provenance,
+                out_dir: intake.out_dir,
+                assets_dir: intake.assets_dir,
+                tool_preference: intake.xci_tool.into(),
+                tool_path: intake.xci_tool_path,
+            };
+            match intake_xci(options) {
+                Ok(report) => {
+                    println!(
+                        "XCI intake wrote {} files to {}",
+                        report.files_written.len(),
+                        report.out_dir.display()
+                    );
+                    println!("module.json: {}", report.module_json_path.display());
+                    println!("manifest.json: {}", report.manifest_path.display());
+                    println!("assets root: {}", report.assets_dir.display());
+                }
+                Err(err) => {
+                    eprintln!("XCI intake error: {err}");
+                    std::process::exit(1);
+                }
+            }
+        }
+        Command::Automate(automate) => match run_automation(&automate.config) {
+            Ok(manifest) => {
+                println!("Automation complete ({} steps).", manifest.steps.len());
+            }
+            Err(err) => {
+                eprintln!("Automation error: {err}");
+                std::process::exit(1);
+            }
+        },
     }
 }
diff --git a/crates/recomp-gfx/src/lib.rs b/crates/recomp-gfx/src/lib.rs
index 5fc2304..d5a3f90 100644
--- a/crates/recomp-gfx/src/lib.rs
+++ b/crates/recomp-gfx/src/lib.rs
@@ -19,6 +19,27 @@ pub trait GraphicsBackend {
     fn submit(&mut self, stream: &CommandStream) -> Result<(), GraphicsError>;
 }
 
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FrameDescriptor {
+    pub frame_id: u64,
+    pub width: u32,
+    pub height: u32,
+}
+
+impl FrameDescriptor {
+    pub fn new(frame_id: u64, width: u32, height: u32) -> Self {
+        Self {
+            frame_id,
+            width,
+            height,
+        }
+    }
+}
+
+pub trait GraphicsPresenter {
+    fn present(&mut self, frame: &FrameDescriptor) -> Result<(), GraphicsError>;
+}
+
 pub fn checksum_stream(stream: &CommandStream) -> u64 {
     let mut hash = 1469598103934665603u64;
     for word in &stream.words {
@@ -40,6 +61,18 @@ impl GraphicsBackend for StubBackend {
     }
 }
 
+#[derive(Debug, Default)]
+pub struct StubPresenter {
+    pub presented: Vec<FrameDescriptor>,
+}
+
+impl GraphicsPresenter for StubPresenter {
+    fn present(&mut self, frame: &FrameDescriptor) -> Result<(), GraphicsError> {
+        self.presented.push(frame.clone());
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -60,4 +93,13 @@ mod tests {
         let second = checksum_stream(&stream);
         assert_eq!(first, second);
     }
+
+    #[test]
+    fn presenter_records_frames() {
+        let mut presenter = StubPresenter::default();
+        let frame = FrameDescriptor::new(7, 1280, 720);
+        presenter.present(&frame).expect("present ok");
+        assert_eq!(presenter.presented.len(), 1);
+        assert_eq!(presenter.presented[0], frame);
+    }
 }
diff --git a/crates/recomp-pipeline/Cargo.toml b/crates/recomp-pipeline/Cargo.toml
index bb94684..dd1febb 100644
--- a/crates/recomp-pipeline/Cargo.toml
+++ b/crates/recomp-pipeline/Cargo.toml
@@ -12,6 +12,7 @@ sha2 = "0.10"
 thiserror = "1.0"
 toml = "0.8"
 lz4_flex = "0.11"
+base64 = "0.22"
+tempfile = "3.10"
 
 [dev-dependencies]
-tempfile = "3.10"
diff --git a/crates/recomp-pipeline/src/lib.rs b/crates/recomp-pipeline/src/lib.rs
index fcd119a..c587be0 100644
--- a/crates/recomp-pipeline/src/lib.rs
+++ b/crates/recomp-pipeline/src/lib.rs
@@ -6,5 +6,6 @@ pub mod memory;
 pub mod output;
 pub mod pipeline;
 pub mod provenance;
+pub mod xci;
 
 pub use crate::pipeline::{run_pipeline, PipelineOptions, PipelineReport};
diff --git a/crates/recomp-pipeline/src/provenance.rs b/crates/recomp-pipeline/src/provenance.rs
index ee369e1..409a668 100644
--- a/crates/recomp-pipeline/src/provenance.rs
+++ b/crates/recomp-pipeline/src/provenance.rs
@@ -60,6 +60,8 @@ pub enum InputFormatHint {
     Nrr0,
     Npdm,
     LiftedJson,
+    Xci,
+    Keyset,
 }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -71,6 +73,8 @@ pub enum InputFormat { Nrr0, Npdm, LiftedJson, + Xci, + Keyset, } impl InputFormat { @@ -83,6 +87,8 @@ impl InputFormat { InputFormat::Nrr0 => "nrr0", InputFormat::Npdm => "npdm", InputFormat::LiftedJson => "lifted_json", + InputFormat::Xci => "xci", + InputFormat::Keyset => "keyset", } } } @@ -185,6 +191,8 @@ impl ProvenanceManifest { InputFormatHint::Nrr0 => InputFormat::Nrr0, InputFormatHint::Npdm => InputFormat::Npdm, InputFormatHint::LiftedJson => InputFormat::LiftedJson, + InputFormatHint::Xci => InputFormat::Xci, + InputFormatHint::Keyset => InputFormat::Keyset, }; if expected != detected { return Err(format!( @@ -242,6 +250,12 @@ pub fn detect_format(path: &Path) -> Result { if ext.eq_ignore_ascii_case("json") { return Ok(InputFormat::LiftedJson); } + if ext.eq_ignore_ascii_case("xci") { + return Ok(InputFormat::Xci); + } + if ext.eq_ignore_ascii_case("keys") || ext.eq_ignore_ascii_case("keyset") { + return Ok(InputFormat::Keyset); + } } let bytes = diff --git a/crates/recomp-pipeline/src/xci/external.rs b/crates/recomp-pipeline/src/xci/external.rs new file mode 100644 index 0000000..6443a90 --- /dev/null +++ b/crates/recomp-pipeline/src/xci/external.rs @@ -0,0 +1,448 @@ +use crate::xci::types::{XciExtractRequest, XciExtractResult, XciExtractor, XciFile, XciProgram}; +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum XciToolKind { + Hactool, + Hactoolnet, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum XciToolPreference { + Auto, + Hactool, + Hactoolnet, + Mock, +} + +impl XciToolPreference { + pub fn from_env() -> Option { + let value = env::var("RECOMP_XCI_TOOL").ok()?; + match value.to_ascii_lowercase().as_str() { + "auto" => Some(Self::Auto), + "hactool" => Some(Self::Hactool), + "hactoolnet" => Some(Self::Hactoolnet), + "mock" => Some(Self::Mock), + _ => None, + } + } +} + +#[derive(Debug, Clone)] +struct XciTool { + path: PathBuf, +} + +#[derive(Debug, Clone)] +pub struct ExternalXciExtractor { + tool: XciTool, +} + +impl ExternalXciExtractor { + pub fn detect( + preference: XciToolPreference, + tool_path: Option<&Path>, + ) -> Result, String> { + let env_pref = XciToolPreference::from_env().unwrap_or(preference); + if matches!(env_pref, XciToolPreference::Mock) { + return Ok(None); + } + + let env_path = env::var_os("RECOMP_XCI_TOOL_PATH").map(PathBuf::from); + let path_override = tool_path.map(PathBuf::from).or(env_path); + let tool = match env_pref { + XciToolPreference::Auto => detect_tool(path_override)?, + XciToolPreference::Hactool => detect_specific(XciToolKind::Hactool, path_override)?, + XciToolPreference::Hactoolnet => { + detect_specific(XciToolKind::Hactoolnet, path_override)? 
+ } + XciToolPreference::Mock => None, + }; + + Ok(tool.map(|tool| Self { tool })) + } + + fn run(&self, args: &[&str]) -> Result { + let output = Command::new(&self.tool.path) + .args(args) + .output() + .map_err(|err| format!("failed to run {}: {err}", self.tool.path.display()))?; + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + if output.status.success() { + Ok(stdout) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + Err(format!( + "{} failed (status={}): {}{}", + self.tool.path.display(), + output.status, + stderr, + if stdout.is_empty() { + String::new() + } else { + format!("\nstdout:\n{stdout}") + } + )) + } + } + + fn extract_xci(&self, request: &XciExtractRequest, out_dir: &Path) -> Result<(), String> { + let args = [ + "-k", + request.keys_path.to_str().ok_or("keys path invalid")?, + "--intype=xci", + "--outdir", + out_dir.to_str().ok_or("xci out dir invalid")?, + request.xci_path.to_str().ok_or("xci path invalid")?, + ]; + self.run(&args)?; + Ok(()) + } + + fn list_titles(&self, request: &XciExtractRequest) -> Option> { + let args = [ + "-k", + request.keys_path.to_str()?, + "--intype=xci", + "--listtitles", + request.xci_path.to_str()?, + ]; + let output = self.run(&args).ok()?; + Some(parse_title_listing(&output)) + } + + fn extract_nca( + &self, + request: &XciExtractRequest, + nca_path: &Path, + exefs: &Path, + romfs: &Path, + ) -> Result<(), String> { + let args = [ + "-k", + request.keys_path.to_str().ok_or("keys path invalid")?, + "--intype=nca", + "--exefsdir", + exefs.to_str().ok_or("exefs dir invalid")?, + "--romfsdir", + romfs.to_str().ok_or("romfs dir invalid")?, + nca_path.to_str().ok_or("nca path invalid")?, + ]; + self.run(&args)?; + Ok(()) + } +} + +impl XciExtractor for ExternalXciExtractor { + fn extract(&self, request: &XciExtractRequest) -> Result { + let temp = tempfile::tempdir().map_err(|err| format!("create temp dir: {err}"))?; + let xci_out = temp.path().join("xci"); + fs::create_dir_all(&xci_out) + .map_err(|err| format!("create xci dir {}: {err}", xci_out.display()))?; + + self.extract_xci(request, &xci_out)?; + + let mut nca_files = Vec::new(); + collect_nca_files(&xci_out, &mut nca_files)?; + if nca_files.is_empty() { + return Err("no NCA files extracted from XCI".to_string()); + } + + let metadata = self.list_titles(request).unwrap_or_default(); + let mut programs = Vec::new(); + let mut matched = Vec::new(); + + for meta in &metadata { + if let Some(content_id) = &meta.content_id { + if let Some(path) = find_nca_by_content_id(&nca_files, content_id) { + matched.push(path.clone()); + programs.push(build_program(self, request, path, meta)?); + } + } + } + + if programs.is_empty() { + for (index, nca_path) in nca_files.iter().enumerate() { + let meta = ProgramMetadata { + title_id: "unknown".to_string(), + content_type: "program".to_string(), + version: format!("unknown-{index}"), + content_id: None, + }; + programs.push(build_program(self, request, nca_path.clone(), &meta)?); + } + } else { + for nca_path in &nca_files { + if matched.iter().any(|path| path == nca_path) { + continue; + } + } + } + + Ok(XciExtractResult { programs }) + } +} + +fn build_program( + extractor: &ExternalXciExtractor, + request: &XciExtractRequest, + nca_path: PathBuf, + meta: &ProgramMetadata, +) -> Result { + let temp = tempfile::tempdir().map_err(|err| format!("create temp dir: {err}"))?; + let exefs_dir = temp.path().join("exefs"); + let romfs_dir = temp.path().join("romfs"); + fs::create_dir_all(&exefs_dir) + .map_err(|err| 
format!("create exefs dir {}: {err}", exefs_dir.display()))?; + fs::create_dir_all(&romfs_dir) + .map_err(|err| format!("create romfs dir {}: {err}", romfs_dir.display()))?; + + extractor.extract_nca(request, &nca_path, &exefs_dir, &romfs_dir)?; + + let mut exefs_files = Vec::new(); + let mut nso_files = Vec::new(); + for entry in fs::read_dir(&exefs_dir) + .map_err(|err| format!("read exefs dir {}: {err}", exefs_dir.display()))? + { + let entry = entry.map_err(|err| format!("read exefs entry: {err}"))?; + let path = entry.path(); + if !path.is_file() { + continue; + } + let name = entry + .file_name() + .into_string() + .map_err(|_| "invalid exefs file name".to_string())?; + let data = fs::read(&path).map_err(|err| format!("read exefs file: {err}"))?; + let file = XciFile { + name: name.clone(), + data: data.clone(), + }; + if is_nso_name(&name) { + nso_files.push(file.clone()); + } + exefs_files.push(file); + } + + exefs_files.sort_by(|a, b| a.name.cmp(&b.name)); + nso_files.sort_by(|a, b| a.name.cmp(&b.name)); + + let romfs_entries = collect_romfs_entries(&romfs_dir)?; + + Ok(XciProgram { + title_id: meta.title_id.clone(), + content_type: meta.content_type.clone(), + version: meta.version.clone(), + nca_bytes: fs::read(&nca_path).map_err(|err| format!("read NCA: {err}"))?, + exefs_files, + nso_files, + romfs_image: None, + romfs_entries, + }) +} + +fn is_nso_name(name: &str) -> bool { + if name == "main" { + return true; + } + if name.ends_with(".nso") { + return true; + } + !name.contains('.') && name != "main.npdm" +} + +fn collect_romfs_entries(root: &Path) -> Result, String> { + let mut entries = Vec::new(); + collect_romfs_entries_recursive(root, root, &mut entries)?; + Ok(entries) +} + +fn collect_romfs_entries_recursive( + root: &Path, + current: &Path, + entries: &mut Vec, +) -> Result<(), String> { + let dir_entries = match fs::read_dir(current) { + Ok(entries) => entries, + Err(_) => return Ok(()), + }; + for entry in dir_entries { + let entry = entry.map_err(|err| format!("read romfs entry: {err}"))?; + let path = entry.path(); + if path.is_dir() { + collect_romfs_entries_recursive(root, &path, entries)?; + continue; + } + let rel = path + .strip_prefix(root) + .map_err(|_| "romfs entry outside root".to_string())?; + let rel_str = rel.to_string_lossy().replace('\\', "/"); + let data = + fs::read(&path).map_err(|err| format!("read romfs file {}: {err}", path.display()))?; + entries.push(XciFile { + name: rel_str, + data, + }); + } + Ok(()) +} + +#[derive(Debug, Clone)] +struct ProgramMetadata { + title_id: String, + content_type: String, + version: String, + content_id: Option, +} + +fn parse_title_listing(output: &str) -> Vec { + let mut out = Vec::new(); + let mut current = ProgramMetadata { + title_id: String::new(), + content_type: "program".to_string(), + version: "unknown".to_string(), + content_id: None, + }; + + for line in output.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + if !current.title_id.is_empty() { + out.push(current.clone()); + current = ProgramMetadata { + title_id: String::new(), + content_type: "program".to_string(), + version: "unknown".to_string(), + content_id: None, + }; + } + continue; + } + let lower = trimmed.to_ascii_lowercase(); + if lower.starts_with("title id") { + if !current.title_id.is_empty() { + out.push(current.clone()); + } + current = ProgramMetadata { + title_id: after_colon(trimmed), + content_type: "program".to_string(), + version: "unknown".to_string(), + content_id: None, + }; + } else if 
lower.starts_with("content type") { + current.content_type = after_colon(trimmed).to_ascii_lowercase(); + } else if lower.starts_with("version") { + current.version = after_colon(trimmed); + } else if lower.starts_with("content id") { + current.content_id = Some(after_colon(trimmed).to_ascii_lowercase()); + } + } + + if !current.title_id.is_empty() { + out.push(current); + } + + out +} + +fn after_colon(line: &str) -> String { + line.split_once(':') + .map(|(_, value)| value.trim()) + .unwrap_or("") + .to_string() +} + +fn collect_nca_files(dir: &Path, out: &mut Vec) -> Result<(), String> { + for entry in fs::read_dir(dir).map_err(|err| format!("read dir {}: {err}", dir.display()))? { + let entry = entry.map_err(|err| format!("read entry: {err}"))?; + let path = entry.path(); + if path.is_dir() { + collect_nca_files(&path, out)?; + continue; + } + if path + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("nca")) + { + out.push(path); + } + } + out.sort(); + Ok(()) +} + +fn find_nca_by_content_id(ncas: &[PathBuf], content_id: &str) -> Option { + let target = content_id.to_ascii_lowercase(); + for path in ncas { + let stem = path + .file_stem() + .and_then(|stem| stem.to_str()) + .unwrap_or("") + .to_ascii_lowercase(); + if stem == target { + return Some(path.clone()); + } + } + None +} + +fn detect_tool(path_override: Option) -> Result, String> { + if let Some(path) = path_override { + return Ok(Some(infer_tool_kind(path)?)); + } + if let Some(path) = find_on_path("hactoolnet") { + return Ok(Some(XciTool { path })); + } + if let Some(path) = find_on_path("hactool") { + return Ok(Some(XciTool { path })); + } + Ok(None) +} + +fn detect_specific( + kind: XciToolKind, + path_override: Option, +) -> Result, String> { + let path = if let Some(path) = path_override { + path + } else { + let name = match kind { + XciToolKind::Hactool => "hactool", + XciToolKind::Hactoolnet => "hactoolnet", + }; + match find_on_path(name) { + Some(path) => path, + None => return Err(format!("{} not found on PATH", name)), + } + }; + Ok(Some(XciTool { path })) +} + +fn infer_tool_kind(path: PathBuf) -> Result { + if !path.is_file() { + return Err(format!("xci tool path is not a file: {}", path.display())); + } + Ok(XciTool { path }) +} + +fn find_on_path(name: &str) -> Option { + let path_var = env::var_os("PATH")?; + for dir in env::split_paths(&path_var) { + let candidate = dir.join(name); + if candidate.is_file() { + return Some(candidate); + } + #[cfg(windows)] + { + let candidate = dir.join(format!("{name}.exe")); + if candidate.is_file() { + return Some(candidate); + } + } + } + None +} diff --git a/crates/recomp-pipeline/src/xci/intake.rs b/crates/recomp-pipeline/src/xci/intake.rs new file mode 100644 index 0000000..298b9a5 --- /dev/null +++ b/crates/recomp-pipeline/src/xci/intake.rs @@ -0,0 +1,680 @@ +use crate::homebrew::module::{BssInfo, ModuleBuild, ModuleJson, ModuleSegment, OffsetInfo}; +use crate::homebrew::nso::{extract_segments, parse_nso, NsoModule, NsoSegmentKind}; +use crate::homebrew::romfs::{list_romfs_entries, RomfsEntry}; +use crate::output::{GeneratedFile, InputSummary}; +use crate::provenance::{InputFormat, ProvenanceManifest}; +use crate::xci::external::{ExternalXciExtractor, XciToolPreference}; +use crate::xci::mock::MockXciExtractor; +use crate::xci::types::{XciExtractRequest, XciExtractResult, XciExtractor, XciProgram}; +use pathdiff::diff_paths; +use serde::Deserialize; +use sha2::{Digest, Sha256}; +use std::collections::BTreeMap; +use std::fs; 
+use std::path::{Component, Path, PathBuf};
+
+const INTAKE_SCHEMA_VERSION: &str = "1";
+const MODULE_SCHEMA_VERSION: &str = "1";
+
+#[derive(Debug)]
+pub struct XciIntakeOptions {
+    pub xci_path: PathBuf,
+    pub keys_path: PathBuf,
+    pub config_path: Option<PathBuf>,
+    pub provenance_path: PathBuf,
+    pub out_dir: PathBuf,
+    pub assets_dir: PathBuf,
+    pub tool_preference: XciToolPreference,
+    pub tool_path: Option<PathBuf>,
+}
+
+#[derive(Debug)]
+pub struct XciIntakeReport {
+    pub out_dir: PathBuf,
+    pub assets_dir: PathBuf,
+    pub module_json_path: PathBuf,
+    pub manifest_path: PathBuf,
+    pub files_written: Vec<PathBuf>,
+}
+
+#[derive(Debug, Deserialize, Default)]
+struct RawXciConfig {
+    #[serde(default)]
+    program_title_id: Option<String>,
+    #[serde(default)]
+    program_version: Option<String>,
+    #[serde(default)]
+    program_content_type: Option<String>,
+}
+
+#[derive(Debug, Clone)]
+struct XciSelection {
+    title_id: Option<String>,
+    version: Option<String>,
+    content_type: Option<String>,
+}
+
+#[derive(Debug, serde::Serialize)]
+struct IntakeManifest {
+    schema_version: String,
+    tool: ToolInfo,
+    program: ProgramRecord,
+    assets_root: String,
+    modules: Vec<ModuleRecord>,
+    assets: Vec<AssetRecord>,
+    inputs: Vec<InputSummary>,
+    generated_files: Vec<GeneratedFile>,
+}
+
+#[derive(Debug, serde::Serialize)]
+struct ToolInfo {
+    name: String,
+    version: String,
+}
+
+#[derive(Debug, serde::Serialize)]
+struct ProgramRecord {
+    title_id: String,
+    content_type: String,
+    version: String,
+    nca_sha256: String,
+    nca_size: u64,
+    nca_metadata_path: String,
+}
+
+#[derive(Debug, serde::Serialize)]
+struct ModuleRecord {
+    name: String,
+    format: String,
+    build_id: String,
+    module_json_path: String,
+}
+
+#[derive(Debug, serde::Serialize, Clone)]
+struct AssetRecord {
+    kind: String,
+    path: String,
+    sha256: String,
+    size: u64,
+    source_offset: u64,
+    source_size: u64,
+}
+
+pub fn intake_xci(options: XciIntakeOptions) -> Result<XciIntakeReport, String> {
+    if let Some(external) =
+        ExternalXciExtractor::detect(options.tool_preference, options.tool_path.as_deref())?
+    {
+        intake_xci_with_extractor(options, &external)
+    } else {
+        let extractor = MockXciExtractor::new();
+        intake_xci_with_extractor(options, &extractor)
+    }
+}
+
+pub fn intake_xci_with_extractor(
+    options: XciIntakeOptions,
+    extractor: &dyn XciExtractor,
+) -> Result<XciIntakeReport, String> {
+    let xci_path = absolute_path(&options.xci_path)?;
+    let keys_path = absolute_path(&options.keys_path)?;
+    let provenance_path = absolute_path(&options.provenance_path)?;
+    let out_dir = absolute_path(&options.out_dir)?;
+    let assets_dir = absolute_path(&options.assets_dir)?;
+
+    let config = match &options.config_path {
+        Some(path) => {
+            let config_path = absolute_path(path)?;
+            let config_src = fs::read_to_string(&config_path)
+                .map_err(|err| format!("read config {}: {err}", config_path.display()))?;
+            parse_config(&config_src)?
+ } + None => RawXciConfig::default(), + }; + + ensure_separate_outputs(&out_dir, &assets_dir)?; + + let provenance_src = + fs::read_to_string(&provenance_path).map_err(|err| format!("read provenance: {err}"))?; + let provenance = ProvenanceManifest::parse(&provenance_src)?; + let validation = provenance.validate(&provenance_path, &provenance_src)?; + + ensure_input_present(&validation.inputs, &xci_path, InputFormat::Xci)?; + ensure_input_present(&validation.inputs, &keys_path, InputFormat::Keyset)?; + + let extract_request = XciExtractRequest { + xci_path: xci_path.clone(), + keys_path: keys_path.clone(), + }; + let extraction = extractor.extract(&extract_request)?; + let mut selection = XciSelection { + title_id: config.program_title_id, + version: config.program_version, + content_type: config.program_content_type, + }; + if selection.content_type.is_none() { + selection.content_type = Some("program".to_string()); + } + let program = select_program(&extraction, &selection)?; + + fs::create_dir_all(&out_dir) + .map_err(|err| format!("create out dir {}: {err}", out_dir.display()))?; + fs::create_dir_all(&assets_dir) + .map_err(|err| format!("create assets dir {}: {err}", assets_dir.display()))?; + + let exefs_dir = out_dir.join("exefs"); + let segments_dir = out_dir.join("segments"); + let nca_dir = out_dir.join("nca"); + fs::create_dir_all(&exefs_dir).map_err(|err| format!("create exefs dir: {err}"))?; + fs::create_dir_all(&segments_dir).map_err(|err| format!("create segments dir: {err}"))?; + fs::create_dir_all(&nca_dir).map_err(|err| format!("create nca dir: {err}"))?; + + let mut generated_files = Vec::new(); + let mut files_written = Vec::new(); + + let mut exefs_index = BTreeMap::new(); + for file in &program.exefs_files { + let name = sanitize_name(&file.name)?; + let out_path = exefs_dir.join(&name); + fs::write(&out_path, &file.data).map_err(|err| format!("write exefs {name}: {err}"))?; + files_written.push(out_path.clone()); + let rel_path = format!("exefs/{name}"); + generated_files.push(GeneratedFile { + path: rel_path.clone(), + sha256: sha256_bytes(&file.data), + size: file.data.len() as u64, + }); + exefs_index.insert(name, out_path); + } + + let mut module_builds = Vec::new(); + let mut module_files = Vec::new(); + for nso in &program.nso_files { + let name = sanitize_name(&nso.name)?; + let Some(nso_path) = exefs_index.get(&name) else { + return Err(format!("NSO {name} is not present in ExeFS output")); + }; + let module = parse_nso(nso_path)?; + let (build, generated, written) = write_nso_segments(&module, &segments_dir)?; + module_builds.push(build); + module_files.extend(generated); + files_written.extend(written); + } + + generated_files.extend(module_files); + + module_builds.sort_by(|a, b| a.name.cmp(&b.name)); + let module_json = ModuleJson { + schema_version: MODULE_SCHEMA_VERSION.to_string(), + module_type: "xci".to_string(), + modules: module_builds, + }; + let module_json_path = out_dir.join("module.json"); + let module_json_src = + serde_json::to_string_pretty(&module_json).map_err(|err| err.to_string())?; + fs::write(&module_json_path, module_json_src.as_bytes()) + .map_err(|err| format!("write module.json: {err}"))?; + files_written.push(module_json_path.clone()); + generated_files.push(GeneratedFile { + path: "module.json".to_string(), + sha256: sha256_bytes(module_json_src.as_bytes()), + size: module_json_src.len() as u64, + }); + + let nca_path = nca_dir.join("program.json"); + let nca_metadata = ProgramRecord { + title_id: program.title_id.clone(), + 
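+        // Only the hash and size of the Program NCA are recorded here;
+        // `nca/program.json` carries metadata, never raw NCA bytes.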
content_type: program.content_type.clone(), + version: program.version.clone(), + nca_sha256: sha256_bytes(&program.nca_bytes), + nca_size: program.nca_bytes.len() as u64, + nca_metadata_path: "nca/program.json".to_string(), + }; + let nca_src = serde_json::to_string_pretty(&nca_metadata).map_err(|err| err.to_string())?; + fs::write(&nca_path, nca_src.as_bytes()).map_err(|err| format!("write nca metadata: {err}"))?; + files_written.push(nca_path); + generated_files.push(GeneratedFile { + path: "nca/program.json".to_string(), + sha256: sha256_bytes(nca_src.as_bytes()), + size: nca_src.len() as u64, + }); + + let mut assets = Vec::new(); + if let Some(romfs_image) = program.romfs_image.clone() { + let romfs_root = assets_dir.join("romfs"); + fs::create_dir_all(&romfs_root).map_err(|err| format!("create romfs dir: {err}"))?; + let entries = list_romfs_entries(&romfs_image)?; + let asset_written = write_romfs_entries( + &romfs_image, + &entries, + &romfs_root, + &assets_dir, + "romfs", + &mut assets, + )?; + files_written.extend(asset_written); + } else if !program.romfs_entries.is_empty() { + let asset_written = + write_romfs_entry_files(&program.romfs_entries, &assets_dir, &mut assets)?; + files_written.extend(asset_written); + } + + let inputs = validation + .inputs + .iter() + .map(|input| InputSummary { + path: input.path.clone(), + format: input.format.as_str().to_string(), + sha256: input.sha256.clone(), + size: input.size, + role: input.role.clone(), + }) + .collect::>(); + + let module_records = module_json + .modules + .iter() + .map(|module| ModuleRecord { + name: module.name.clone(), + format: module.format.clone(), + build_id: module.build_id.clone(), + module_json_path: "module.json".to_string(), + }) + .collect::>(); + + assets.sort_by(|a, b| a.path.cmp(&b.path)); + generated_files.sort_by(|a, b| a.path.cmp(&b.path)); + + let assets_root = diff_paths(&assets_dir, &out_dir) + .unwrap_or_else(|| assets_dir.clone()) + .to_string_lossy() + .replace('\\', "/"); + + let manifest = IntakeManifest { + schema_version: INTAKE_SCHEMA_VERSION.to_string(), + tool: ToolInfo { + name: "recomp-pipeline".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + }, + program: nca_metadata, + assets_root, + modules: module_records, + assets, + inputs, + generated_files, + }; + + let manifest_path = out_dir.join("manifest.json"); + let manifest_src = serde_json::to_string_pretty(&manifest).map_err(|err| err.to_string())?; + fs::write(&manifest_path, manifest_src.as_bytes()) + .map_err(|err| format!("write manifest.json: {err}"))?; + files_written.push(manifest_path.clone()); + + Ok(XciIntakeReport { + out_dir, + assets_dir, + module_json_path, + manifest_path, + files_written, + }) +} + +fn parse_config(src: &str) -> Result { + toml::from_str(src).map_err(|err| format!("invalid xci intake config: {err}")) +} + +fn select_program<'a>( + extraction: &'a XciExtractResult, + selection: &XciSelection, +) -> Result<&'a XciProgram, String> { + let mut candidates = Vec::new(); + for program in &extraction.programs { + if let Some(title_id) = &selection.title_id { + if &program.title_id != title_id { + continue; + } + } + if let Some(version) = &selection.version { + if &program.version != version { + continue; + } + } + if let Some(content_type) = &selection.content_type { + if &program.content_type != content_type { + continue; + } + } + candidates.push(program); + } + + if candidates.is_empty() { + return Err(format!( + "no Program NCA matches selection. 
available: {}", + format_programs(extraction.programs.iter()) + )); + } + if candidates.len() > 1 { + return Err(format!( + "ambiguous Program NCA selection. Provide program_title_id/program_version to disambiguate. available: {}", + format_programs(candidates.iter().copied()) + )); + } + + Ok(candidates[0]) +} + +fn format_programs<'a>(programs: impl IntoIterator) -> String { + let mut out = Vec::new(); + for program in programs { + out.push(format!( + "{} {} {}", + program.title_id, program.content_type, program.version + )); + } + out.join(", ") +} + +fn ensure_input_present( + inputs: &[crate::provenance::ValidatedInput], + path: &Path, + format: InputFormat, +) -> Result<(), String> { + if inputs + .iter() + .any(|input| input.path == path && input.format == format) + { + Ok(()) + } else { + Err(format!( + "input {} with format {} not listed in provenance metadata", + path.display(), + format.as_str() + )) + } +} + +fn ensure_separate_outputs(out_dir: &Path, assets_dir: &Path) -> Result<(), String> { + let normalized_out = normalize_path(out_dir); + let normalized_assets = normalize_path(assets_dir); + if normalized_out == normalized_assets { + return Err("assets_dir must be separate from out_dir".to_string()); + } + if is_within(&normalized_assets, &normalized_out) { + return Err("assets_dir must not be inside out_dir".to_string()); + } + if is_within(&normalized_out, &normalized_assets) { + return Err("out_dir must not be inside assets_dir".to_string()); + } + Ok(()) +} + +fn is_within(path: &Path, base: &Path) -> bool { + path.starts_with(base) +} + +fn normalize_path(path: &Path) -> PathBuf { + let mut out = PathBuf::new(); + for component in path.components() { + match component { + Component::CurDir => {} + Component::ParentDir => { + out.pop(); + } + Component::Prefix(prefix) => out.push(prefix.as_os_str()), + Component::RootDir => out.push(Component::RootDir.as_os_str()), + Component::Normal(value) => out.push(value), + } + } + out +} + +fn write_nso_segments( + module: &NsoModule, + segments_dir: &Path, +) -> Result<(ModuleBuild, Vec, Vec), String> { + let module_name = module + .path + .file_stem() + .and_then(|name| name.to_str()) + .unwrap_or("nso") + .to_string(); + let module_dir = segments_dir.join(&module_name); + fs::create_dir_all(&module_dir).map_err(|err| format!("create module dir: {err}"))?; + + let segment_data = extract_segments(module)?; + let mut segments = Vec::new(); + let mut generated = Vec::new(); + let mut written = Vec::new(); + + for entry in segment_data { + let file_name = format!("{}.bin", segment_name(entry.segment.kind)); + let output_rel = format!("segments/{module_name}/{file_name}"); + let output_path = module_dir.join(&file_name); + fs::write(&output_path, &entry.data) + .map_err(|err| format!("write NSO segment {file_name}: {err}"))?; + written.push(output_path.clone()); + generated.push(GeneratedFile { + path: output_rel.clone(), + sha256: sha256_bytes(&entry.data), + size: entry.data.len() as u64, + }); + segments.push(ModuleSegment { + name: segment_name(entry.segment.kind).to_string(), + file_offset: entry.segment.file_offset as u64, + file_size: entry.segment.file_size as u64, + memory_offset: entry.segment.memory_offset as u64, + memory_size: entry.segment.size as u64, + permissions: entry.segment.permissions.as_str().to_string(), + compressed: Some(entry.segment.compressed), + output_path: output_rel, + }); + } + + let input_sha256 = sha256_path(&module.path)?; + let bss_offset = module + .segments + .iter() + .find(|segment| 
segment.kind == NsoSegmentKind::Data) + .map(|segment| segment.memory_offset as u64 + segment.size as u64) + .unwrap_or(0); + + let input_name = module + .path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("nso"); + let build = ModuleBuild { + name: module_name, + format: "nso".to_string(), + input_path: PathBuf::from(format!("exefs/{input_name}")), + input_sha256, + input_size: module.size, + build_id: module.module_id_hex(), + segments, + bss: BssInfo { + size: module.bss_size as u64, + memory_offset: bss_offset, + }, + embedded: Some(OffsetInfo { + offset: module.embedded_offset as u64, + size: module.embedded_size as u64, + }), + dynstr: Some(OffsetInfo { + offset: module.dynstr_offset as u64, + size: module.dynstr_size as u64, + }), + dynsym: Some(OffsetInfo { + offset: module.dynsym_offset as u64, + size: module.dynsym_size as u64, + }), + }; + + Ok((build, generated, written)) +} + +fn segment_name(kind: NsoSegmentKind) -> &'static str { + match kind { + NsoSegmentKind::Text => "text", + NsoSegmentKind::Rodata => "rodata", + NsoSegmentKind::Data => "data", + } +} + +fn write_romfs_entries( + romfs_bytes: &[u8], + entries: &[RomfsEntry], + romfs_dir: &Path, + root_dir: &Path, + kind: &str, + records: &mut Vec, +) -> Result, String> { + let mut written = Vec::new(); + for entry in entries { + let rel_path = Path::new(&entry.path); + if rel_path.is_absolute() { + return Err(format!("romfs entry path is absolute: {}", entry.path)); + } + for component in rel_path.components() { + match component { + std::path::Component::Normal(_) => {} + _ => { + return Err(format!( + "romfs entry path contains invalid component: {}", + entry.path + )) + } + } + } + + let out_path = romfs_dir.join(rel_path); + if let Some(parent) = out_path.parent() { + fs::create_dir_all(parent) + .map_err(|err| format!("create romfs dir {}: {err}", parent.display()))?; + } + + let start = entry.data_offset as usize; + let end = start + .checked_add(entry.data_size as usize) + .ok_or_else(|| "romfs file size overflow".to_string())?; + if end > romfs_bytes.len() { + return Err(format!( + "romfs entry out of range: {}..{} (len={})", + start, + end, + romfs_bytes.len() + )); + } + let data = &romfs_bytes[start..end]; + fs::write(&out_path, data) + .map_err(|err| format!("write romfs entry {}: {err}", out_path.display()))?; + + let rel = out_path + .strip_prefix(root_dir) + .unwrap_or(&out_path) + .to_string_lossy() + .replace('\\', "/"); + let record = AssetRecord { + kind: kind.to_string(), + path: rel, + sha256: sha256_bytes(data), + size: data.len() as u64, + source_offset: entry.data_offset, + source_size: entry.data_size, + }; + records.push(record); + written.push(out_path); + } + + Ok(written) +} + +fn write_romfs_entry_files( + entries: &[crate::xci::types::XciFile], + assets_dir: &Path, + records: &mut Vec, +) -> Result, String> { + let mut written = Vec::new(); + let root = assets_dir.join("romfs"); + fs::create_dir_all(&root) + .map_err(|err| format!("create romfs dir {}: {err}", root.display()))?; + for entry in entries { + let rel_path = Path::new(&entry.name); + if rel_path.is_absolute() { + return Err(format!("romfs entry path is absolute: {}", entry.name)); + } + for component in rel_path.components() { + match component { + std::path::Component::Normal(_) => {} + _ => { + return Err(format!( + "romfs entry path contains invalid component: {}", + entry.name + )) + } + } + } + let out_path = root.join(rel_path); + if let Some(parent) = out_path.parent() { + fs::create_dir_all(parent) + 
.map_err(|err| format!("create romfs dir {}: {err}", parent.display()))?; + } + fs::write(&out_path, &entry.data) + .map_err(|err| format!("write romfs entry {}: {err}", out_path.display()))?; + let rel_out = out_path + .strip_prefix(assets_dir) + .unwrap_or(&out_path) + .to_string_lossy() + .replace('\\', "/"); + records.push(AssetRecord { + kind: "romfs".to_string(), + path: rel_out, + sha256: sha256_bytes(&entry.data), + size: entry.data.len() as u64, + source_offset: 0, + source_size: entry.data.len() as u64, + }); + written.push(out_path); + } + Ok(written) +} + +fn sanitize_name(name: &str) -> Result { + if name.is_empty() { + return Err("empty file name in ExeFS".to_string()); + } + let path = Path::new(name); + if path.components().count() != 1 { + return Err(format!("ExeFS file name has path separators: {name}")); + } + Ok(name.to_string()) +} + +fn absolute_path(path: &Path) -> Result { + if path.is_absolute() { + Ok(path.to_path_buf()) + } else { + std::env::current_dir() + .map_err(|err| err.to_string()) + .map(|cwd| cwd.join(path)) + } +} + +fn sha256_bytes(bytes: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(bytes); + let digest = hasher.finalize(); + hex_bytes(&digest) +} + +fn sha256_path(path: &Path) -> Result { + let bytes = fs::read(path).map_err(|err| err.to_string())?; + Ok(sha256_bytes(&bytes)) +} + +fn hex_bytes(bytes: &[u8]) -> String { + let mut out = String::with_capacity(bytes.len() * 2); + for byte in bytes { + use std::fmt::Write; + let _ = write!(&mut out, "{byte:02x}"); + } + out +} diff --git a/crates/recomp-pipeline/src/xci/mock.rs b/crates/recomp-pipeline/src/xci/mock.rs new file mode 100644 index 0000000..cdcd374 --- /dev/null +++ b/crates/recomp-pipeline/src/xci/mock.rs @@ -0,0 +1,106 @@ +use crate::xci::types::{XciExtractRequest, XciExtractResult, XciExtractor, XciFile, XciProgram}; +use base64::engine::general_purpose::STANDARD; +use base64::Engine as _; +use serde::Deserialize; +use std::fs; + +const MOCK_SCHEMA_VERSION: &str = "1"; + +#[derive(Debug, Deserialize)] +struct MockXciImage { + schema_version: String, + programs: Vec, + #[serde(default)] + romfs: Option, +} + +#[derive(Debug, Deserialize)] +struct MockProgram { + title_id: String, + content_type: String, + version: String, + nca: MockBlob, + exefs: Vec, + #[serde(default)] + nso: Vec, +} + +#[derive(Debug, Deserialize)] +struct MockRomfs { + image_b64: String, +} + +#[derive(Debug, Deserialize)] +struct MockFile { + name: String, + data_b64: String, +} + +#[derive(Debug, Deserialize)] +struct MockBlob { + data_b64: String, +} + +#[derive(Debug, Default)] +pub struct MockXciExtractor; + +impl MockXciExtractor { + pub fn new() -> Self { + Self + } +} + +impl XciExtractor for MockXciExtractor { + fn extract(&self, request: &XciExtractRequest) -> Result { + let payload = fs::read_to_string(&request.xci_path) + .map_err(|err| format!("read mock xci {}: {err}", request.xci_path.display()))?; + let image: MockXciImage = + serde_json::from_str(&payload).map_err(|err| format!("parse mock xci: {err}"))?; + if image.schema_version != MOCK_SCHEMA_VERSION { + return Err(format!( + "unsupported mock xci schema version: {}", + image.schema_version + )); + } + + let mut programs = Vec::new(); + for program in image.programs { + let nca_bytes = decode_b64("nca", &program.nca.data_b64)?; + let exefs_files = decode_files(&program.exefs)?; + let nso_files = decode_files(&program.nso)?; + let romfs_image = match &image.romfs { + Some(romfs) => Some(decode_b64("romfs", 
&romfs.image_b64)?), + None => None, + }; + programs.push(XciProgram { + title_id: program.title_id, + content_type: program.content_type, + version: program.version, + nca_bytes, + exefs_files, + nso_files, + romfs_image, + romfs_entries: Vec::new(), + }); + } + Ok(XciExtractResult { programs }) + } +} + +fn decode_files(files: &[MockFile]) -> Result, String> { + let mut out = Vec::new(); + for file in files { + let data = decode_b64(&file.name, &file.data_b64)?; + out.push(XciFile { + name: file.name.clone(), + data, + }); + } + Ok(out) +} + +fn decode_b64(label: &str, payload: &str) -> Result, String> { + STANDARD + .decode(payload) + .map_err(|err| format!("invalid base64 for {label}: {err}")) +} diff --git a/crates/recomp-pipeline/src/xci/mod.rs b/crates/recomp-pipeline/src/xci/mod.rs new file mode 100644 index 0000000..8e1ba4e --- /dev/null +++ b/crates/recomp-pipeline/src/xci/mod.rs @@ -0,0 +1,9 @@ +pub mod external; +pub mod intake; +pub mod mock; +pub mod types; + +pub use external::{ExternalXciExtractor, XciToolPreference}; +pub use intake::{intake_xci, intake_xci_with_extractor, XciIntakeOptions, XciIntakeReport}; +pub use mock::MockXciExtractor; +pub use types::{XciExtractRequest, XciExtractResult, XciExtractor, XciFile, XciProgram}; diff --git a/crates/recomp-pipeline/src/xci/types.rs b/crates/recomp-pipeline/src/xci/types.rs new file mode 100644 index 0000000..e356c77 --- /dev/null +++ b/crates/recomp-pipeline/src/xci/types.rs @@ -0,0 +1,34 @@ +use std::path::PathBuf; + +#[derive(Debug, Clone)] +pub struct XciFile { + pub name: String, + pub data: Vec, +} + +#[derive(Debug, Clone)] +pub struct XciProgram { + pub title_id: String, + pub content_type: String, + pub version: String, + pub nca_bytes: Vec, + pub exefs_files: Vec, + pub nso_files: Vec, + pub romfs_image: Option>, + pub romfs_entries: Vec, +} + +#[derive(Debug, Clone)] +pub struct XciExtractResult { + pub programs: Vec, +} + +#[derive(Debug, Clone)] +pub struct XciExtractRequest { + pub xci_path: PathBuf, + pub keys_path: PathBuf, +} + +pub trait XciExtractor { + fn extract(&self, request: &XciExtractRequest) -> Result; +} diff --git a/crates/recomp-pipeline/tests/fixtures/formats/sample.keys b/crates/recomp-pipeline/tests/fixtures/formats/sample.keys new file mode 100644 index 0000000..ac09cf1 --- /dev/null +++ b/crates/recomp-pipeline/tests/fixtures/formats/sample.keys @@ -0,0 +1 @@ +# dummy keyset diff --git a/crates/recomp-pipeline/tests/fixtures/formats/sample.xci b/crates/recomp-pipeline/tests/fixtures/formats/sample.xci new file mode 100644 index 0000000..693c350 --- /dev/null +++ b/crates/recomp-pipeline/tests/fixtures/formats/sample.xci @@ -0,0 +1 @@ +{"schema_version":"1","programs":[]} diff --git a/crates/recomp-pipeline/tests/provenance.rs b/crates/recomp-pipeline/tests/provenance.rs index 290891e..536d5a3 100644 --- a/crates/recomp-pipeline/tests/provenance.rs +++ b/crates/recomp-pipeline/tests/provenance.rs @@ -20,6 +20,8 @@ fn detect_supported_formats() { ("homebrew.nro", "nro0"), ("plugins.nrr", "nrr0"), ("main.npdm", "npdm"), + ("sample.xci", "xci"), + ("sample.keys", "keyset"), ]; for (file, expected) in cases { diff --git a/crates/recomp-pipeline/tests/xci_intake.rs b/crates/recomp-pipeline/tests/xci_intake.rs new file mode 100644 index 0000000..2a0404c --- /dev/null +++ b/crates/recomp-pipeline/tests/xci_intake.rs @@ -0,0 +1,395 @@ +use base64::engine::general_purpose::STANDARD; +use base64::Engine as _; +use recomp_pipeline::xci::{intake_xci, XciIntakeOptions, XciToolPreference}; +use 
sha2::{Digest, Sha256}; +use std::fs; +use std::path::{Path, PathBuf}; +use tempfile::tempdir; + +fn sha256_hex(bytes: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(bytes); + let digest = hasher.finalize(); + let mut out = String::with_capacity(digest.len() * 2); + for byte in digest { + use std::fmt::Write; + let _ = write!(&mut out, "{byte:02x}"); + } + out +} + +fn write_u32(bytes: &mut [u8], offset: usize, value: u32) { + bytes[offset..offset + 4].copy_from_slice(&value.to_le_bytes()); +} + +fn write_u64(bytes: &mut [u8], offset: usize, value: u64) { + bytes[offset..offset + 8].copy_from_slice(&value.to_le_bytes()); +} + +fn align_up(value: usize, align: usize) -> usize { + value.div_ceil(align) * align +} + +fn build_romfs_image() -> Vec { + let file_root = b"HELLO"; + let file_nested = b"NESTED"; + let nested_dir = "data"; + let root_name = ""; + + let root_entry_size = align_up(0x18 + root_name.len(), 4); + let nested_entry_off = root_entry_size as u32; + let nested_entry_size = align_up(0x18 + nested_dir.len(), 4); + let dir_table_size = root_entry_size + nested_entry_size; + + let file_root_name = "hello.txt"; + let file_nested_name = "nested.bin"; + let file_root_entry_size = align_up(0x20 + file_root_name.len(), 4); + let file_nested_off = file_root_entry_size as u32; + let file_nested_entry_size = align_up(0x20 + file_nested_name.len(), 4); + let file_table_size = file_root_entry_size + file_nested_entry_size; + + let file_root_data_off = 0u64; + let file_nested_data_off = align_up(file_root.len(), 0x10) as u64; + let mut file_data = Vec::new(); + file_data.extend_from_slice(file_root); + let padding = align_up(file_data.len(), 0x10) - file_data.len(); + file_data.extend(std::iter::repeat_n(0u8, padding)); + file_data.extend_from_slice(file_nested); + + let mut dir_table = Vec::new(); + push_dir_entry( + &mut dir_table, + 0xFFFF_FFFF, + 0xFFFF_FFFF, + nested_entry_off, + 0, + 0xFFFF_FFFF, + root_name, + ); + push_dir_entry( + &mut dir_table, + 0, + 0xFFFF_FFFF, + 0xFFFF_FFFF, + file_nested_off, + 0xFFFF_FFFF, + nested_dir, + ); + + let mut file_table = Vec::new(); + push_file_entry( + &mut file_table, + 0, + 0xFFFF_FFFF, + file_root_data_off, + file_root.len() as u64, + 0xFFFF_FFFF, + file_root_name, + ); + push_file_entry( + &mut file_table, + nested_entry_off, + 0xFFFF_FFFF, + file_nested_data_off, + file_nested.len() as u64, + 0xFFFF_FFFF, + file_nested_name, + ); + + let header_size = 0x50usize; + let dir_table_off = align_up(header_size, 0x10); + let file_table_off = align_up(dir_table_off + dir_table_size, 0x10); + let file_data_off = align_up(file_table_off + file_table_size, 0x10); + let total_size = file_data_off + file_data.len(); + + let mut image = vec![0u8; total_size]; + write_u64(&mut image, 0x0, 0x50); + write_u64(&mut image, 0x8, dir_table_off as u64); + write_u64(&mut image, 0x10, 0); + write_u64(&mut image, 0x18, dir_table_off as u64); + write_u64(&mut image, 0x20, dir_table_size as u64); + write_u64(&mut image, 0x28, file_table_off as u64); + write_u64(&mut image, 0x30, 0); + write_u64(&mut image, 0x38, file_table_off as u64); + write_u64(&mut image, 0x40, file_table_size as u64); + write_u64(&mut image, 0x48, file_data_off as u64); + + image[dir_table_off..dir_table_off + dir_table_size].copy_from_slice(&dir_table); + image[file_table_off..file_table_off + file_table_size].copy_from_slice(&file_table); + image[file_data_off..file_data_off + file_data.len()].copy_from_slice(&file_data); + + image +} + +fn push_dir_entry( + buf: &mut 
Vec, + parent: u32, + sibling: u32, + child_dir: u32, + child_file: u32, + next_hash: u32, + name: &str, +) -> u32 { + let offset = buf.len() as u32; + buf.extend_from_slice(&parent.to_le_bytes()); + buf.extend_from_slice(&sibling.to_le_bytes()); + buf.extend_from_slice(&child_dir.to_le_bytes()); + buf.extend_from_slice(&child_file.to_le_bytes()); + buf.extend_from_slice(&next_hash.to_le_bytes()); + buf.extend_from_slice(&(name.len() as u32).to_le_bytes()); + buf.extend_from_slice(name.as_bytes()); + while buf.len() % 4 != 0 { + buf.push(0); + } + offset +} + +fn push_file_entry( + buf: &mut Vec, + parent: u32, + sibling: u32, + data_off: u64, + data_size: u64, + next_hash: u32, + name: &str, +) -> u32 { + let offset = buf.len() as u32; + buf.extend_from_slice(&parent.to_le_bytes()); + buf.extend_from_slice(&sibling.to_le_bytes()); + buf.extend_from_slice(&data_off.to_le_bytes()); + buf.extend_from_slice(&data_size.to_le_bytes()); + buf.extend_from_slice(&next_hash.to_le_bytes()); + buf.extend_from_slice(&(name.len() as u32).to_le_bytes()); + buf.extend_from_slice(name.as_bytes()); + while buf.len() % 4 != 0 { + buf.push(0); + } + offset +} + +fn build_nso() -> Vec { + let header_size = 0x100usize; + let text = b"TEXTDATA"; + let rodata = b"RO"; + let data = b"DATA"; + let compressed_text = lz4_flex::block::compress(text); + + let text_off = header_size as u32; + let ro_off = text_off + compressed_text.len() as u32; + let data_off = ro_off + rodata.len() as u32; + let total = header_size + compressed_text.len() + rodata.len() + data.len(); + let mut bytes = vec![0u8; total]; + + bytes[0x0..0x4].copy_from_slice(b"NSO0"); + write_u32(&mut bytes, 0x8, 0x1); + write_u32(&mut bytes, 0x10, text_off); + write_u32(&mut bytes, 0x14, 0); + write_u32(&mut bytes, 0x18, text.len() as u32); + write_u32(&mut bytes, 0x20, ro_off); + write_u32(&mut bytes, 0x24, 0x1000); + write_u32(&mut bytes, 0x28, rodata.len() as u32); + write_u32(&mut bytes, 0x30, data_off); + write_u32(&mut bytes, 0x34, 0x2000); + write_u32(&mut bytes, 0x38, data.len() as u32); + write_u32(&mut bytes, 0x3C, 0x40); + + let module_id = [0xCDu8; 0x20]; + bytes[0x40..0x60].copy_from_slice(&module_id); + write_u32(&mut bytes, 0x60, compressed_text.len() as u32); + write_u32(&mut bytes, 0x64, rodata.len() as u32); + write_u32(&mut bytes, 0x68, data.len() as u32); + + bytes[text_off as usize..text_off as usize + compressed_text.len()] + .copy_from_slice(&compressed_text); + let ro_start = ro_off as usize; + bytes[ro_start..ro_start + rodata.len()].copy_from_slice(rodata); + let data_start = data_off as usize; + bytes[data_start..data_start + data.len()].copy_from_slice(data); + + bytes +} + +fn write_provenance(path: &Path, entries: Vec<(PathBuf, &str, &[u8])>) { + let mut inputs = String::new(); + for (entry_path, format, bytes) in entries { + let sha = sha256_hex(bytes); + let size = bytes.len(); + inputs.push_str(&format!( + "[[inputs]]\npath = \"{}\"\nsha256 = \"{}\"\nsize = {}\nformat = \"{}\"\n\n", + entry_path.display(), + sha, + size, + format + )); + } + + let toml = format!( + "schema_version = \"1\"\n\n[title]\nname = \"Test\"\ntitle_id = \"0100000000000000\"\nversion = \"1.0.0\"\nregion = \"US\"\n\n[collection]\ndevice = \"Switch\"\ncollected_at = \"2026-02-03\"\n\n[collection.tool]\nname = \"collector\"\nversion = \"0.1\"\n\n{}", + inputs + ); + fs::write(path, toml).expect("write provenance"); +} + +fn build_mock_xci_json(nso: &[u8], romfs: &[u8]) -> String { + let nca_bytes = b"NCA3"; + let program = serde_json::json!({ + 
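+        // Shape of a mock XCI image (mirrors the structs in xci/mock.rs):
+        // per-program base64 blobs for nca/exefs/nso, plus an optional
+        // top-level romfs image.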
"title_id": "0100000000000000", + "content_type": "program", + "version": "1.0.0", + "nca": { "data_b64": STANDARD.encode(nca_bytes) }, + "exefs": [ + { "name": "main", "data_b64": STANDARD.encode(nso) }, + { "name": "main.npdm", "data_b64": STANDARD.encode(b"NPDM") } + ], + "nso": [ + { "name": "main", "data_b64": STANDARD.encode(nso) } + ] + }); + let image = serde_json::json!({ + "schema_version": "1", + "programs": [program], + "romfs": { "image_b64": STANDARD.encode(romfs) } + }); + serde_json::to_string(&image).expect("serialize mock xci") +} + +#[test] +fn intake_xci_emits_manifest_and_assets() { + let dir = tempdir().expect("tempdir"); + let xci_path = dir.path().join("sample.xci"); + let keys_path = dir.path().join("title.keys"); + fs::write(&keys_path, b"DUMMYKEYS").expect("write keys"); + + let nso_bytes = build_nso(); + let romfs_bytes = build_romfs_image(); + let xci_json = build_mock_xci_json(&nso_bytes, &romfs_bytes); + fs::write(&xci_path, xci_json.as_bytes()).expect("write xci"); + + let provenance_path = dir.path().join("provenance.toml"); + write_provenance( + &provenance_path, + vec![ + (xci_path.clone(), "xci", xci_json.as_bytes()), + (keys_path.clone(), "keyset", b"DUMMYKEYS"), + ], + ); + + let out_dir = dir.path().join("out"); + let assets_dir = dir.path().join("assets"); + let report = intake_xci(XciIntakeOptions { + xci_path, + keys_path, + config_path: None, + provenance_path, + out_dir: out_dir.clone(), + assets_dir: assets_dir.clone(), + tool_preference: XciToolPreference::Mock, + tool_path: None, + }) + .expect("intake xci"); + + assert!(report.module_json_path.exists()); + assert!(report.manifest_path.exists()); + assert!(out_dir.join("exefs/main").exists()); + assert!(out_dir.join("segments/main/text.bin").exists()); + assert!(assets_dir.join("romfs/hello.txt").exists()); + + let manifest_src = fs::read_to_string(report.manifest_path).expect("read manifest"); + let manifest: serde_json::Value = serde_json::from_str(&manifest_src).expect("parse manifest"); + let assets_root = manifest + .get("assets_root") + .and_then(|value| value.as_str()) + .expect("assets_root string"); + assert!(assets_root.contains("assets")); +} + +#[test] +fn intake_xci_rejects_ambiguous_program() { + let dir = tempdir().expect("tempdir"); + let xci_path = dir.path().join("sample.xci"); + let keys_path = dir.path().join("title.keys"); + fs::write(&keys_path, b"DUMMYKEYS").expect("write keys"); + + let nso_bytes = build_nso(); + let program_one = serde_json::json!({ + "title_id": "0100000000000000", + "content_type": "program", + "version": "1.0.0", + "nca": { "data_b64": STANDARD.encode(b"NCA3") }, + "exefs": [{ "name": "main", "data_b64": STANDARD.encode(&nso_bytes) }], + "nso": [{ "name": "main", "data_b64": STANDARD.encode(&nso_bytes) }] + }); + let program_two = serde_json::json!({ + "title_id": "0100000000000001", + "content_type": "program", + "version": "1.0.0", + "nca": { "data_b64": STANDARD.encode(b"NCA3") }, + "exefs": [{ "name": "main", "data_b64": STANDARD.encode(&nso_bytes) }], + "nso": [{ "name": "main", "data_b64": STANDARD.encode(&nso_bytes) }] + }); + let image = serde_json::json!({ + "schema_version": "1", + "programs": [program_one, program_two] + }); + let xci_json = serde_json::to_string(&image).expect("serialize mock xci"); + fs::write(&xci_path, xci_json.as_bytes()).expect("write xci"); + + let provenance_path = dir.path().join("provenance.toml"); + write_provenance( + &provenance_path, + vec![ + (xci_path.clone(), "xci", xci_json.as_bytes()), + 
(keys_path.clone(), "keyset", b"DUMMYKEYS"), + ], + ); + + let out_dir = dir.path().join("out"); + let assets_dir = dir.path().join("assets"); + let err = intake_xci(XciIntakeOptions { + xci_path, + keys_path, + config_path: None, + provenance_path, + out_dir, + assets_dir, + tool_preference: XciToolPreference::Mock, + tool_path: None, + }) + .expect_err("ambiguous program should fail"); + assert!(err.contains("ambiguous Program NCA selection")); +} + +#[test] +fn intake_xci_rejects_nested_assets_dir() { + let dir = tempdir().expect("tempdir"); + let xci_path = dir.path().join("sample.xci"); + let keys_path = dir.path().join("title.keys"); + fs::write(&keys_path, b"DUMMYKEYS").expect("write keys"); + + let xci_json = build_mock_xci_json(&build_nso(), &build_romfs_image()); + fs::write(&xci_path, xci_json.as_bytes()).expect("write xci"); + + let provenance_path = dir.path().join("provenance.toml"); + write_provenance( + &provenance_path, + vec![ + (xci_path.clone(), "xci", xci_json.as_bytes()), + (keys_path.clone(), "keyset", b"DUMMYKEYS"), + ], + ); + + let out_dir = dir.path().join("out"); + let assets_dir = out_dir.join("assets"); + let err = intake_xci(XciIntakeOptions { + xci_path, + keys_path, + config_path: None, + provenance_path, + out_dir, + assets_dir, + tool_preference: XciToolPreference::Mock, + tool_path: None, + }) + .expect_err("nested assets_dir should fail"); + assert!(err.contains("assets_dir must not be inside out_dir")); +} diff --git a/crates/recomp-runtime/Cargo.toml b/crates/recomp-runtime/Cargo.toml index 0b9f63b..f64182a 100644 --- a/crates/recomp-runtime/Cargo.toml +++ b/crates/recomp-runtime/Cargo.toml @@ -11,3 +11,4 @@ recomp-timing = { path = "../recomp-timing" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" thiserror = "1.0" +toml = "0.8" diff --git a/crates/recomp-runtime/src/audio.rs b/crates/recomp-runtime/src/audio.rs new file mode 100644 index 0000000..0792efb --- /dev/null +++ b/crates/recomp-runtime/src/audio.rs @@ -0,0 +1,52 @@ +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AudioBuffer { + pub frames: u32, + pub channels: u16, + pub sample_rate: u32, +} + +impl AudioBuffer { + pub fn new(frames: u32, channels: u16, sample_rate: u32) -> Self { + Self { + frames, + channels, + sample_rate, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum AudioError { + #[error("unsupported audio buffer")] + Unsupported, +} + +pub trait AudioBackend { + fn submit(&mut self, buffer: &AudioBuffer) -> Result<(), AudioError>; +} + +#[derive(Debug, Default)] +pub struct StubAudioBackend { + pub submitted: Vec, +} + +impl AudioBackend for StubAudioBackend { + fn submit(&mut self, buffer: &AudioBuffer) -> Result<(), AudioError> { + self.submitted.push(buffer.clone()); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn stub_audio_backend_records_buffers() { + let mut backend = StubAudioBackend::default(); + let buffer = AudioBuffer::new(128, 2, 48_000); + backend.submit(&buffer).expect("submit"); + assert_eq!(backend.submitted.len(), 1); + assert_eq!(backend.submitted[0], buffer); + } +} diff --git a/crates/recomp-runtime/src/boot.rs b/crates/recomp-runtime/src/boot.rs new file mode 100644 index 0000000..1b759ef --- /dev/null +++ b/crates/recomp-runtime/src/boot.rs @@ -0,0 +1,230 @@ +use crate::audio::{AudioBackend, AudioBuffer, AudioError, StubAudioBackend}; +use crate::input::{InputBackend, InputFrame, StubInputBackend}; +use crate::Runtime; +use recomp_gfx::{ + CommandStream, FrameDescriptor, GraphicsBackend, 
GraphicsError, GraphicsPresenter, StubBackend,
+    StubPresenter,
+};
+use recomp_services::{register_stubbed_services, ServiceCall, ServiceError, ServiceStubSpec};
+use std::path::PathBuf;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct BootStep {
+    pub stage: String,
+    pub detail: String,
+}
+
+#[derive(Debug, Default, Clone)]
+pub struct BootTrace {
+    steps: Vec<BootStep>,
+}
+
+impl BootTrace {
+    pub fn record(&mut self, stage: impl Into<String>, detail: impl Into<String>) {
+        self.steps.push(BootStep {
+            stage: stage.into(),
+            detail: detail.into(),
+        });
+    }
+
+    pub fn steps(&self) -> &[BootStep] {
+        &self.steps
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct BootAssets {
+    pub romfs_root: PathBuf,
+}
+
+impl Default for BootAssets {
+    fn default() -> Self {
+        Self {
+            romfs_root: PathBuf::from("game-data/romfs"),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ServiceCallSpec {
+    pub service: String,
+    pub args: Vec<u64>,
+}
+
+impl ServiceCallSpec {
+    pub fn new(service: impl Into<String>, args: Vec<u64>) -> Self {
+        Self {
+            service: service.into(),
+            args,
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone)]
+pub struct BootPlan {
+    pub service_calls: Vec<ServiceCallSpec>,
+    pub gfx_streams: Vec<CommandStream>,
+    pub present_frames: Vec<FrameDescriptor>,
+    pub audio_buffers: Vec<AudioBuffer>,
+    pub input_frames: Vec<InputFrame>,
+}
+
+impl BootPlan {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn service_call(mut self, call: ServiceCallSpec) -> Self {
+        self.service_calls.push(call);
+        self
+    }
+
+    pub fn gfx_stream(mut self, stream: CommandStream) -> Self {
+        self.gfx_streams.push(stream);
+        self
+    }
+
+    pub fn present(mut self, frame: FrameDescriptor) -> Self {
+        self.present_frames.push(frame);
+        self
+    }
+
+    pub fn audio(mut self, buffer: AudioBuffer) -> Self {
+        self.audio_buffers.push(buffer);
+        self
+    }
+
+    pub fn input(mut self, frame: InputFrame) -> Self {
+        self.input_frames.push(frame);
+        self
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum BootError {
+    #[error("service error: {0}")]
+    Service(#[from] ServiceError),
+    #[error("graphics error: {0}")]
+    Graphics(#[from] GraphicsError),
+    #[error("audio error: {0}")]
+    Audio(#[from] AudioError),
+}
+
+pub struct BootContext {
+    pub title: String,
+    pub assets: BootAssets,
+    pub runtime: Runtime,
+    pub gfx: StubBackend,
+    pub presenter: StubPresenter,
+    pub audio: StubAudioBackend,
+    pub input: StubInputBackend,
+    pub trace: BootTrace,
+}
+
+impl BootContext {
+    pub fn new(title: impl Into<String>) -> Self {
+        Self {
+            title: title.into(),
+            assets: BootAssets::default(),
+            runtime: Runtime::new(),
+            gfx: StubBackend::default(),
+            presenter: StubPresenter::default(),
+            audio: StubAudioBackend::default(),
+            input: StubInputBackend::default(),
+            trace: BootTrace::default(),
+        }
+    }
+
+    pub fn with_assets_root(mut self, root: impl Into<PathBuf>) -> Self {
+        self.assets.romfs_root = root.into();
+        self
+    }
+
+    pub fn register_service_stubs(&mut self, stubs: &[ServiceStubSpec]) {
+        register_stubbed_services(&mut self.runtime.services, stubs);
+        self.trace
+            .record("services.register", format!("count={}", stubs.len()));
+    }
+
+    pub fn run_plan(&mut self, plan: &BootPlan) -> Result<BootTrace, BootError> {
+        self.trace
+            .record("boot.start", format!("title={}", self.title));
+        self.trace
+            .record("assets.romfs", self.assets.romfs_root.display().to_string());
+
+        for call in &plan.service_calls {
+            let call = ServiceCall {
+                client: "boot".to_string(),
+                service: call.service.clone(),
+                args: call.args.clone(),
+            };
+            self.runtime.dispatch_service(&call)?;
+            self.trace.record("service.call", call.service);
+        }
+
+        for stream in &plan.gfx_streams {
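+            // Each recorded command stream goes to the stub backend and is
+            // traced by its word count; no real GPU work happens in this
+            // boot path.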
self.gfx.submit(stream)?;
+            self.trace
+                .record("gfx.submit", format!("words={}", stream.words.len()));
+        }
+
+        for frame in &plan.present_frames {
+            self.presenter.present(frame)?;
+            self.trace
+                .record("gfx.present", format!("frame={}", frame.frame_id));
+        }
+
+        for buffer in &plan.audio_buffers {
+            self.audio.submit(buffer)?;
+            self.trace
+                .record("audio.submit", format!("frames={}", buffer.frames));
+        }
+
+        for frame in &plan.input_frames {
+            self.input.push_frame(frame.clone());
+            self.trace
+                .record("input.frame", format!("events={}", frame.events.len()));
+        }
+
+        Ok(self.trace.clone())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::InputEvent;
+    use recomp_services::StubBehavior;
+
+    #[test]
+    fn boot_context_runs_plan_and_records() {
+        let mut context =
+            BootContext::new("DKCR HD Sample").with_assets_root("game-data/dkcr-hd/romfs");
+        context.register_service_stubs(&[
+            ServiceStubSpec::new("svc_sm", StubBehavior::Noop),
+            ServiceStubSpec::new("svc_fs", StubBehavior::Noop),
+        ]);
+
+        let plan = BootPlan::new()
+            .service_call(ServiceCallSpec::new("svc_sm", vec![]))
+            .service_call(ServiceCallSpec::new("svc_fs", vec![1]))
+            .gfx_stream(CommandStream::new(vec![1, 2, 3]))
+            .present(FrameDescriptor::new(1, 1280, 720))
+            .audio(AudioBuffer::new(256, 2, 48_000))
+            .input(InputFrame::new(
+                0,
+                vec![InputEvent {
+                    time: 0,
+                    code: 1,
+                    value: 1,
+                }],
+            ));
+
+        let trace = context.run_plan(&plan).expect("boot plan");
+        assert!(trace.steps().len() >= 7);
+        assert_eq!(context.gfx.submitted.len(), 1);
+        assert_eq!(context.presenter.presented.len(), 1);
+        assert_eq!(context.audio.submitted.len(), 1);
+        assert_eq!(context.input.pending(), 1);
+    }
+}
diff --git a/crates/recomp-runtime/src/input.rs b/crates/recomp-runtime/src/input.rs
new file mode 100644
index 0000000..c4099a9
--- /dev/null
+++ b/crates/recomp-runtime/src/input.rs
@@ -0,0 +1,70 @@
+use crate::homebrew::{InputEvent, InputQueue};
+
+#[derive(Debug, Clone)]
+pub struct InputFrame {
+    pub time: u64,
+    pub events: Vec<InputEvent>,
+}
+
+impl InputFrame {
+    pub fn new(time: u64, events: Vec<InputEvent>) -> Self {
+        Self { time, events }
+    }
+}
+
+pub trait InputBackend {
+    fn push_frame(&mut self, frame: InputFrame);
+    fn drain_ready(&mut self, time: u64) -> Vec<InputEvent>;
+}
+
+#[derive(Debug, Default)]
+pub struct StubInputBackend {
+    queue: InputQueue,
+    pub pushed: Vec<InputFrame>,
+}
+
+impl StubInputBackend {
+    pub fn pending(&self) -> usize {
+        self.queue.pending()
+    }
+}
+
+impl InputBackend for StubInputBackend {
+    fn push_frame(&mut self, frame: InputFrame) {
+        for event in &frame.events {
+            self.queue.push(InputEvent {
+                time: event.time,
+                code: event.code,
+                value: event.value,
+            });
+        }
+        self.pushed.push(frame);
+    }
+
+    fn drain_ready(&mut self, time: u64) -> Vec<InputEvent> {
+        self.queue.drain_ready(time)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn stub_input_backend_records_frames_and_events() {
+        let mut backend = StubInputBackend::default();
+        backend.push_frame(InputFrame::new(
+            0,
+            vec![InputEvent {
+                time: 1,
+                code: 10,
+                value: 1,
+            }],
+        ));
+        assert_eq!(backend.pushed.len(), 1);
+        assert_eq!(backend.pending(), 1);
+        let ready = backend.drain_ready(1);
+        assert_eq!(ready.len(), 1);
+        assert_eq!(backend.pending(), 0);
+    }
+}
diff --git a/crates/recomp-runtime/src/input_replay.rs b/crates/recomp-runtime/src/input_replay.rs
new file mode 100644
index 0000000..7167e3b
--- /dev/null
+++ b/crates/recomp-runtime/src/input_replay.rs
@@ -0,0 +1,368 @@
+use crate::{InputBackend, InputEvent, InputFrame};
+use
serde::Deserialize; +use std::collections::HashSet; + +const INPUT_SCRIPT_SCHEMA_VERSION: &str = "1"; + +#[derive(Debug, Deserialize, Clone)] +pub struct InputScript { + pub schema_version: String, + pub metadata: InputMetadata, + pub events: Vec<InputScriptEvent>, + #[serde(default)] + pub markers: Vec<InputScriptMarker>, +} + +impl InputScript { + pub fn parse(toml_src: &str) -> Result<Self, String> { + let script: InputScript = + toml::from_str(toml_src).map_err(|err| format!("invalid input script: {err}"))?; + script.validate()?; + Ok(script) + } + + pub fn validate(&self) -> Result<(), String> { + if self.schema_version != INPUT_SCRIPT_SCHEMA_VERSION { + return Err(format!( + "unsupported input script schema version: {}", + self.schema_version + )); + } + if self.metadata.title.trim().is_empty() + || self.metadata.controller.trim().is_empty() + || self.metadata.timing_mode == TimingMode::Unspecified + { + return Err("input script metadata is incomplete".to_string()); + } + if self.events.is_empty() { + return Err("input script events list is empty".to_string()); + } + + for (index, event) in self.events.iter().enumerate() { + let label = format!("event[{index}]"); + validate_time_fields( + &label, + self.metadata.timing_mode, + event.time_ms, + event.frame, + )?; + } + + let mut names = HashSet::new(); + for (index, marker) in self.markers.iter().enumerate() { + let label = format!("marker[{index}]"); + if marker.name.trim().is_empty() { + return Err(format!("{label} name is empty")); + } + if !names.insert(marker.name.as_str()) { + return Err(format!("{label} name is duplicated")); + } + validate_time_fields( + &label, + self.metadata.timing_mode, + marker.time_ms, + marker.frame, + )?; + } + + Ok(()) + } +} + +#[derive(Debug, Deserialize, Clone)] +pub struct InputMetadata { + pub title: String, + pub controller: String, + pub timing_mode: TimingMode, + #[serde(default)] + pub notes: Option<String>, + #[serde(default)] + pub recorded_at: Option<String>, +} + +#[derive(Debug, Deserialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum TimingMode { + #[serde(rename = "ms")] + Milliseconds, + Frames, + #[serde(other)] + Unspecified, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct InputScriptEvent { + #[serde(default)] + pub time_ms: Option<u64>, + #[serde(default)] + pub frame: Option<u64>, + pub control: u32, + pub value: i32, + #[serde(default)] + pub note: Option<String>, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct InputScriptMarker { + pub name: String, + #[serde(default)] + pub time_ms: Option<u64>, + #[serde(default)] + pub frame: Option<u64>, + #[serde(default)] + pub note: Option<String>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InputMarker { + pub name: String, + pub time: u64, + pub note: Option<String>, +} + +#[derive(Debug, Clone)] +pub struct InputPlayback { + timing_mode: TimingMode, + frames: Vec<InputFrame>, + markers: Vec<InputMarker>, + cursor: usize, +} + +impl InputPlayback { + pub fn from_script(script: InputScript) -> Result<Self, String> { + script.validate()?; + + let timing_mode = script.metadata.timing_mode; + let mut sequenced = Vec::with_capacity(script.events.len()); + for (index, event) in script.events.into_iter().enumerate() { + let time = match timing_mode { + TimingMode::Milliseconds => event.time_ms.expect("validated"), + TimingMode::Frames => event.frame.expect("validated"), + TimingMode::Unspecified => { + return Err("input script timing mode is unspecified".to_string()) + } + }; + let input_event = InputEvent { + time, + code: event.control, + value: event.value, + }; + sequenced.push(SequencedEvent { + time, + index, + event:
input_event, + }); + } + + sequenced.sort_by(|a, b| a.time.cmp(&b.time).then_with(|| a.index.cmp(&b.index))); + + let mut frames: Vec<InputFrame> = Vec::new(); + for item in sequenced { + if let Some(frame) = frames.last_mut() { + if frame.time == item.time { + frame.events.push(item.event); + continue; + } + } + frames.push(InputFrame::new(item.time, vec![item.event])); + } + + let mut markers: Vec<SequencedMarker> = script + .markers + .into_iter() + .enumerate() + .map(|(index, marker)| { + let time = match timing_mode { + TimingMode::Milliseconds => marker.time_ms.expect("validated"), + TimingMode::Frames => marker.frame.expect("validated"), + TimingMode::Unspecified => 0, + }; + SequencedMarker { + time, + index, + marker: InputMarker { + name: marker.name, + time, + note: marker.note, + }, + } + }) + .collect(); + + markers.sort_by(|a, b| a.time.cmp(&b.time).then_with(|| a.index.cmp(&b.index))); + + Ok(Self { + timing_mode, + frames, + markers: markers.into_iter().map(|entry| entry.marker).collect(), + cursor: 0, + }) + } + + pub fn timing_mode(&self) -> TimingMode { + self.timing_mode + } + + pub fn frames(&self) -> &[InputFrame] { + &self.frames + } + + pub fn markers(&self) -> &[InputMarker] { + &self.markers + } + + pub fn reset(&mut self) { + self.cursor = 0; + } + + pub fn seek(&mut self, time: u64) { + let mut index = 0; + while index < self.frames.len() && self.frames[index].time < time { + index += 1; + } + self.cursor = index; + } + + pub fn is_finished(&self) -> bool { + self.cursor >= self.frames.len() + } + + pub fn feed_until<B: InputBackend>(&mut self, backend: &mut B, time: u64) -> usize { + let mut pushed = 0; + while self.cursor < self.frames.len() && self.frames[self.cursor].time <= time { + backend.push_frame(self.frames[self.cursor].clone()); + self.cursor += 1; + pushed += 1; + } + pushed + } +} + +#[derive(Debug)] +struct SequencedEvent { + time: u64, + index: usize, + event: InputEvent, +} + +#[derive(Debug)] +struct SequencedMarker { + time: u64, + index: usize, + marker: InputMarker, +} + +fn validate_time_fields( + label: &str, + timing_mode: TimingMode, + time_ms: Option<u64>, + frame: Option<u64>, +) -> Result<(), String> { + match timing_mode { + TimingMode::Milliseconds => { + if time_ms.is_none() { + return Err(format!("{label} missing time_ms for timing_mode=ms")); + } + if frame.is_some() { + return Err(format!("{label} frame is not valid for timing_mode=ms")); + } + } + TimingMode::Frames => { + if frame.is_none() { + return Err(format!("{label} missing frame for timing_mode=frames")); + } + if time_ms.is_some() { + return Err(format!( + "{label} time_ms is not valid for timing_mode=frames" + )); + } + } + TimingMode::Unspecified => { + return Err(format!("{label} timing_mode is unspecified")); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn playback_orders_events_deterministically() { + let toml_src = r#" + schema_version = "1" + + [metadata] + title = "Replay" + controller = "pro_controller" + timing_mode = "ms" + + [[events]] + time_ms = 20 + control = 10 + value = 1 + + [[events]] + time_ms = 10 + control = 20 + value = 1 + + [[events]] + time_ms = 10 + control = 30 + value = 0 + "#; + + let script = InputScript::parse(toml_src).expect("parse script"); + let playback = InputPlayback::from_script(script).expect("build playback"); + let frames = playback.frames(); + assert_eq!(frames.len(), 2); + assert_eq!(frames[0].time, 10); + assert_eq!(frames[0].events.len(), 2); + assert_eq!(frames[0].events[0].code, 20); + assert_eq!(frames[0].events[1].code, 30); +
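// Same-timestamp events kept script order above; the later timestamp + // starts a new frame of its own. +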
assert_eq!(frames[1].time, 20); + } + + #[test] + fn playback_sorts_markers_by_time() { + let toml_src = r#" + schema_version = "1" + + [metadata] + title = "Replay" + controller = "pro_controller" + timing_mode = "ms" + + [[events]] + time_ms = 0 + control = 1 + value = 1 + + [[markers]] + name = "late" + time_ms = 300 + + [[markers]] + name = "boot" + time_ms = 0 + + [[markers]] + name = "mid" + time_ms = 150 + "#; + + let script = InputScript::parse(toml_src).expect("parse script"); + let playback = InputPlayback::from_script(script).expect("build playback"); + let markers = playback.markers(); + assert_eq!(markers.len(), 3); + assert_eq!(markers[0].name, "boot"); + assert_eq!(markers[0].time, 0); + assert_eq!(markers[1].name, "mid"); + assert_eq!(markers[1].time, 150); + assert_eq!(markers[2].name, "late"); + assert_eq!(markers[2].time, 300); + } +} diff --git a/crates/recomp-runtime/src/lib.rs b/crates/recomp-runtime/src/lib.rs index b47cfcb..ab01424 100644 --- a/crates/recomp-runtime/src/lib.rs +++ b/crates/recomp-runtime/src/lib.rs @@ -1,24 +1,40 @@ use std::fmt; +mod audio; +mod boot; mod homebrew; +mod input; +mod input_replay; mod memory; pub const ABI_VERSION: &str = "0.1.0"; +pub use audio::{AudioBackend, AudioBuffer, AudioError, StubAudioBackend}; +pub use boot::{ + BootAssets, BootContext, BootError, BootPlan, BootStep, BootTrace, ServiceCallSpec, +}; pub use homebrew::{ entrypoint_shim, DeterministicClock, InputEvent, InputQueue, LoaderConfig, LoaderConfigBuilder, LoaderConfigEntry, LoaderConfigKey, NroEntrypoint, RuntimeManifest, ServiceStub, NRO_ENTRY_X1, }; +pub use input::{InputBackend, InputFrame, StubInputBackend}; +pub use input_replay::{ + InputMarker, InputMetadata, InputPlayback, InputScript, InputScriptEvent, InputScriptMarker, + TimingMode, +}; pub use memory::{ init_memory, recomp_mem_load_u16, recomp_mem_load_u32, recomp_mem_load_u64, recomp_mem_load_u8, recomp_mem_store_u16, recomp_mem_store_u32, recomp_mem_store_u64, recomp_mem_store_u8, MemoryInitSegment, MemoryLayout, MemoryLayoutError, MemoryPermissions, MemoryRegionSpec, MemoryStatus, MemoryZeroSegment, }; -pub use recomp_gfx::{CommandStream, GraphicsBackend, GraphicsError, StubBackend}; +pub use recomp_gfx::{ + CommandStream, FrameDescriptor, GraphicsBackend, GraphicsError, GraphicsPresenter, StubBackend, + StubPresenter, +}; pub use recomp_services::{ - stub_handler, ServiceAccessControl, ServiceCall, ServiceError, ServiceLogger, ServiceRegistry, - StubBehavior, + register_stubbed_services, stub_handler, ServiceAccessControl, ServiceCall, ServiceError, + ServiceLogger, ServiceRegistry, ServiceStubSpec, StubBehavior, }; pub use recomp_timing::Scheduler; diff --git a/crates/recomp-services/src/lib.rs b/crates/recomp-services/src/lib.rs index 590c528..f83e416 100644 --- a/crates/recomp-services/src/lib.rs +++ b/crates/recomp-services/src/lib.rs @@ -72,6 +72,27 @@ pub fn stub_handler( } } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ServiceStubSpec { + pub name: String, + pub behavior: StubBehavior, +} + +impl ServiceStubSpec { + pub fn new(name: impl Into<String>, behavior: StubBehavior) -> Self { + Self { + name: name.into(), + behavior, + } + } +} + +pub fn register_stubbed_services(registry: &mut ServiceRegistry, stubs: &[ServiceStubSpec]) { + for stub in stubs { + registry.register(&stub.name, stub_handler(stub.behavior)); + } +} + #[derive(Debug, Default)] pub struct ServiceAccessControl { allowed: BTreeSet<String>, @@ -198,4 +219,19 @@ mod tests { assert!(dispatcher.dispatch(&call).is_ok()); } + + #[test] +
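// The convenience registrar should leave the stub service callable by name. +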
fn register_stubbed_services_installs_handlers() { + let mut registry = ServiceRegistry::new(); + register_stubbed_services( + &mut registry, + &[ServiceStubSpec::new("svc_stub", StubBehavior::Noop)], + ); + let call = ServiceCall { + client: "demo".to_string(), + service: "svc_stub".to_string(), + args: vec![], + }; + assert!(registry.call(&call).is_ok()); + } } diff --git a/crates/recomp-validation/Cargo.toml b/crates/recomp-validation/Cargo.toml index e58b2dd..d1fb721 100644 --- a/crates/recomp-validation/Cargo.toml +++ b/crates/recomp-validation/Cargo.toml @@ -9,7 +9,9 @@ recomp-pipeline = { path = "../recomp-pipeline" } recomp-runtime = { path = "../recomp-runtime" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +sha2 = "0.10" tempfile = "3.10" +toml = "0.8" [dependencies.clap] version = "4.5" diff --git a/crates/recomp-validation/src/lib.rs b/crates/recomp-validation/src/lib.rs index afc6027..37641d4 100644 --- a/crates/recomp-validation/src/lib.rs +++ b/crates/recomp-validation/src/lib.rs @@ -4,6 +4,13 @@ use serde::Serialize; use std::path::{Path, PathBuf}; use std::time::Instant; +pub mod video; +pub use video::{ + hash_audio_file, hash_frames_dir, run_video_validation, run_video_validation_with_config, + write_hash_list, CaptureVideoConfig, HashFormat, HashSource, HashSources, ReferenceVideoConfig, + Timecode, ValidationConfigFile, VideoValidationReport, +}; + #[derive(Debug, Serialize)] pub struct ValidationReport { pub generated_at: String, @@ -11,6 +18,8 @@ pub struct ValidationReport { pub passed: usize, pub failed: usize, pub cases: Vec<ValidationCase>, + #[serde(skip_serializing_if = "Option::is_none")] + pub video: Option<VideoValidationReport>, } #[derive(Debug, Serialize)] @@ -22,7 +31,7 @@ pub struct ValidationCase { pub name: String, pub status: ValidationStatus, pub duration_ms: u128, pub details: Option<String>, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)] #[serde(rename_all = "snake_case")] pub enum ValidationStatus { Passed, @@ -89,6 +98,7 @@ pub fn run_baseline(paths: BaselinePaths) -> ValidationReport { passed, failed, cases, + video: None, } } @@ -153,5 +163,95 @@ fn render_text_report(report: &ValidationReport) -> String { out.push_str(&format!(" details: {details}\n")); } } + if let Some(video) = &report.video { + out.push_str("\nVideo validation summary\n"); + out.push_str(&format!("status: {:?}\n", video.status)); + if let Some(schema_version) = &video.validation_config.schema_version { + out.push_str(&format!("schema_version: {schema_version}\n")); + } + if let Some(name) = &video.validation_config.name { + out.push_str(&format!("validation_name: {name}\n")); + } + out.push_str(&format!( + "frame match: {:.3} ({} of {}, offset {} frames)\n", + video.frame_comparison.match_ratio, + video.frame_comparison.matched, + video.frame_comparison.compared, + video.frame_comparison.offset + )); + out.push_str(&format!( + "frame drift: {} frames ({:.3} sec)\n", + video.drift.frame_offset, video.drift.frame_offset_seconds + )); + if let Some(audio) = &video.audio_comparison { + out.push_str(&format!( + "audio match: {:.3} ({} of {}, offset {} chunks)\n", + audio.match_ratio, audio.matched, audio.compared, audio.offset + )); + } + if !video.triage.categories.is_empty() { + let categories: Vec<String> = video + .triage + .categories + .iter() + .map(|category| format!("{category:?}")) + .collect(); + out.push_str(&format!("triage: {}\n", categories.join(", "))); + } + if !video.failures.is_empty() { + out.push_str("video failures:\n"); + for failure in &video.failures { + out.push_str(&format!("- {failure}\n")); + } + } + if
!video.triage.suggestions.is_empty() { + out.push_str("triage suggestions:\n"); + for suggestion in &video.triage.suggestions { + out.push_str(&format!("- {suggestion}\n")); + } + } + } out } + +pub fn run_video_suite( + reference_path: &Path, + capture_path: &Path, + validation_path: Option<&Path>, +) -> ValidationReport { + let start = Instant::now(); + let mut cases = Vec::new(); + let (status, details, video_report) = + match run_video_validation_with_config(reference_path, capture_path, validation_path) { + Ok(report) => ( + report.status, + Some(format!( + "frame_match_ratio={:.3} drift_frames={}", + report.frame_comparison.match_ratio, report.drift.frame_offset + )), + Some(report), + ), + Err(err) => (ValidationStatus::Failed, Some(err), None), + }; + let duration_ms = start.elapsed().as_millis(); + cases.push(ValidationCase { + name: "video_validation".to_string(), + status, + duration_ms, + details, + }); + + let (passed, failed) = cases.iter().fold((0, 0), |acc, case| match case.status { + ValidationStatus::Passed => (acc.0 + 1, acc.1), + ValidationStatus::Failed => (acc.0, acc.1 + 1), + }); + + ValidationReport { + generated_at: chrono_stamp(), + total: cases.len(), + passed, + failed, + cases, + video: video_report, + } +} diff --git a/crates/recomp-validation/src/main.rs b/crates/recomp-validation/src/main.rs index 893eca8..052f0c2 100644 --- a/crates/recomp-validation/src/main.rs +++ b/crates/recomp-validation/src/main.rs @@ -1,5 +1,8 @@ -use clap::Parser; -use recomp_validation::{run_baseline, write_report, BaselinePaths}; +use clap::{Args, Parser, Subcommand}; +use recomp_validation::{ + hash_audio_file, hash_frames_dir, run_baseline, run_video_suite, write_hash_list, write_report, + BaselinePaths, +}; use std::path::PathBuf; #[derive(Parser, Debug)] @@ -7,33 +10,128 @@ use std::path::PathBuf; about = "Run baseline validation suite and emit regression reports", version )] -struct Args { +struct Cli { #[arg(long)] - out_dir: PathBuf, + out_dir: Option<PathBuf>, #[arg(long)] repo_root: Option<PathBuf>, + #[command(subcommand)] + command: Option<Command>, +} + +#[derive(Subcommand, Debug)] +enum Command { + Video(VideoArgs), + HashFrames(HashFramesArgs), + HashAudio(HashAudioArgs), +} + +#[derive(Args, Debug)] +struct VideoArgs { + #[arg(long)] + reference: PathBuf, + #[arg(long)] + capture: PathBuf, + #[arg(long)] + validation_config: Option<PathBuf>, + #[arg(long)] + out_dir: PathBuf, +} + +#[derive(Args, Debug)] +struct HashFramesArgs { + #[arg(long)] + frames_dir: PathBuf, + #[arg(long)] + out: PathBuf, +} + +#[derive(Args, Debug)] +struct HashAudioArgs { + #[arg(long)] + audio_file: PathBuf, + #[arg(long)] + out: PathBuf, } fn main() { - let args = Args::parse(); - let repo_root = args.repo_root.unwrap_or_else(default_repo_root); - let report = run_baseline(BaselinePaths { - repo_root, - out_dir: args.out_dir.clone(), - }); - if let Err(err) = write_report(&args.out_dir, &report) { - eprintln!("failed to write validation report: {err}"); - std::process::exit(1); - } - if report.failed > 0 { - eprintln!("validation failed: {} cases failed", report.failed); - std::process::exit(1); + let args = Cli::parse(); + match args.command { + Some(Command::Video(cmd)) => { + let report = run_video_suite( + &cmd.reference, + &cmd.capture, + cmd.validation_config.as_deref(), + ); + if let Err(err) = write_report(&cmd.out_dir, &report) { + eprintln!("failed to write validation report: {err}"); + std::process::exit(1); + } + if report.failed > 0 { + eprintln!("validation failed: {} cases failed", report.failed); +
std::process::exit(1); + } + println!( + "validation passed: {} cases, report written to {}", + report.total, + cmd.out_dir.display() + ); + } + Some(Command::HashFrames(cmd)) => { + let hashes = hash_frames_dir(&cmd.frames_dir).unwrap_or_else(|err| { + eprintln!("failed to hash frames: {err}"); + std::process::exit(1); + }); + write_hash_list(&cmd.out, &hashes).unwrap_or_else(|err| { + eprintln!("failed to write hash list: {err}"); + std::process::exit(1); + }); + println!( + "frame hashes written: {} entries -> {}", + hashes.len(), + cmd.out.display() + ); + } + Some(Command::HashAudio(cmd)) => { + let hashes = hash_audio_file(&cmd.audio_file).unwrap_or_else(|err| { + eprintln!("failed to hash audio: {err}"); + std::process::exit(1); + }); + write_hash_list(&cmd.out, &hashes).unwrap_or_else(|err| { + eprintln!("failed to write hash list: {err}"); + std::process::exit(1); + }); + println!( + "audio hashes written: {} entries -> {}", + hashes.len(), + cmd.out.display() + ); + } + None => { + let out_dir = args.out_dir.unwrap_or_else(|| { + eprintln!("--out-dir is required unless using a subcommand"); + std::process::exit(2); + }); + let repo_root = args.repo_root.unwrap_or_else(default_repo_root); + let report = run_baseline(BaselinePaths { + repo_root, + out_dir: out_dir.clone(), + }); + if let Err(err) = write_report(&out_dir, &report) { + eprintln!("failed to write validation report: {err}"); + std::process::exit(1); + } + if report.failed > 0 { + eprintln!("validation failed: {} cases failed", report.failed); + std::process::exit(1); + } + println!( + "validation passed: {} cases, report written to {}", + report.total, + out_dir.display() + ); + } } - println!( - "validation passed: {} cases, report written to {}", - report.total, - args.out_dir.display() - ); } fn default_repo_root() -> PathBuf { diff --git a/crates/recomp-validation/src/video.rs b/crates/recomp-validation/src/video.rs new file mode 100644 index 0000000..0205a45 --- /dev/null +++ b/crates/recomp-validation/src/video.rs @@ -0,0 +1,890 @@ +use crate::ValidationStatus; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::cmp::Ordering; +use std::fmt; +use std::fs; +use std::path::{Path, PathBuf}; + +const AUDIO_CHUNK_BYTES: usize = 4096; + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Timecode { + pub seconds: f64, +} + +impl Timecode { + pub fn from_seconds(seconds: f64) -> Result<Self, String> { + if seconds.is_finite() && seconds >= 0.0 { + Ok(Self { seconds }) + } else { + Err(format!("invalid timecode seconds: {seconds}")) + } + } + + pub fn parse(value: &str) -> Result<Self, String> { + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err("timecode is empty".to_string()); + } + if let Ok(seconds) = trimmed.parse::<f64>() { + return Self::from_seconds(seconds); + } + let parts: Vec<&str> = trimmed.split(':').collect(); + if parts.len() > 3 { + return Err(format!("timecode has too many segments: {value}")); + } + let mut secs = 0.0; + let mut multiplier = 1.0; + for (idx, part) in parts.iter().rev().enumerate() { + if idx == 0 { + secs += part + .parse::<f64>() + .map_err(|_| format!("invalid timecode seconds segment: {value}"))?; + } else { + let unit = part + .parse::<u64>() + .map_err(|_| format!("invalid timecode segment: {value}"))?; + multiplier *= 60.0; + secs += unit as f64 * multiplier; + } + } + Self::from_seconds(secs) + } + + pub fn to_frame_index(&self, fps: f32) -> Result<usize, String> { + if !fps.is_finite() || fps <= 0.0 { + return Err(format!("invalid fps: {fps}")); + } + let frame = (self.seconds * fps as
f64).round(); + if frame < 0.0 { + Err("timecode produced negative frame index".to_string()) + } else { + Ok(frame as usize) + } + } +} + +impl fmt::Display for Timecode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let total_ms = (self.seconds * 1000.0).round() as u64; + let ms = total_ms % 1000; + let total_secs = total_ms / 1000; + let secs = total_secs % 60; + let total_mins = total_secs / 60; + let mins = total_mins % 60; + let hours = total_mins / 60; + write!(f, "{hours:02}:{mins:02}:{secs:02}.{ms:03}") + } +} + +impl Serialize for Timecode { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.serialize_str(&self.to_string()) + } +} + +impl<'de> Deserialize<'de> for Timecode { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + struct TimecodeVisitor; + + impl serde::de::Visitor<'_> for TimecodeVisitor { + type Value = Timecode; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("timecode string (HH:MM:SS.mmm) or seconds value") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Timecode::parse(value).map_err(E::custom) + } + + fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Timecode::from_seconds(value).map_err(E::custom) + } + + fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Timecode::from_seconds(value as f64).map_err(E::custom) + } + + fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Timecode::from_seconds(value as f64).map_err(E::custom) + } + } + + deserializer.deserialize_any(TimecodeVisitor) + } +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct VideoSpec { + pub path: PathBuf, + pub width: u32, + pub height: u32, + pub fps: f32, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct NormalizationProfile { + pub width: u32, + pub height: u32, + pub fps: f32, + pub audio_sample_rate: u32, + #[serde(default)] + pub audio_channels: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct NormalizationConfig { + pub source_path: PathBuf, + pub normalized_path: PathBuf, + pub profile: NormalizationProfile, + #[serde(default)] + pub notes: Option<String>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct Timeline { + pub start: Timecode, + pub end: Timecode, + #[serde(default)] + pub events: Vec<TimelineEvent>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct TimelineEvent { + pub name: String, + pub time: Timecode, +} + +#[derive(Debug, Deserialize, Serialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum HashFormat { + List, + Directory, + File, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct HashSource { + pub format: HashFormat, + pub path: PathBuf, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct HashSources { + pub frames: HashSource, + #[serde(default)] + pub audio: Option<HashSource>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct VideoThresholds { + pub frame_match_ratio: f32, + #[serde(default)] + pub audio_match_ratio: Option<f32>, + pub max_drift_frames: i32, + #[serde(default)] + pub max_dropped_frames: usize, + #[serde(default)] + pub max_audio_drift_chunks: Option<i32>, +} + +impl Default for VideoThresholds { + fn default() -> Self { + Self { + frame_match_ratio: 0.92, + audio_match_ratio: Some(0.9), + max_drift_frames: 3, + max_dropped_frames: 0, + max_audio_drift_chunks: None, + } + }
+} + +#[derive(Debug, Deserialize, Serialize, Clone, Default)] +pub struct ValidationConfig { + #[serde(default)] + pub name: Option<String>, + #[serde(default)] + pub notes: Option<String>, + #[serde(default)] + pub require_audio: Option<bool>, + #[serde(default)] + pub thresholds: Option<VideoThresholds>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct ValidationConfigFile { + #[serde(default)] + pub schema_version: Option<String>, + #[serde(flatten)] + pub validation: ValidationConfig, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct ReferenceVideoConfig { + #[serde(default)] + pub schema_version: Option<String>, + #[serde(default)] + pub normalization: Option<NormalizationConfig>, + pub video: VideoSpec, + pub timeline: Timeline, + #[serde(default)] + pub hashes: Option<HashSources>, + #[serde(default)] + pub thresholds: Option<VideoThresholds>, + #[serde(default)] + pub validation: Option<ValidationConfig>, +} + +#[derive(Debug, Deserialize, Serialize, Clone)] +pub struct CaptureVideoConfig { + pub video: VideoSpec, + pub hashes: HashSources, +} + +#[derive(Debug, Serialize)] +pub struct VideoValidationReport { + pub status: ValidationStatus, + pub validation_config: ValidationConfigSummary, + #[serde(skip_serializing_if = "Option::is_none")] + pub normalization: Option<NormalizationConfig>, + pub triage: TriageSummary, + pub reference: VideoRunSummary, + pub capture: VideoRunSummary, + pub timeline: TimelineSummary, + pub frame_comparison: HashComparisonReport, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_comparison: Option<HashComparisonReport>, + pub drift: DriftSummary, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub failures: Vec<String>, +} + +#[derive(Debug, Serialize)] +pub struct ValidationConfigSummary { + #[serde(skip_serializing_if = "Option::is_none")] + pub schema_version: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub notes: Option<String>, + pub require_audio: bool, + pub thresholds: VideoThresholds, +} + +#[derive(Debug, Serialize)] +pub struct TriageSummary { + pub status: ValidationStatus, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub categories: Vec<TriageCategory>, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub findings: Vec<String>, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub suggestions: Vec<String>, +} + +#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum TriageCategory { + Pass, + ConfigMismatch, + ReferenceCoverage, + FrameMismatch, + AudioMismatch, + AudioMissing, + Unknown, +} + +#[derive(Debug, Serialize)] +pub struct VideoRunSummary { + pub path: String, + pub width: u32, + pub height: u32, + pub fps: f32, + pub frame_hashes: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_hashes: Option<usize>, +} + +#[derive(Debug, Serialize)] +pub struct TimelineSummary { + pub start: Timecode, + pub end: Timecode, + pub start_frame: usize, + pub end_frame: usize, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub events: Vec<TimelineEvent>, +} + +#[derive(Debug, Serialize)] +pub struct HashComparisonReport { + pub matched: usize, + pub compared: usize, + pub match_ratio: f32, + pub threshold: f32, + pub offset: i32, + pub length_delta: i32, + pub reference_total: usize, + pub capture_total: usize, +} + +#[derive(Debug, Serialize)] +pub struct DriftSummary { + pub frame_offset: i32, + pub frame_offset_seconds: f64, + pub length_delta_frames: i32, + #[serde(skip_serializing_if = "Option::is_none")] + pub audio_offset_chunks: Option<i32>, + #[serde(skip_serializing_if = "Option::is_none")] + pub
audio_length_delta_chunks: Option<i32>, +} + +#[derive(Debug)] +struct Alignment { + offset: i32, + compared: usize, + matched: usize, + match_ratio: f32, +} + +#[derive(Debug, Copy, Clone)] +enum HashRole { + Frames, + Audio, +} + +pub fn run_video_validation( + reference_path: &Path, + capture_path: &Path, +) -> Result<VideoValidationReport, String> { + run_video_validation_with_config(reference_path, capture_path, None) +} + +pub fn run_video_validation_with_config( + reference_path: &Path, + capture_path: &Path, + validation_path: Option<&Path>, +) -> Result<VideoValidationReport, String> { + let reference_src = fs::read_to_string(reference_path).map_err(|err| err.to_string())?; + let capture_src = fs::read_to_string(capture_path).map_err(|err| err.to_string())?; + let reference: ReferenceVideoConfig = + toml::from_str(&reference_src).map_err(|err| format!("invalid reference config: {err}"))?; + let capture: CaptureVideoConfig = + toml::from_str(&capture_src).map_err(|err| format!("invalid capture config: {err}"))?; + let validation_override = match validation_path { + Some(path) => Some(load_validation_config(path)?), + None => None, + }; + + let reference_dir = reference_path + .parent() + .ok_or_else(|| "reference config has no parent dir".to_string())?; + let capture_dir = capture_path + .parent() + .ok_or_else(|| "capture config has no parent dir".to_string())?; + + let reference_hashes = reference + .hashes + .clone() + .ok_or_else(|| "reference hashes missing".to_string())?; + let reference_validation = reference.validation.clone().unwrap_or_default(); + let override_validation = validation_override + .as_ref() + .map(|cfg| cfg.validation.clone()); + let merged_validation = ValidationConfig { + name: override_validation + .as_ref() + .and_then(|validation| validation.name.clone()) + .or(reference_validation.name), + notes: override_validation + .as_ref() + .and_then(|validation| validation.notes.clone()) + .or(reference_validation.notes), + require_audio: override_validation + .as_ref() + .and_then(|validation| validation.require_audio) + .or(reference_validation.require_audio), + thresholds: override_validation + .as_ref() + .and_then(|validation| validation.thresholds.clone()) + .or(reference_validation.thresholds), + }; + let thresholds = merged_validation + .thresholds + .clone() + .or_else(|| reference.thresholds.clone()) + .unwrap_or_default(); + let ref_frames = load_hashes(&reference_hashes.frames, reference_dir, HashRole::Frames)?; + let ref_audio = match &reference_hashes.audio { + Some(source) => Some(load_hashes(source, reference_dir, HashRole::Audio)?), + None => None, + }; + + let capture_frames = load_hashes(&capture.hashes.frames, capture_dir, HashRole::Frames)?; + let capture_audio = match &capture.hashes.audio { + Some(source) => Some(load_hashes(source, capture_dir, HashRole::Audio)?), + None => None, + }; + let require_audio = merged_validation + .require_audio + .unwrap_or_else(|| reference_hashes.audio.is_some()); + + let timeline_start = reference + .timeline + .start + .to_frame_index(reference.video.fps)?; + let timeline_end = reference.timeline.end.to_frame_index(reference.video.fps)?; + if timeline_end <= timeline_start { + return Err("timeline end must be after start".to_string()); + } + if timeline_start >= ref_frames.len() { + return Err("timeline start beyond reference frame hashes".to_string()); + } + + let mut failures = Vec::new(); + let clamped_end = timeline_end.min(ref_frames.len()); + if timeline_end > ref_frames.len() { + failures.push(format!( + "reference frame hashes cover {}, timeline ends at {}", +
ref_frames.len(), + timeline_end + )); + } + + let mut config_mismatch = false; + let reference_coverage = timeline_end > ref_frames.len(); + let mut frame_mismatch = false; + let mut audio_mismatch = false; + let mut audio_missing = false; + + if reference.video.width != capture.video.width + || reference.video.height != capture.video.height + { + failures.push(format!( + "resolution mismatch: reference {}x{}, capture {}x{}", + reference.video.width, + reference.video.height, + capture.video.width, + capture.video.height + )); + config_mismatch = true; + } + if (reference.video.fps - capture.video.fps).abs() > f32::EPSILON { + failures.push(format!( + "fps mismatch: reference {:.3}, capture {:.3}", + reference.video.fps, capture.video.fps + )); + config_mismatch = true; + } + + let ref_slice = &ref_frames[timeline_start..clamped_end]; + let max_drift = thresholds.max_drift_frames; + let alignment = best_alignment(ref_slice, &capture_frames, max_drift); + let length_delta = capture_frames.len() as i32 - ref_slice.len() as i32; + let frame_match_ratio = if alignment.compared == 0 { + 0.0 + } else { + alignment.match_ratio + }; + if frame_match_ratio < thresholds.frame_match_ratio { + failures.push(format!( + "frame match ratio {:.3} below threshold {:.3}", + frame_match_ratio, thresholds.frame_match_ratio + )); + frame_mismatch = true; + } + if alignment.offset.abs() > thresholds.max_drift_frames { + failures.push(format!( + "frame drift {} exceeds max {}", + alignment.offset, thresholds.max_drift_frames + )); + frame_mismatch = true; + } + let length_delta_abs = length_delta.unsigned_abs() as usize; + if length_delta_abs > thresholds.max_dropped_frames { + failures.push(format!( + "frame length delta {} exceeds max dropped {}", + length_delta, thresholds.max_dropped_frames + )); + frame_mismatch = true; + } + + let mut triage_categories = Vec::new(); + let audio_report = match (ref_audio.as_ref(), capture_audio.as_ref()) { + (Some(reference_audio), Some(capture_audio)) => { + let max_audio_drift = thresholds + .max_audio_drift_chunks + .unwrap_or(thresholds.max_drift_frames); + let audio_alignment = best_alignment(reference_audio, capture_audio, max_audio_drift); + let audio_length_delta = capture_audio.len() as i32 - reference_audio.len() as i32; + let audio_match_ratio = if audio_alignment.compared == 0 { + 0.0 + } else { + audio_alignment.match_ratio + }; + if let Some(threshold) = thresholds.audio_match_ratio { + if audio_match_ratio < threshold { + failures.push(format!( + "audio match ratio {:.3} below threshold {:.3}", + audio_match_ratio, threshold + )); + audio_mismatch = true; + } + } + if audio_alignment.offset.abs() > max_audio_drift { + failures.push(format!( + "audio drift {} exceeds max {}", + audio_alignment.offset, max_audio_drift + )); + audio_mismatch = true; + } + Some(HashComparisonReport { + matched: audio_alignment.matched, + compared: audio_alignment.compared, + match_ratio: audio_match_ratio, + threshold: thresholds.audio_match_ratio.unwrap_or(0.0), + offset: audio_alignment.offset, + length_delta: audio_length_delta, + reference_total: reference_audio.len(), + capture_total: capture_audio.len(), + }) + } + (None, None) => None, + _ => { + if require_audio { + failures.push("audio hashes missing on one side".to_string()); + triage_categories.push(TriageCategory::AudioMissing); + audio_missing = true; + } + None + } + }; + + let status = if failures.is_empty() { + ValidationStatus::Passed + } else { + ValidationStatus::Failed + }; + + if config_mismatch { + 
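// Record mismatch categories in a fixed order so triage output stays + // deterministic across runs. +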
triage_categories.push(TriageCategory::ConfigMismatch); + } + if reference_coverage { + triage_categories.push(TriageCategory::ReferenceCoverage); + } + if frame_mismatch { + triage_categories.push(TriageCategory::FrameMismatch); + } + if audio_mismatch { + triage_categories.push(TriageCategory::AudioMismatch); + } + if audio_missing && !triage_categories.contains(&TriageCategory::AudioMissing) { + triage_categories.push(TriageCategory::AudioMissing); + } + if triage_categories.is_empty() && status == ValidationStatus::Passed { + triage_categories.push(TriageCategory::Pass); + } + if triage_categories.is_empty() { + triage_categories.push(TriageCategory::Unknown); + } + + let mut suggestions = Vec::new(); + if triage_categories.contains(&TriageCategory::ConfigMismatch) { + suggestions.push( + "normalize capture to the reference profile or update video metadata".to_string(), + ); + } + if triage_categories.contains(&TriageCategory::ReferenceCoverage) { + suggestions.push( + "regenerate reference hashes or adjust timeline coverage to match available frames" + .to_string(), + ); + } + if triage_categories.contains(&TriageCategory::FrameMismatch) { + suggestions.push( + "inspect frame hashes near the reported drift offset for deterministic mismatches" + .to_string(), + ); + } + if triage_categories.contains(&TriageCategory::AudioMismatch) { + suggestions.push( + "compare audio hashes near the reported chunk offset and verify extraction settings" + .to_string(), + ); + } + if triage_categories.contains(&TriageCategory::AudioMissing) { + suggestions.push( + "generate audio hashes for both reference and capture or set validation.require_audio = false" + .to_string(), + ); + } + + let drift = DriftSummary { + frame_offset: alignment.offset, + frame_offset_seconds: alignment.offset as f64 / reference.video.fps as f64, + length_delta_frames: length_delta, + audio_offset_chunks: audio_report.as_ref().map(|report| report.offset), + audio_length_delta_chunks: audio_report.as_ref().map(|report| report.length_delta), + }; + + let frame_report = HashComparisonReport { + matched: alignment.matched, + compared: alignment.compared, + match_ratio: frame_match_ratio, + threshold: thresholds.frame_match_ratio, + offset: alignment.offset, + length_delta, + reference_total: ref_slice.len(), + capture_total: capture_frames.len(), + }; + + let validation_schema_version = validation_override + .as_ref() + .and_then(|cfg| cfg.schema_version.clone()) + .or_else(|| reference.schema_version.clone()); + + Ok(VideoValidationReport { + status, + validation_config: ValidationConfigSummary { + schema_version: validation_schema_version, + name: merged_validation.name, + notes: merged_validation.notes, + require_audio, + thresholds, + }, + normalization: reference.normalization.clone(), + triage: TriageSummary { + status, + categories: triage_categories, + findings: failures.clone(), + suggestions, + }, + reference: VideoRunSummary { + path: reference.video.path.display().to_string(), + width: reference.video.width, + height: reference.video.height, + fps: reference.video.fps, + frame_hashes: ref_frames.len(), + audio_hashes: ref_audio.as_ref().map(|items| items.len()), + }, + capture: VideoRunSummary { + path: capture.video.path.display().to_string(), + width: capture.video.width, + height: capture.video.height, + fps: capture.video.fps, + frame_hashes: capture_frames.len(), + audio_hashes: capture_audio.as_ref().map(|items| items.len()), + }, + timeline: TimelineSummary { + start: reference.timeline.start, + end: 
reference.timeline.end, + start_frame: timeline_start, + end_frame: clamped_end, + events: reference.timeline.events.clone(), + }, + frame_comparison: frame_report, + audio_comparison: audio_report, + drift, + failures, + }) +} + +pub fn hash_frames_dir(path: &Path) -> Result<Vec<String>, String> { + load_dir_hashes(path) +} + +pub fn hash_audio_file(path: &Path) -> Result<Vec<String>, String> { + load_file_hashes(path) +} + +pub fn write_hash_list(path: &Path, hashes: &[String]) -> Result<(), String> { + if hashes.is_empty() { + return Err("hash list is empty".to_string()); + } + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .map_err(|err| format!("create hash list dir {}: {err}", parent.display()))?; + } + let mut output = String::new(); + for hash in hashes { + output.push_str(hash); + output.push('\n'); + } + fs::write(path, output).map_err(|err| format!("write hash list {}: {err}", path.display())) +} + +fn best_alignment(reference: &[String], capture: &[String], max_offset: i32) -> Alignment { + let mut best = Alignment { + offset: 0, + compared: 0, + matched: 0, + match_ratio: 0.0, + }; + + for offset in -max_offset..=max_offset { + let mut matched = 0; + let mut compared = 0; + for (idx, reference_hash) in reference.iter().enumerate() { + let capture_idx = idx as i32 + offset; + if capture_idx < 0 || capture_idx >= capture.len() as i32 { + continue; + } + compared += 1; + if reference_hash == &capture[capture_idx as usize] { + matched += 1; + } + } + if compared == 0 { + continue; + } + let ratio = matched as f32 / compared as f32; + let ordering = ratio + .partial_cmp(&best.match_ratio) + .unwrap_or(Ordering::Less); + let better = match ordering { + Ordering::Greater => true, + Ordering::Equal => { + if compared > best.compared { + true + } else { + let offset_abs = offset.abs(); + let best_abs = best.offset.abs(); + compared == best.compared && offset_abs < best_abs + } + } + Ordering::Less => false, + }; + if better { + best = Alignment { + offset, + compared, + matched, + match_ratio: ratio, + }; + } + } + + best +} + +fn load_hashes( + source: &HashSource, + base_dir: &Path, + role: HashRole, +) -> Result<Vec<String>, String> { + let resolved = resolve_path(base_dir, &source.path); + match source.format { + HashFormat::List => load_hash_list(&resolved), + HashFormat::Directory => match role { + HashRole::Frames => load_dir_hashes(&resolved), + HashRole::Audio => Err("audio hashes do not support directory format".to_string()), + }, + HashFormat::File => match role { + HashRole::Audio => load_file_hashes(&resolved), + HashRole::Frames => Err("frame hashes do not support file format".to_string()), + }, + } +} + +fn resolve_path(base_dir: &Path, path: &Path) -> PathBuf { + if path.is_absolute() { + path.to_path_buf() + } else { + base_dir.join(path) + } +} + +fn load_validation_config(path: &Path) -> Result<ValidationConfigFile, String> { + let content = fs::read_to_string(path) + .map_err(|err| format!("read validation config {}: {err}", path.display()))?; + toml::from_str(&content).map_err(|err| format!("invalid validation config: {err}")) +} + +fn load_hash_list(path: &Path) -> Result<Vec<String>, String> { + let content = fs::read_to_string(path) + .map_err(|err| format!("read hash list {}: {err}", path.display()))?; + let mut hashes = Vec::new(); + for (line_num, line) in content.lines().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') { + continue; + } + let parts: Vec<&str> = trimmed.split_whitespace().collect(); + let hash = match parts.len() { + 1 => parts[0], + 2 => parts[1], + _
=> { + return Err(format!( + "invalid hash list entry at {}:{}", + path.display(), + line_num + 1 + )) + } + }; + hashes.push(hash.to_string()); + } + if hashes.is_empty() { + return Err(format!("hash list {} is empty", path.display())); + } + Ok(hashes) +} + +fn load_dir_hashes(path: &Path) -> Result<Vec<String>, String> { + let mut entries: Vec<PathBuf> = fs::read_dir(path) + .map_err(|err| format!("read hash dir {}: {err}", path.display()))? + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.is_file()) + .collect(); + entries.sort(); + if entries.is_empty() { + return Err(format!("hash dir {} is empty", path.display())); + } + let mut hashes = Vec::new(); + for entry in entries { + let bytes = + fs::read(&entry).map_err(|err| format!("read hash file {}: {err}", entry.display()))?; + hashes.push(sha256_bytes(&bytes)); + } + Ok(hashes) +} + +fn load_file_hashes(path: &Path) -> Result<Vec<String>, String> { + let bytes = + fs::read(path).map_err(|err| format!("read hash file {}: {err}", path.display()))?; + if bytes.is_empty() { + return Err(format!("hash file {} is empty", path.display())); + } + let mut hashes = Vec::new(); + for chunk in bytes.chunks(AUDIO_CHUNK_BYTES) { + hashes.push(sha256_bytes(chunk)); + } + Ok(hashes) +} + +fn sha256_bytes(bytes: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(bytes); + let digest = hasher.finalize(); + format!("{:x}", digest) +} diff --git a/crates/recomp-validation/tests/video_validation.rs b/crates/recomp-validation/tests/video_validation.rs new file mode 100644 index 0000000..076c26c --- /dev/null +++ b/crates/recomp-validation/tests/video_validation.rs @@ -0,0 +1,257 @@ +use recomp_validation::{ + hash_audio_file, hash_frames_dir, run_video_validation, run_video_validation_with_config, + write_hash_list, Timecode, ValidationStatus, +}; +use sha2::{Digest, Sha256}; +use std::fs; + +#[test] +fn timecode_parses_hms_and_seconds() { + let tc = Timecode::parse("01:02:03.500").expect("parse hms"); + assert!((tc.seconds - 3723.5).abs() < 0.001); + let tc = Timecode::parse("90.25").expect("parse seconds"); + assert!((tc.seconds - 90.25).abs() < 0.001); +} + +#[test] +fn video_validation_passes_with_offset() { + let temp = tempfile::tempdir().expect("tempdir"); + let ref_frames = vec![ + "a".to_string(), + "b".to_string(), + "c".to_string(), + "d".to_string(), + "e".to_string(), + ]; + let capture_frames = vec![ + "x".to_string(), + "a".to_string(), + "b".to_string(), + "c".to_string(), + "d".to_string(), + "e".to_string(), + ]; + + let ref_hash_path = temp.path().join("reference_frames.txt"); + let capture_hash_path = temp.path().join("capture_frames.txt"); + write_hash_list(&ref_hash_path, &ref_frames).expect("write ref hashes"); + write_hash_list(&capture_hash_path, &capture_frames).expect("write capture hashes"); + + let reference_toml = format!( + r#"[video] +path = "reference.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[timeline] +start = "00:00:00.000" +end = "00:00:00.167" + +[hashes.frames] +format = "list" +path = "{}" + +[thresholds] +frame_match_ratio = 0.99 +max_drift_frames = 1 +max_dropped_frames = 1 +"#, + ref_hash_path.display() + ); + let capture_toml = format!( + r#"[video] +path = "capture.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[hashes.frames] +format = "list" +path = "{}" +"#, + capture_hash_path.display() + ); + + let reference_path = temp.path().join("reference_video.toml"); + let capture_path = temp.path().join("capture_video.toml"); + fs::write(&reference_path, reference_toml).expect("write
reference config"); + fs::write(&capture_path, capture_toml).expect("write capture config"); + + let report = run_video_validation(&reference_path, &capture_path).expect("run validation"); + assert_eq!(report.status, ValidationStatus::Passed); + assert_eq!(report.frame_comparison.matched, 5); + assert_eq!(report.frame_comparison.offset, 1); + assert_eq!(report.drift.frame_offset, 1); +} + +#[test] +fn video_validation_fails_on_low_match_ratio() { + let temp = tempfile::tempdir().expect("tempdir"); + let ref_frames = vec!["a".to_string(), "b".to_string(), "c".to_string()]; + let capture_frames = vec!["a".to_string(), "x".to_string(), "y".to_string()]; + + let ref_hash_path = temp.path().join("reference_frames.txt"); + let capture_hash_path = temp.path().join("capture_frames.txt"); + write_hash_list(&ref_hash_path, &ref_frames).expect("write ref hashes"); + write_hash_list(&capture_hash_path, &capture_frames).expect("write capture hashes"); + + let reference_toml = format!( + r#"[video] +path = "reference.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[timeline] +start = "0" +end = "0.100" + +[hashes.frames] +format = "list" +path = "{}" + +[thresholds] +frame_match_ratio = 0.9 +max_drift_frames = 0 +max_dropped_frames = 0 +"#, + ref_hash_path.display() + ); + let capture_toml = format!( + r#"[video] +path = "capture.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[hashes.frames] +format = "list" +path = "{}" +"#, + capture_hash_path.display() + ); + + let reference_path = temp.path().join("reference_video.toml"); + let capture_path = temp.path().join("capture_video.toml"); + fs::write(&reference_path, reference_toml).expect("write reference config"); + fs::write(&capture_path, capture_toml).expect("write capture config"); + + let report = run_video_validation(&reference_path, &capture_path).expect("run validation"); + assert_eq!(report.status, ValidationStatus::Failed); + assert!(report + .failures + .iter() + .any(|failure| failure.contains("frame match ratio"))); +} + +#[test] +fn hash_generation_matches_normalized_outputs() { + let temp = tempfile::tempdir().expect("tempdir"); + let frames_dir = temp.path().join("frames"); + fs::create_dir_all(&frames_dir).expect("create frames dir"); + let frame_a = frames_dir.join("00000001.png"); + let frame_b = frames_dir.join("00000002.png"); + fs::write(&frame_a, b"frame-one").expect("write frame a"); + fs::write(&frame_b, b"frame-two").expect("write frame b"); + + let frame_hashes = hash_frames_dir(&frames_dir).expect("hash frames"); + let expected_frames = vec![sha256_bytes(b"frame-one"), sha256_bytes(b"frame-two")]; + assert_eq!(frame_hashes, expected_frames); + + let audio_path = temp.path().join("audio.wav"); + let mut first = vec![0u8; 4096]; + first[0] = 1; + let second = vec![2u8; 4096]; + let mut audio = Vec::new(); + audio.extend_from_slice(&first); + audio.extend_from_slice(&second); + fs::write(&audio_path, &audio).expect("write audio"); + + let audio_hashes = hash_audio_file(&audio_path).expect("hash audio"); + let expected_audio = vec![sha256_bytes(&first), sha256_bytes(&second)]; + assert_eq!(audio_hashes, expected_audio); +} + +#[test] +fn validation_override_config_applies_thresholds() { + let temp = tempfile::tempdir().expect("tempdir"); + let ref_frames = vec!["a".to_string(), "b".to_string(), "c".to_string()]; + let capture_frames = vec!["a".to_string(), "x".to_string(), "y".to_string()]; + + let ref_hash_path = temp.path().join("reference_frames.txt"); + let capture_hash_path = temp.path().join("capture_frames.txt"); + 
write_hash_list(&ref_hash_path, &ref_frames).expect("write ref hashes"); + write_hash_list(&capture_hash_path, &capture_frames).expect("write capture hashes"); + + let reference_toml = format!( + r#"[video] +path = "reference.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[timeline] +start = "0" +end = "0.100" + +[hashes.frames] +format = "list" +path = "{}" + +[thresholds] +frame_match_ratio = 0.95 +max_drift_frames = 0 +max_dropped_frames = 0 +"#, + ref_hash_path.display() + ); + let capture_toml = format!( + r#"[video] +path = "capture.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[hashes.frames] +format = "list" +path = "{}" +"#, + capture_hash_path.display() + ); + let validation_toml = r#"schema_version = "1" +name = "override" +notes = "Relax thresholds" +require_audio = false + +[thresholds] +frame_match_ratio = 0.0 +max_drift_frames = 1 +max_dropped_frames = 2 +"#; + + let reference_path = temp.path().join("reference_video.toml"); + let capture_path = temp.path().join("capture_video.toml"); + let validation_path = temp.path().join("validation_config.toml"); + fs::write(&reference_path, reference_toml).expect("write reference config"); + fs::write(&capture_path, capture_toml).expect("write capture config"); + fs::write(&validation_path, validation_toml).expect("write validation config"); + + let report = + run_video_validation_with_config(&reference_path, &capture_path, Some(&validation_path)) + .expect("run validation"); + assert_eq!(report.status, ValidationStatus::Passed); + assert_eq!( + report.validation_config.schema_version.as_deref(), + Some("1") + ); + assert_eq!(report.validation_config.name.as_deref(), Some("override")); + assert!((report.validation_config.thresholds.frame_match_ratio - 0.0).abs() < 0.0001); +} + +fn sha256_bytes(bytes: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(bytes); + let digest = hasher.finalize(); + format!("{:x}", digest) +} diff --git a/docs/automated-recomp-architecture-plan.md b/docs/automated-recomp-architecture-plan.md new file mode 100644 index 0000000..f4ee4c9 --- /dev/null +++ b/docs/automated-recomp-architecture-plan.md @@ -0,0 +1,93 @@ +# Automated Recompilation Architecture Plan + +## Status +Draft v0.1 + +## Goals +- Provide a concrete, end-to-end architecture for fully automated static recompilation. +- Define a hybrid local plus AWS deployment that keeps inputs and outputs cleanly separated. +- Specify an agent-managed pipeline using the GPT-5.2-Codex API via the OpenAI Responses API. +- Make security and provenance a first-class concern across the pipeline. + +## Scope +- Config-driven recompilation of non-proprietary inputs as defined by existing specs. +- Local developer runs and AWS-backed scale-out runs. +- Automation, observability, and auditability for the full pipeline lifecycle. + +## Non-Goals +- Running or storing proprietary game assets. +- Replacing existing spec-level definitions for formats or runtime ABI. +- Defining UI experiences beyond minimal operator dashboards. + +## Architecture Overview + +### Local Stack +- Recomp Orchestrator (local): CLI and daemon that accepts run requests and manages the pipeline. +- Local Artifact Store: content-addressed cache for inputs, intermediate artifacts, and outputs. +- Local Execution Pool: sandboxed workers for parsing, analysis, and codegen steps. +- Local Validation Harness: deterministic replays and output validation on local hardware. + +### AWS Stack +- Run Control Plane: API layer for submission, status, and metadata. 
+- Orchestration Service: AWS Step Functions for stateful pipelines and retries. +- Job Queue: SQS for work item fanout to workers. +- Compute Pool: ECS or Batch for stateless workers (CPU/GPU tiers). +- Artifact Store: S3 with immutable object versioning and lifecycle policies. +- Metadata Store: DynamoDB or Postgres for run state, provenance, and indexing. +- Model Gateway: service that brokers access to GPT-5.2-Codex via the Responses API. +- Validation Farm: managed runners that execute deterministic replays and compare outputs. + +## Core Services and Responsibilities +- Run Control Plane: authenticate requests, enforce policy, and emit run events. +- Orchestrator: define stages, retries, and dependency ordering for each run. +- Artifact Store: store all immutable inputs and outputs with content hashes. +- Metadata Store: track run status, provenance, and artifact lineage. +- Execution Workers: perform deterministic transforms using the pipeline specs. +- Model Gateway: normalize prompts, enforce redaction, and apply model routing rules. +- Validation Harness: execute deterministic checks and write validation reports. + +## Data Flow (Hybrid) +1. Intake: local or cloud intake validates inputs and creates a run request. +2. Normalize: inputs are normalized, hashed, and written to the Artifact Store. +3. Plan: the agent planner generates a run plan using GPT-5.2-Codex. +4. Execute: workers process plan stages and emit artifacts and logs. +5. Validate: validation runners compare outputs to reference baselines. +6. Package: build outputs are packaged with manifests and integrity reports. +7. Publish: outputs are stored in the Artifact Store and indexed in Metadata. + +## Security and Compliance +- Classify inputs and outputs by provenance and sensitivity. +- Enforce least privilege IAM roles for each service and worker tier. +- Store secrets in AWS Secrets Manager and local equivalents. +- Encrypt data at rest and in transit with KMS-managed keys. +- Maintain immutable audit logs for all run requests and model prompts. +- Enforce redaction rules before any model request leaves the environment. + +## Agent-Managed Pipeline Using GPT-5.2-Codex +- Use the OpenAI Responses API as the sole model interface for GPT-5.2-Codex. +- Use structured responses with explicit schemas for plans, diffs, and decisions. +- Apply model routing rules that can fall back to GPT-5.2 if GPT-5.2-Codex is unavailable. +- Capture prompts, responses, and model metadata in the audit log. +- Provide tool access only through the Model Gateway to enforce policy. + +## Automation and Operations +- Local runs: CLI triggers a local orchestrator workflow with deterministic stages. +- Cloud runs: EventBridge schedules and manual triggers submit runs to the Control Plane. +- Retry policy: bounded retries with exponential backoff and circuit breakers. +- Approval gates: optional human approval for high-cost or high-risk stages. +- Rollbacks: failed stages retain artifacts and logs for replay. + +## Observability +- Structured logs for each stage with run-id correlation. +- Metrics for queue depth, worker utilization, and validation pass rates. +- Traces for end-to-end run latency across services. + +## Rollout Phases +- Phase 1: local-only orchestration and agent planning. +- Phase 2: hybrid runs with shared Artifact Store and cloud validation. +- Phase 3: full AWS orchestration with auto-scaling execution pools. + +## Open Questions +- Do we need a dedicated schema registry for agent outputs? 
+- Which stages should be allowed to run without human approval? +- What is the minimum local hardware profile for deterministic validation? diff --git a/docs/automation-loop.md b/docs/automation-loop.md new file mode 100644 index 0000000..56dc9ba --- /dev/null +++ b/docs/automation-loop.md @@ -0,0 +1,53 @@ +# Automated Recompilation Loop + +This document describes the intended automation loop for static recompilation. The goal is a +single command that runs intake, build, capture, and validation without copying proprietary +assets into the repo. + +## Loop Overview +1. Validate provenance and input formats. +2. Intake (XCI or homebrew) and lift to `module.json`. +3. Build the emitted Rust project. +4. Run the rebuilt binary with deterministic runtime settings. +5. Capture video/audio output to an external artifact root. +6. Generate hashes and run validation. +7. Emit `run-manifest.json` and `validation-report.json`. + +## Core Inputs +- `automation.toml` (config schema implemented in `recomp automate`). +- `reference_video.toml` and `capture_video.toml`. +- `input_script.toml` for deterministic input replay. + +## Outputs +- Build artifacts under `out/<title>/`. +- Capture artifacts under `artifacts/<title>/capture/`. +- Validation artifacts under `artifacts/<title>/validation/`. +- `run-manifest.json` for per-step timing, hashes, and provenance. + +## Asset Separation +All assets (RomFS, reference video, capture output) remain outside the repo. Only hashes and +metadata should be committed. + +## Automation Config +`automation.toml` defines inputs, outputs, capture paths, and commands. Start from +`samples/automation.toml` and update the paths for your environment. Key sections: +- `schema_version` +- `[inputs]` mode (`homebrew`, `xci`, `lifted`), provenance, title config, and inputs. +- `[outputs]` work root and optional overrides for intake/lift/build dirs. +- `[reference]` reference/capture video config paths (plus optional validation config). +- `[capture]` capture video path and extracted frames/audio locations. +- `[commands]` build/run/capture/extract commands (plus optional lift command for XCI). +- `[run]` resume and lift settings (optional). + +Invoke the loop with: +```bash +recomp automate --config automation.toml +``` + +## DKCR Validation Inputs +The DKCR validation run requires external reference and capture artifacts. Track the required +paths and timecodes in `docs/dkcr-validation-prereqs.md` before wiring a DKCR-specific +`automation.toml`. + +## Next Steps +- Iterate on capture automation and tighten determinism for external tools. diff --git a/docs/dkcr-hd-boot-path.md b/docs/dkcr-hd-boot-path.md new file mode 100644 index 0000000..6950906 --- /dev/null +++ b/docs/dkcr-hd-boot-path.md @@ -0,0 +1,50 @@ +# DKCR HD Boot Path (Scaffold) + +This document sketches a first-level boot path using the new runtime stubs. The goal is to +capture the minimal service, graphics, audio, and input wiring needed to reach the first +playable level without bundling proprietary assets. + +## Boot Flow Summary +- Mount external RomFS at `game-data/dkcr-hd/romfs`. +- Initialize stub services (SM, FS, VI, HID, audio) for early boot. +- Submit placeholder graphics commands and present frames. +- Submit placeholder audio buffers. +- Queue deterministic input frames. + +## Runtime Stub Shape +The runtime exposes a small boot scaffold that records steps and uses stub backends for +services, graphics, audio, and input.
+ +```rust +use recomp_runtime::{ + AudioBuffer, BootContext, BootPlan, FrameDescriptor, InputEvent, InputFrame, + ServiceCallSpec, ServiceStubSpec, StubBehavior, +}; +use recomp_runtime::{CommandStream}; + +let mut boot = BootContext::new("DKCR HD Sample") + .with_assets_root("game-data/dkcr-hd/romfs"); + +boot.register_service_stubs(&[ + ServiceStubSpec::new("svc_sm", StubBehavior::Log), + ServiceStubSpec::new("svc_fs", StubBehavior::Log), + ServiceStubSpec::new("svc_vi", StubBehavior::Log), + ServiceStubSpec::new("svc_hid", StubBehavior::Log), + ServiceStubSpec::new("svc_audout", StubBehavior::Log), +]); + +let plan = BootPlan::new() + .service_call(ServiceCallSpec::new("svc_sm", vec![])) + .service_call(ServiceCallSpec::new("svc_fs", vec![])) + .gfx_stream(CommandStream::new(vec![0xdead_beef])) + .present(FrameDescriptor::new(1, 1280, 720)) + .audio(AudioBuffer::new(256, 2, 48_000)) + .input(InputFrame::new(0, vec![InputEvent { time: 0, code: 1, value: 1 }])); + +let trace = boot.run_plan(&plan).expect("boot plan"); +println!("boot steps: {}", trace.steps().len()); +``` + +## Notes +- `samples/dkcr-hd/title.toml` contains stub mappings and the RomFS path. +- `samples/dkcr-hd/patches/first-level.toml` records placeholder patches for the first level. +- Replace stub service calls with real implementations as the pipeline matures. diff --git a/docs/dkcr-hd-runbook.md b/docs/dkcr-hd-runbook.md new file mode 100644 index 0000000..cb31542 --- /dev/null +++ b/docs/dkcr-hd-runbook.md @@ -0,0 +1,64 @@ +# DKCR HD macOS/aarch64 Runbook (Scaffold) + +This runbook documents a reproducible build and run loop for the SPEC-200 scaffold on +macOS/aarch64. It uses placeholder inputs and does not bundle any retail assets. + +## Prerequisites +- macOS on Apple Silicon (aarch64). +- Rust toolchain installed via `rustup`. +- Optional: Nix + devenv for the repo dev shell. + +## Build and Run +1. Enter the dev shell (optional). + +``` +nix develop --impure +``` + +2. Run the pipeline for the DKCR HD sample. + +``` +cargo run -p recomp-cli -- run \ + --module samples/dkcr-hd/module.json \ + --config samples/dkcr-hd/title.toml \ + --provenance samples/dkcr-hd/provenance.toml \ + --out-dir out/dkcr-hd +``` + +3. Build the emitted project. + +``` +cargo build --manifest-path out/dkcr-hd/Cargo.toml +``` + +4. Run the emitted binary. + +``` +cargo run --manifest-path out/dkcr-hd/Cargo.toml +``` + +5. Capture a validation run and compare against the reference timeline. + +``` +scripts/capture-video-macos.sh artifacts/dkcr-hd +ffmpeg -i artifacts/dkcr-hd/capture.mp4 artifacts/dkcr-hd/frames/%08d.png +ffmpeg -i artifacts/dkcr-hd/capture.mp4 -vn -acodec pcm_s16le artifacts/dkcr-hd/audio.wav + +recomp-validation hash-frames --frames-dir artifacts/dkcr-hd/frames --out artifacts/dkcr-hd/frames.hashes +recomp-validation hash-audio --audio-file artifacts/dkcr-hd/audio.wav --out artifacts/dkcr-hd/audio.hashes + +cp samples/capture_video.toml artifacts/dkcr-hd/capture.toml +# Edit artifacts/dkcr-hd/capture.toml to point at the capture hashes above. + +recomp-validation video \ + --reference samples/reference_video.toml \ + --capture artifacts/dkcr-hd/capture.toml \ + --out-dir artifacts/dkcr-hd/validation +``` + +Note: The automated validation loop is paused until SPEC-210/220/230/240 are implemented. + +## External Assets +- RomFS assets are expected at `game-data/dkcr-hd/romfs`. 
+- Replace placeholder inputs under `samples/dkcr-hd/inputs/` with real artifacts in a + private workspace before attempting full DKCR HD validation. diff --git a/docs/dkcr-validation-prereqs.md b/docs/dkcr-validation-prereqs.md new file mode 100644 index 0000000..f71f5ec --- /dev/null +++ b/docs/dkcr-validation-prereqs.md @@ -0,0 +1,31 @@ +# DKCR Validation Prerequisites + +This document captures the external inputs required to run DKCR HD video validation. These +artifacts are not stored in the repo and must be supplied locally for each run. + +## Required Inputs +- Absolute path to the reference video file, or absolute paths to precomputed reference frame + and audio hash lists. +- Absolute path to the capture video file, or absolute paths to precomputed capture frame + and audio hash lists. +- Confirmed first-level start and end timecodes for the reference timeline. + +## Hash List Paths (If Precomputed) +- Reference frames hash list path (absolute). +- Reference audio hash list path (absolute). +- Capture frames hash list path (absolute). +- Capture audio hash list path (absolute). + +## Timeline Confirmation +Provide the exact first-level start and end timecodes in HH:MM:SS.mmm or seconds format. If the +existing timeline in `samples/reference_video.toml` is correct, explicitly confirm it. + +## Optional Inputs +- Input replay script path (absolute) if you want deterministic input playback. +- Capture device settings (resolution, fps) used during recording. + +## Once Provided +- Update `samples/reference_video.toml` with the absolute reference video path and timeline. +- Update `samples/capture_video.toml` with the absolute capture video path. +- Run the validation command described in `docs/validation-video.md`. +- Review `validation-report.json` and capture any triage notes. diff --git a/docs/input-replay.md b/docs/input-replay.md new file mode 100644 index 0000000..d601032 --- /dev/null +++ b/docs/input-replay.md @@ -0,0 +1,79 @@ +# Input Replay Notes + +Input replay is required to align validation runs with a reference video that includes +player interactions. This document summarizes the expected workflow and artifacts. + +## Workflow +1. Author or record an `input_script.toml`. +2. Run the rebuilt binary with the input replay enabled. +3. Capture video/audio and validate against the reference timeline. + +## Input Script Schema +`input_script.toml` is a versioned, deterministic script describing input events and alignment markers. +All timestamps are relative to replay start (time zero). + +Top-level fields: +- `schema_version` (string, currently `"1"`). +- `[metadata]` (required). +- `[[events]]` (required, ordered list; order is preserved for same timestamp). +- `[[markers]]` (optional, ordered list). + +`[metadata]` fields: +- `title` (string, required). +- `controller` (string, required; descriptive profile name). +- `timing_mode` (string, required; `"ms"` or `"frames"`). +- `recorded_at` (string, optional; ISO 8601). +- `notes` (string, optional). + +`[[events]]` fields: +- `time_ms` (u64, required when `timing_mode = "ms"`). +- `frame` (u64, required when `timing_mode = "frames"`). +- `control` (u32, required; runtime input code). +- `value` (i32, required; button/axis value). +- `note` (string, optional). + +`[[markers]]` fields: +- `name` (string, required; unique). +- `time_ms` (u64, required when `timing_mode = "ms"`). +- `frame` (u64, required when `timing_mode = "frames"`). +- `note` (string, optional). 
+ +Example: +```toml +schema_version = "1" + +[metadata] +title = "Sample Replay" +controller = "pro_controller" +timing_mode = "ms" + +[[events]] +time_ms = 0 +control = 100 +value = 1 + +[[markers]] +name = "boot" +time_ms = 0 +``` + +Parser rules: +- `schema_version` must match the runtime's supported version. +- `metadata` must include `title`, `controller`, and `timing_mode`. +- `events` must be non-empty and use the time field for the selected `timing_mode`. +- `markers` must have unique names and use the same timing base as events. + +## Playback Integration +- Load and validate the script before boot. +- Build a deterministic playback queue and feed the runtime input backend as time advances. +- For identical timestamps, playback preserves script order. +- Marker ordering is stable for identical timestamps. + +## Alignment Tips +- Keep a deterministic start point (boot marker). +- Align the first interaction with a visible cue in the reference video. +- Use markers to resync at key events. + +## Notes +- Inputs remain external; only hashes and metadata are stored in the repo. +- Deterministic replay is required for stable validation. diff --git a/docs/reference-media.md b/docs/reference-media.md new file mode 100644 index 0000000..be7fe20 --- /dev/null +++ b/docs/reference-media.md @@ -0,0 +1,38 @@ +# Reference Media Normalization + +Reference videos may come from different sources and formats. Normalization ensures comparisons +are stable and predictable. + +## Canonical Profile +- Resolution: 1280x720 +- Frame rate: 30 fps, constant (CFR) +- Audio: 48 kHz PCM, 2 channels + +## Normalization Workflow +1. Transcode the source to the canonical profile. +2. Extract frames and audio from the normalized output. +3. Generate frame and audio hash lists. +4. Record normalization metadata and hashes in `reference_video.toml`. + +## Scripted Pipeline +`scripts/normalize-reference-video.sh` runs the full workflow. + +```bash +scripts/normalize-reference-video.sh /path/to/source.mov artifacts/reference +``` + +Outputs: +- `artifacts/reference/reference-normalized.mp4` +- `artifacts/reference/frames/` (PNG frames) +- `artifacts/reference/audio.wav` +- `artifacts/reference/frames.hashes` +- `artifacts/reference/audio.hashes` + +## Storage Policy +Reference media stays outside the repo. Only hashes and metadata are tracked. + +## Notes +If the source is variable frame rate, normalize to constant fps before hashing. +Record the normalization profile and source path in `[normalization]` within +`reference_video.toml`. +For DKCR-specific validation inputs, see `docs/dkcr-validation-prereqs.md`. diff --git a/docs/validation-baseline.md b/docs/validation-baseline.md index c51a4a0..5cbbb28 100644 --- a/docs/validation-baseline.md +++ b/docs/validation-baseline.md @@ -6,6 +6,9 @@ This document defines the baseline validation suite and thresholds for correctne - `runtime_config_defaults`: Runtime config defaults to handheld mode. - `pipeline_minimal_sample`: Minimal sample pipeline emits expected artifacts and detects inputs. +## Video Validation +Video-based validation is a separate workflow that compares reference and capture hashes. See `docs/validation-video.md` for configuration, hashing, and report details. + ## Thresholds - All baseline cases must pass (0 failures). - Reports must be generated on every run (JSON + text). 
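+
+## Report Check Sketch
+The baseline report schema is not fixed in this document; as a hedged illustration, a gate
+script could parse the JSON report and enforce the zero-failure threshold. The file path and
+the `failures` field below are assumptions, not the actual report format.
+
+```rust
+use serde_json::Value;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Hypothetical report location; the real path comes from the run config.
+    let text = std::fs::read_to_string("artifacts/validation/baseline-report.json")?;
+    let report: Value = serde_json::from_str(&text)?;
+    // `failures` is an assumed field name; treat a missing field as a failure.
+    let failures = report["failures"].as_u64().unwrap_or(u64::MAX);
+    // The baseline threshold: every case must pass (0 failures).
+    if failures != 0 {
+        return Err(format!("baseline reported {failures} failures").into());
+    }
+    println!("baseline passed");
+    Ok(())
+}
+```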
diff --git a/docs/validation-video.md b/docs/validation-video.md new file mode 100644 index 0000000..da869d4 --- /dev/null +++ b/docs/validation-video.md @@ -0,0 +1,96 @@
+# Video Validation Workflow
+
+This workflow compares a normalized reference gameplay video against a captured run using
+deterministic hash lists. The comparison is coarse, intended to detect large visual or audio
+regressions.
+
+## Inputs
+- `reference_video.toml`: reference metadata, timeline, hashes, and validation config.
+- `capture_video.toml`: capture metadata and hash sources.
+- Frame hash inputs:
+  - A list file (`format = "list"`) with one hash per line, in frame order.
+  - A directory (`format = "directory"`) of frame images hashed in filename order.
+- Audio hash inputs:
+  - A list file (`format = "list"`) with one hash per chunk.
+  - A raw file (`format = "file"`) hashed in fixed chunks (4096 bytes).
+
+## Reference Config
+Use `samples/reference_video.toml` as a template. Capture configs are similar but only need
+`[video]` and `[hashes]`. A starter capture template lives at `samples/capture_video.toml`.
+Optional overrides can live in `validation_config.toml` (see `samples/validation_config.toml`)
+and be passed with `--validation-config`.
+`reference_video.toml` now supports:
+- `schema_version`: config schema version string.
+- `[normalization]`: source and profile metadata for the normalized reference.
+- `[validation]`: optional name, notes, thresholds, and `require_audio` for the comparison.
+See `docs/reference-media.md` for the normalization flow.
+
+## DKCR Prerequisites
+Before running DKCR validation, gather the external artifacts listed in
+`docs/dkcr-validation-prereqs.md`. The reference and capture paths must be absolute, and the
+first-level start/end timecodes must be confirmed.
+
+## Hash Generation
+Generate hash lists from deterministic inputs:
+
+```bash
+recomp-validation hash-frames --frames-dir artifacts/frames --out artifacts/frames.hashes
+recomp-validation hash-audio --audio-file artifacts/audio.wav --out artifacts/audio.hashes
+```
+
+If you already have precomputed hashes, point `hashes.frames` or `hashes.audio` at the list
+files directly.
+
+## Capture (macOS)
+Use `scripts/capture-video-macos.sh` to record a run. Set the device indices to match your
+capture setup (use `ffmpeg -f avfoundation -list_devices true -i ""` to enumerate devices).
+
+```bash
+scripts/capture-video-macos.sh artifacts/capture
+```
+
+Extract frames and audio from the capture before hashing:
+
+```bash
+ffmpeg -i artifacts/capture/capture.mp4 artifacts/capture/frames/%08d.png
+ffmpeg -i artifacts/capture/capture.mp4 -vn -acodec pcm_s16le artifacts/capture/audio.wav
+```
+
+## Comparison
+Run the comparison and emit `validation-report.json`:
+
+```bash
+recomp-validation video \
+  --reference reference_video.toml \
+  --capture capture_video.toml \
+  --validation-config validation_config.toml \
+  --out-dir artifacts/validation
+```
+
+## Report Fields
+The JSON report includes:
+- `video.validation_config`: schema version, validation name, and thresholds.
+- `video.normalization`: normalized source metadata (if provided).
+- `video.triage`: categories, findings, and suggestions for follow-up.
+- `video.status`: overall pass/fail.
+- `video.frame_comparison`: matched/compared counts, match ratio, and frame offset.
+- `video.audio_comparison`: audio match ratio and chunk drift (if provided).
+- `video.drift`: frame and audio drift summary.
+- `video.failures`: threshold violations.
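+
+## Comparison Sketch
+As a rough illustration of the coarse comparison above, the sketch below scans a small offset
+window and keeps the best frame match ratio. It is a simplified model of the described
+behavior, not the actual recomp-validation implementation.
+
+```rust
+// Compare two frame-hash lists at a given alignment offset.
+fn match_ratio(reference: &[String], capture: &[String], offset: i64) -> f64 {
+    let (mut matched, mut compared) = (0usize, 0usize);
+    for (i, hash) in reference.iter().enumerate() {
+        let j = i as i64 + offset;
+        if j < 0 || j as usize >= capture.len() {
+            continue; // no counterpart frame at this offset
+        }
+        compared += 1;
+        if *hash == capture[j as usize] {
+            matched += 1;
+        }
+    }
+    if compared == 0 { 0.0 } else { matched as f64 / compared as f64 }
+}
+
+// Scan the drift window and keep the best (offset, ratio) pair.
+fn best_alignment(reference: &[String], capture: &[String], max_drift: i64) -> (i64, f64) {
+    (-max_drift..=max_drift)
+        .map(|off| (off, match_ratio(reference, capture, off)))
+        .max_by(|a, b| a.1.total_cmp(&b.1))
+        .unwrap_or((0, 0.0))
+}
+
+fn main() {
+    let reference: Vec<String> = ["a", "b", "c", "d"].iter().map(|s| s.to_string()).collect();
+    let capture: Vec<String> = ["x", "a", "b", "c"].iter().map(|s| s.to_string()).collect();
+    // The capture lags by one frame, so the scan should report offset 1.
+    let (offset, ratio) = best_alignment(&reference, &capture, 3);
+    println!("offset {offset}, match ratio {ratio:.2}");
+}
+```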
+ +## Thresholds +Thresholds are configured in `reference_video.toml` under `[validation.thresholds]`. The +legacy top-level `[thresholds]` block is still accepted. Defaults are: +- `frame_match_ratio = 0.92` +- `audio_match_ratio = 0.90` +- `max_drift_frames = 3` +- `max_dropped_frames = 0` + +Tune thresholds per title and keep the drift window small to avoid false positives. + +## Manual Review +When validation fails: +- Inspect the frame hash lists near the reported drift offset. +- Compare audio hashes around the reported chunk offset. +- If a mismatch is expected (e.g., cutscene timing), record a note in the provenance metadata. +- Track follow-ups in the triage notes and update `validation.notes` if needed. diff --git a/docs/xci-intake.md b/docs/xci-intake.md new file mode 100644 index 0000000..e71e588 --- /dev/null +++ b/docs/xci-intake.md @@ -0,0 +1,103 @@ +# XCI Intake Workflow (Scaffold) + +This workflow ingests a user-supplied XCI and keyset, extracts ExeFS and NSO segments, and +emits RomFS assets into a separate output root. The current implementation includes a +mock extractor for non-proprietary tests and fixtures. Real-world extraction should be +wired to an external tool (e.g., hactool) in a private workspace. + +## Inputs +- XCI image (path to `.xci`). +- Keyset (path to `.keys` or `.keyset`). +- Provenance metadata listing both inputs with hashes. + +## CLI Usage +```bash +cargo run -p recomp-cli -- xci-intake \ + --xci path/to/title.xci \ + --keys path/to/title.keys \ + --provenance provenance.toml \ + --out-dir out/xci-intake \ + --assets-dir assets/xci-intake \ + --xci-tool auto +``` + +Optional program selection: +```bash +cargo run -p recomp-cli -- xci-intake \ + --xci path/to/title.xci \ + --keys path/to/title.keys \ + --provenance provenance.toml \ + --config title.toml \ + --out-dir out/xci-intake \ + --assets-dir assets/xci-intake \ + --xci-tool hactool +``` + +Tool selection: +- `--xci-tool auto` (default): use `hactoolnet` or `hactool` if found on `PATH`, else fall back to the mock extractor. +- `--xci-tool hactool` or `--xci-tool hactoolnet`: require the specified tool. +- `--xci-tool mock`: force the mock extractor even if tools are available. +- `--xci-tool-path /path/to/hactool`: override the tool executable location. + +Environment overrides: +- `RECOMP_XCI_TOOL=auto|hactool|hactoolnet|mock` +- `RECOMP_XCI_TOOL_PATH=/path/to/hactool` + +The XCI intake config recognizes these optional fields at the top level: +- `program_title_id` +- `program_version` +- `program_content_type` (defaults to `program`) + +## Provenance Requirements +The provenance file must list the XCI and keyset as inputs, for example: +```toml +[[inputs]] +path = "title.xci" +format = "xci" +sha256 = "..." +size = 123 +role = "retail_image" + +[[inputs]] +path = "title.keys" +format = "keyset" +sha256 = "..." +size = 456 +role = "decryption_keys" +``` + +## Outputs +- `out_dir/exefs/` contains extracted ExeFS files. +- `out_dir/segments/` contains decompressed NSO segments. +- `out_dir/module.json` and `out_dir/manifest.json` record hashes and metadata. +- `assets_dir/romfs/` contains extracted RomFS assets. + +## Mock Extractor +For tests and fixtures, the mock extractor expects a JSON payload in the `.xci` file: +```json +{ + "schema_version": "1", + "programs": [ + { + "title_id": "0100000000000000", + "content_type": "program", + "version": "1.0.0", + "nca": { "data_b64": "..." }, + "exefs": [ + { "name": "main", "data_b64": "..." 
} + ], + "nso": [ + { "name": "main", "data_b64": "..." } + ] + } + ], + "romfs": { "image_b64": "..." } +} +``` + +## Notes +- The implementation refuses to place assets inside `out_dir` or vice versa. +- Real extraction should run outside the repo and only copy non-proprietary metadata + into tracked files. +- External extraction uses `--outdir`, `--exefsdir`, and `--romfsdir` flags that are + compatible with recent `hactool`/`hactoolnet` builds; adjust tool paths if needed. diff --git a/samples/automation.toml b/samples/automation.toml new file mode 100644 index 0000000..f9ed3c1 --- /dev/null +++ b/samples/automation.toml @@ -0,0 +1,33 @@ +schema_version = "1" + +[inputs] +mode = "lifted" +module_json = "samples/minimal/module.json" +provenance = "samples/minimal/provenance.toml" +config = "samples/minimal/title.toml" +runtime_path = "crates/recomp-runtime" + +[outputs] +work_root = "out/automation-minimal" + +[reference] +reference_video_toml = "samples/reference_video.toml" +capture_video_toml = "samples/capture_video.toml" +validation_config_toml = "samples/validation_config.toml" + +[capture] +video_path = "artifacts/capture/capture.mp4" +frames_dir = "artifacts/capture/frames" +audio_file = "artifacts/capture/audio.wav" + +[commands] +build = ["cargo", "build", "--manifest-path", "out/automation-minimal/build/Cargo.toml"] +run = ["/path/to/recompiled/binary"] +capture = ["scripts/capture-video-macos.sh", "artifacts/capture"] +extract_frames = ["ffmpeg", "-i", "artifacts/capture/capture.mp4", "artifacts/capture/frames/%08d.png"] +extract_audio = ["ffmpeg", "-i", "artifacts/capture/capture.mp4", "-vn", "-acodec", "pcm_s16le", "artifacts/capture/audio.wav"] + +[run] +resume = true +lift_entry = "entry" +lift_mode = "decode" diff --git a/samples/capture_video.toml b/samples/capture_video.toml new file mode 100644 index 0000000..08ee3ff --- /dev/null +++ b/samples/capture_video.toml @@ -0,0 +1,15 @@ +schema_version = "1" + +[video] +path = "artifacts/capture/capture.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[hashes.frames] +format = "list" +path = "artifacts/capture/frames.hashes" + +[hashes.audio] +format = "list" +path = "artifacts/capture/audio.hashes" diff --git a/samples/dkcr-hd/README.md b/samples/dkcr-hd/README.md new file mode 100644 index 0000000..024d944 --- /dev/null +++ b/samples/dkcr-hd/README.md @@ -0,0 +1,15 @@ +# DKCR HD Sample (Scaffold) + +This sample is a non-proprietary scaffold for SPEC-200. It mirrors the intended DKCR HD boot path +without bundling any retail assets or keys. All inputs here are placeholders with minimal magic +bytes so provenance validation can run. + +## Files +- `module.json` is a minimal lifted module that invokes boot-related syscalls. +- `title.toml` records runtime, asset path, and stub mapping for the first-level milestone. +- `patches/first-level.toml` lists placeholder patches for a first-level boot path. +- `provenance.toml` tracks placeholder inputs (XCI, keyset, program NCA/ExeFS, NSO, NPDM). + +## Asset Policy +- RomFS assets are external and are expected at `game-data/dkcr-hd/romfs`. +- Replace placeholder inputs in `inputs/` with real artifacts in a private workspace. 
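+
+## Inspecting the Module
+As a hedged example of working with the placeholder `module.json`, the sketch below walks the
+entry function's syscall ops with `serde_json` (already a workspace dependency). The field
+access mirrors this sample's JSON shape, not a guaranteed pipeline API.
+
+```rust
+use serde_json::Value;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let text = std::fs::read_to_string("samples/dkcr-hd/module.json")?;
+    let module: Value = serde_json::from_str(&text)?;
+    println!("arch: {}", module["arch"].as_str().unwrap_or("?"));
+    if let Some(functions) = module["functions"].as_array() {
+        for func in functions {
+            println!("function {}:", func["name"].as_str().unwrap_or("?"));
+            if let Some(ops) = func["ops"].as_array() {
+                for op in ops {
+                    // List the boot-related syscalls the scaffold invokes.
+                    if op["op"] == "syscall" {
+                        println!("  syscall {}", op["name"].as_str().unwrap_or("?"));
+                    }
+                }
+            }
+        }
+    }
+    Ok(())
+}
+```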
diff --git a/samples/dkcr-hd/inputs/dkcr-hd.xci b/samples/dkcr-hd/inputs/dkcr-hd.xci new file mode 100644 index 0000000..e5568b5 --- /dev/null +++ b/samples/dkcr-hd/inputs/dkcr-hd.xci @@ -0,0 +1 @@ +DUMMYXCI diff --git a/samples/dkcr-hd/inputs/exefs.pfs0 b/samples/dkcr-hd/inputs/exefs.pfs0 new file mode 100644 index 0000000..74d71ae --- /dev/null +++ b/samples/dkcr-hd/inputs/exefs.pfs0 @@ -0,0 +1 @@ +PFS0 diff --git a/samples/dkcr-hd/inputs/main.npdm b/samples/dkcr-hd/inputs/main.npdm new file mode 100644 index 0000000..b32f660 --- /dev/null +++ b/samples/dkcr-hd/inputs/main.npdm @@ -0,0 +1 @@ +NPDM diff --git a/samples/dkcr-hd/inputs/main.nso b/samples/dkcr-hd/inputs/main.nso new file mode 100644 index 0000000..e9afb92 --- /dev/null +++ b/samples/dkcr-hd/inputs/main.nso @@ -0,0 +1 @@ +NSO0 diff --git a/samples/dkcr-hd/inputs/program.nca b/samples/dkcr-hd/inputs/program.nca new file mode 100644 index 0000000..d247b1e --- /dev/null +++ b/samples/dkcr-hd/inputs/program.nca @@ -0,0 +1 @@ +NCA3 diff --git a/samples/dkcr-hd/inputs/title.keys b/samples/dkcr-hd/inputs/title.keys new file mode 100644 index 0000000..47eb9bd --- /dev/null +++ b/samples/dkcr-hd/inputs/title.keys @@ -0,0 +1 @@ +DUMMYKEYS diff --git a/samples/dkcr-hd/module.json b/samples/dkcr-hd/module.json new file mode 100644 index 0000000..d71b49e --- /dev/null +++ b/samples/dkcr-hd/module.json @@ -0,0 +1,17 @@ +{ + "arch": "aarch64", + "functions": [ + { + "name": "entry", + "ops": [ + { "op": "syscall", "name": "svc_boot", "args": [] }, + { "op": "syscall", "name": "svc_sm", "args": [] }, + { "op": "syscall", "name": "svc_fs", "args": [] }, + { "op": "syscall", "name": "svc_vi", "args": [] }, + { "op": "syscall", "name": "svc_hid", "args": [] }, + { "op": "syscall", "name": "svc_audout", "args": [] }, + { "op": "ret" } + ] + } + ] +} diff --git a/samples/dkcr-hd/patches/first-level.toml b/samples/dkcr-hd/patches/first-level.toml new file mode 100644 index 0000000..be1fbca --- /dev/null +++ b/samples/dkcr-hd/patches/first-level.toml @@ -0,0 +1,23 @@ +schema_version = "1" +patch_set = "first_level" +notes = "Placeholder patch set for DKCR HD first-level boot." + +[[patches]] +id = "force-performance-mode" +kind = "runtime_override" +target = "nn::oe::GetPerformanceMode" +value = "handheld" +notes = "Placeholder: force handheld mode for deterministic output." + +[[patches]] +id = "skip-boot-logo" +kind = "branch-nop" +target = "func:nnMain+0x0000_1000" +notes = "Placeholder: skip boot logo when enabled." + +[[patches]] +id = "stub-network" +kind = "service-stub" +target = "nn::nifm::IStaticService" +value = "stub" +notes = "Placeholder: stub network service to unblock first-level boot." diff --git a/samples/dkcr-hd/provenance.toml b/samples/dkcr-hd/provenance.toml new file mode 100644 index 0000000..e5648fa --- /dev/null +++ b/samples/dkcr-hd/provenance.toml @@ -0,0 +1,65 @@ +schema_version = "1" + +[title] +name = "DKCR HD Sample" +title_id = "0100000000000000" +version = "0.1.0" +region = "US" + +[collection] +device = "demo" +collected_at = "2026-02-03" +notes = "Scaffold provenance with placeholder inputs; proprietary assets remain external." 
+ +[collection.tool] +name = "manual" +version = "1.0" + +[[inputs]] +path = "module.json" +format = "lifted_json" +sha256 = "0927f44712c04c3eb9b3c75e8678138c7253374862abf23e4346495a7e360f35" +size = 489 +role = "lifted_module" + +[[inputs]] +path = "inputs/dkcr-hd.xci" +format = "xci" +sha256 = "c68ddc13f5d1f0ed4cd866cd8c8fcba80ab974fe3e4edae55da586377ff4f47d" +size = 9 +role = "retail_image" + +[[inputs]] +path = "inputs/title.keys" +format = "keyset" +sha256 = "f17a692cd78aafa541c98e73d62f18199512ea39b0900339384842c1457bf9a9" +size = 10 +role = "decryption_keys" + +[[inputs]] +path = "inputs/program.nca" +format = "nca" +sha256 = "13b3ee74e75cd5ace4bd578c92ae3f4ee7ae37fc8ffe6101970617d4077045df" +size = 5 +role = "program_nca" + +[[inputs]] +path = "inputs/exefs.pfs0" +format = "exefs" +sha256 = "1cffc3ff1e9b8a5d9cfa30e10f644cb07addc491e278d55207797663763acd3f" +size = 5 +role = "exefs" + +[[inputs]] +path = "inputs/main.nso" +format = "nso0" +sha256 = "b06873320cf57661c5781d7d923f262761145ccbaee6fb11f556c12c0293165c" +size = 5 +role = "main_executable" + +[[inputs]] +path = "inputs/main.npdm" +format = "npdm" +sha256 = "de0f02829e8eef270e4f2346083e01abea971920535fff90b253da1924b07c35" +size = 5 +role = "process_metadata" diff --git a/samples/dkcr-hd/title.toml b/samples/dkcr-hd/title.toml new file mode 100644 index 0000000..0c4a6ea --- /dev/null +++ b/samples/dkcr-hd/title.toml @@ -0,0 +1,51 @@ +title = "DKCR HD Sample" +entry = "entry" +abi_version = "0.1.0" + +[runtime] +performance_mode = "handheld" + +[runtime.memory_layout] +[[runtime.memory_layout.regions]] +name = "code" +base = 0x1000_0000 +size = 0x0008_0000 +permissions = { read = true, write = false, execute = true } + +[[runtime.memory_layout.regions]] +name = "rodata" +base = 0x1008_0000 +size = 0x0004_0000 +permissions = { read = true, write = false, execute = false } + +[[runtime.memory_layout.regions]] +name = "data" +base = 0x100c_0000 +size = 0x0008_0000 +permissions = { read = true, write = true, execute = false } + +[[runtime.memory_layout.regions]] +name = "heap" +base = 0x2000_0000 +size = 0x0040_0000 +permissions = { read = true, write = true, execute = false } + +[[runtime.memory_layout.regions]] +name = "stack" +base = 0x3000_0000 +size = 0x0010_0000 +permissions = { read = true, write = true, execute = false } + +[assets] +romfs_root = "game-data/dkcr-hd/romfs" + +[patches] +patch_set = "patches/first-level.toml" + +[stubs] +svc_boot = "log" +svc_sm = "log" +svc_fs = "log" +svc_vi = "log" +svc_hid = "log" +svc_audout = "log" diff --git a/samples/input-replay/input_script.toml b/samples/input-replay/input_script.toml new file mode 100644 index 0000000..e10d695 --- /dev/null +++ b/samples/input-replay/input_script.toml @@ -0,0 +1,42 @@ +schema_version = "1" + +[metadata] +title = "Sample Replay" +controller = "pro_controller" +timing_mode = "ms" +recorded_at = "2026-02-03T00:00:00Z" +notes = "Synthetic input sequence for testing input replay." 
+ +[[events]] +time_ms = 0 +control = 100 +value = 1 +note = "Start button down" + +[[events]] +time_ms = 100 +control = 100 +value = 0 +note = "Start button up" + +[[events]] +time_ms = 120 +control = 200 +value = 1 +note = "A button down" + +[[events]] +time_ms = 180 +control = 200 +value = 0 +note = "A button up" + +[[markers]] +name = "boot" +time_ms = 0 +note = "Boot marker" + +[[markers]] +name = "first_input" +time_ms = 120 +note = "First interaction" diff --git a/samples/reference_video.toml b/samples/reference_video.toml new file mode 100644 index 0000000..979761d --- /dev/null +++ b/samples/reference_video.toml @@ -0,0 +1,51 @@ +schema_version = "2" + +[video] +path = "artifacts/reference/reference-normalized.mp4" +width = 1280 +height = 720 +fps = 30.0 + +[normalization] +source_path = "/path/to/source.mov" +normalized_path = "artifacts/reference/reference-normalized.mp4" +notes = "Normalized to the canonical profile for hashing." + +[normalization.profile] +width = 1280 +height = 720 +fps = 30.0 +audio_sample_rate = 48000 +audio_channels = 2 + +[timeline] +start = "00:00:05.000" +end = "00:02:30.000" + +[[timeline.events]] +name = "level_start" +time = "00:00:05.000" + +[[timeline.events]] +name = "level_complete" +time = "00:02:30.000" + +[hashes.frames] +format = "list" +path = "artifacts/reference/frames.hashes" + +[hashes.audio] +format = "list" +path = "artifacts/reference/audio.hashes" + +[validation] +name = "reference-run" +notes = "First-level timeline." +require_audio = true + +[validation.thresholds] +frame_match_ratio = 0.92 +audio_match_ratio = 0.9 +max_drift_frames = 3 +max_dropped_frames = 5 +max_audio_drift_chunks = 3 diff --git a/samples/validation_config.toml b/samples/validation_config.toml new file mode 100644 index 0000000..af5f6b8 --- /dev/null +++ b/samples/validation_config.toml @@ -0,0 +1,11 @@ +schema_version = "1" +name = "override-profile" +notes = "Adjust validation thresholds for exploratory runs." +require_audio = false + +[thresholds] +frame_match_ratio = 0.85 +audio_match_ratio = 0.8 +max_drift_frames = 5 +max_dropped_frames = 10 +max_audio_drift_chunks = 5 diff --git a/scripts/capture-video-macos.sh b/scripts/capture-video-macos.sh new file mode 100755 index 0000000..5c99acf --- /dev/null +++ b/scripts/capture-video-macos.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +set -euo pipefail + +if ! command -v ffmpeg >/dev/null 2>&1; then + echo "ffmpeg is required for capture. Install it with 'brew install ffmpeg'." 
>&2 + exit 1 +fi + +OUT_DIR=${1:-artifacts/capture} +DURATION_SECONDS=${DURATION_SECONDS:-30} +FPS=${FPS:-30} +VIDEO_SIZE=${VIDEO_SIZE:-1280x720} +VIDEO_DEVICE=${VIDEO_DEVICE:-1} +AUDIO_DEVICE=${AUDIO_DEVICE:-0} + +mkdir -p "$OUT_DIR" + +ffmpeg \ + -f avfoundation \ + -framerate "$FPS" \ + -video_size "$VIDEO_SIZE" \ + -i "${VIDEO_DEVICE}:${AUDIO_DEVICE}" \ + -t "$DURATION_SECONDS" \ + "$OUT_DIR/capture.mp4" + +echo "Capture complete: $OUT_DIR/capture.mp4" diff --git a/scripts/normalize-reference-video.sh b/scripts/normalize-reference-video.sh new file mode 100755 index 0000000..5b851b9 --- /dev/null +++ b/scripts/normalize-reference-video.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [[ $# -lt 2 ]]; then + echo "Usage: $0 <source_video> <out_dir>" >&2 + exit 2 +fi + +SOURCE_VIDEO="$1" +OUT_DIR="$2" + +WIDTH="${WIDTH:-1280}" +HEIGHT="${HEIGHT:-720}" +FPS="${FPS:-30}" +AUDIO_RATE="${AUDIO_RATE:-48000}" +AUDIO_CHANNELS="${AUDIO_CHANNELS:-2}" + +NORMALIZED_VIDEO="${OUT_DIR}/reference-normalized.mp4" +FRAMES_DIR="${OUT_DIR}/frames" +AUDIO_WAV="${OUT_DIR}/audio.wav" +FRAMES_HASHES="${OUT_DIR}/frames.hashes" +AUDIO_HASHES="${OUT_DIR}/audio.hashes" + +mkdir -p "${OUT_DIR}" "${FRAMES_DIR}" + +ffmpeg -y -i "${SOURCE_VIDEO}" \ + -vf "scale=${WIDTH}:${HEIGHT},fps=${FPS}" \ + -r "${FPS}" -fps_mode cfr \ + -c:v libx264 -preset slow -crf 18 -pix_fmt yuv420p \ + -c:a pcm_s16le -ar "${AUDIO_RATE}" -ac "${AUDIO_CHANNELS}" \ + "${NORMALIZED_VIDEO}" + +ffmpeg -y -i "${NORMALIZED_VIDEO}" "${FRAMES_DIR}/%08d.png" +ffmpeg -y -i "${NORMALIZED_VIDEO}" -vn -acodec pcm_s16le -ar "${AUDIO_RATE}" -ac "${AUDIO_CHANNELS}" "${AUDIO_WAV}" + +VALIDATOR=() +if command -v recomp-validation >/dev/null 2>&1; then + VALIDATOR=(recomp-validation) +elif command -v cargo >/dev/null 2>&1; then + VALIDATOR=(cargo run -p recomp-validation --) +else + echo "recomp-validation not found and cargo unavailable" >&2 + exit 1 +fi + +"${VALIDATOR[@]}" hash-frames --frames-dir "${FRAMES_DIR}" --out "${FRAMES_HASHES}" +"${VALIDATOR[@]}" hash-audio --audio-file "${AUDIO_WAV}" --out "${AUDIO_HASHES}" + +echo "normalized reference written to ${NORMALIZED_VIDEO}" +echo "hashes written to ${FRAMES_HASHES} and ${AUDIO_HASHES}" diff --git a/specs/README.md b/specs/README.md index 4d557a3..60d3ef5 100644 --- a/specs/README.md +++ b/specs/README.md @@ -29,6 +29,12 @@ This folder contains the project specs for the Switch static recompilation prese - SPEC-180-XCI-INTAKE.md - SPEC-190-VIDEO-BASED-VALIDATION.md - SPEC-200-DKCR-HD-FIRST-LEVEL.md +- SPEC-210-AUTOMATED-RECOMP-LOOP.md +- SPEC-220-INPUT-REPLAY.md +- SPEC-230-REFERENCE-MEDIA-NORMALIZATION.md +- SPEC-240-VALIDATION-ORCHESTRATION.md +- SPEC-250-AUTOMATION-SERVICES.md +- SPEC-260-AGENT-PIPELINE-SECURITY.md ## Template - SPEC-TEMPLATE.md diff --git a/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md b/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md index 002072e..c9a91b1 100644 --- a/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md +++ b/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md @@ -1,7 +1,12 @@ # SPEC-046: Runtime Memory Layout Configuration ## Status -Draft v0.1 +Draft v0.2 + +## Rationale +- Implemented `runtime.memory_layout` parsing with defaults and validation. +- Emitted configured layouts in the build manifest and runtime init code. +- Added tests for default and custom layouts. ## Purpose Allow runtime memory layout to be configured via `title.toml`, with safe defaults when omitted. 
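+
+As a hedged illustration of the checks the Rationale lists (overlap, zero size, overflow), a
+layout validator can sort regions by base address and compare neighbors. The names below are
+illustrative, not the recomp-pipeline internals.
+
+```rust
+#[derive(Debug, Clone, Copy)]
+struct Region {
+    base: u64,
+    size: u64,
+}
+
+fn validate_layout(mut regions: Vec<Region>) -> Result<(), String> {
+    for r in &regions {
+        if r.size == 0 {
+            return Err(format!("region at {:#x} has zero size", r.base));
+        }
+        // checked_add catches base + size overflowing the 64-bit address space.
+        if r.base.checked_add(r.size).is_none() {
+            return Err(format!("region at {:#x} overflows the address space", r.base));
+        }
+    }
+    regions.sort_by_key(|r| r.base);
+    for pair in regions.windows(2) {
+        let (a, b) = (pair[0], pair[1]);
+        if a.base + a.size > b.base {
+            return Err(format!("regions at {:#x} and {:#x} overlap", a.base, b.base));
+        }
+    }
+    Ok(())
+}
+
+fn main() {
+    let layout = vec![
+        Region { base: 0x1000_0000, size: 0x0008_0000 }, // code
+        Region { base: 0x1008_0000, size: 0x0004_0000 }, // rodata
+    ];
+    assert!(validate_layout(layout).is_ok());
+}
+```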
diff --git a/specs/SPEC-047-MEMORY-IMAGE-INIT.md b/specs/SPEC-047-MEMORY-IMAGE-INIT.md index e93c7c5..622a7ef 100644 --- a/specs/SPEC-047-MEMORY-IMAGE-INIT.md +++ b/specs/SPEC-047-MEMORY-IMAGE-INIT.md @@ -1,7 +1,12 @@ # SPEC-047: Memory Image Initialization From Module Segments ## Status -Draft v0.1 +Draft v0.2 + +## Rationale +- Added segment descriptors in module metadata and build manifests. +- Emit initial segment blobs and zero-fill descriptors into pipeline outputs. +- Runtime initialization loads init segments and zeroes BSS with tests. ## Purpose Populate runtime memory regions with initial data derived from module segments (code/rodata/data/bss) so lifted output can execute meaningful memory-backed logic. diff --git a/specs/SPEC-180-XCI-INTAKE.md b/specs/SPEC-180-XCI-INTAKE.md index 2144b7a..b570866 100644 --- a/specs/SPEC-180-XCI-INTAKE.md +++ b/specs/SPEC-180-XCI-INTAKE.md @@ -1,7 +1,13 @@ # SPEC-180: XCI Title Intake ## Status -Draft v0.1 +Draft v0.2 + +## Rationale +- Added XCI intake CLI wiring with optional program selection config. +- Enforced provenance inputs for XCI images and keysets. +- Implemented deterministic ExeFS/NSO extraction and RomFS asset separation. +- Added non-proprietary tests using a mock extractor. ## Purpose Define how the pipeline ingests a user-supplied XCI and extracts code and assets while preserving legal separation and deterministic outputs. diff --git a/specs/SPEC-190-VIDEO-BASED-VALIDATION.md b/specs/SPEC-190-VIDEO-BASED-VALIDATION.md index 045326f..63a1073 100644 --- a/specs/SPEC-190-VIDEO-BASED-VALIDATION.md +++ b/specs/SPEC-190-VIDEO-BASED-VALIDATION.md @@ -1,7 +1,11 @@ # SPEC-190: Video-Based Validation ## Status -Draft v0.1 +Draft v0.3 + +## Rationale +- Validation flow exists, but DKCR validation is paused until the automation loop and input replay are in place. +- New dependencies: SPEC-210 (automation), SPEC-220 (input replay), SPEC-230 (normalization), SPEC-240 (orchestration). ## Purpose Define a validation workflow that compares recompiled output against a reference gameplay video when no instrumented emulator is available. @@ -28,6 +32,10 @@ Define a validation workflow that compares recompiled output against a reference - The report must highlight drift, dropped frames, or audio desync beyond thresholds. - Validation artifacts must remain outside the repo and be referenced via provenance metadata. +## Operator Inputs +- External reference and capture artifacts are required to run DKCR validation. +- Absolute paths and timeline confirmations are tracked in `docs/dkcr-validation-prereqs.md`. + ## Interfaces and Data - `reference_video.toml` with: - input video path diff --git a/specs/SPEC-200-DKCR-HD-FIRST-LEVEL.md b/specs/SPEC-200-DKCR-HD-FIRST-LEVEL.md index 7b8b774..094d3b2 100644 --- a/specs/SPEC-200-DKCR-HD-FIRST-LEVEL.md +++ b/specs/SPEC-200-DKCR-HD-FIRST-LEVEL.md @@ -1,7 +1,11 @@ # SPEC-200: DKCR HD First-Level Milestone (macOS/aarch64) ## Status -Draft v0.1 +Draft v0.3 + +## Rationale +- DKCR validation is paused until the automation loop, input replay, and normalization specs land. +- The existing scaffold remains, but the end-to-end validation loop is not yet automated. ## Purpose Define the first title milestone for the DKCR HD XCI on macOS/aarch64, using video-based validation to confirm the first level is playable. @@ -28,6 +32,10 @@ Define the first title milestone for the DKCR HD XCI on macOS/aarch64, using vid - RomFS assets must be loaded from an external, user-managed path. 
- Validation must compare the first level segment against the reference video and record results. +## Operator Inputs +- DKCR validation depends on external reference and capture artifacts. +- Absolute paths and timeline confirmations are tracked in `docs/dkcr-validation-prereqs.md`. + ## Interfaces and Data - `title.toml` for DKCR HD configuration (stubbed services, patches, asset paths). - `provenance.toml` for XCI and reference video inputs. diff --git a/specs/SPEC-210-AUTOMATED-RECOMP-LOOP.md b/specs/SPEC-210-AUTOMATED-RECOMP-LOOP.md new file mode 100644 index 0000000..fcdf61e --- /dev/null +++ b/specs/SPEC-210-AUTOMATED-RECOMP-LOOP.md @@ -0,0 +1,74 @@ +# SPEC-210: Automated Recompilation Loop + +## Status +Draft v0.2 + +## Rationale +- Added an automation.toml schema and validator for end-to-end runs. +- Added a CLI orchestrator that drives intake, lift, build, capture, hash, and validation steps. +- Added deterministic run-manifest emission with artifact hashes and step summaries. + +## Purpose +Define an automated loop that drives intake, recompilation, execution, capture, and validation in a repeatable pipeline. + +## Goals +- Provide a single entry point that runs the full static recompilation loop. +- Generate deterministic artifacts and a run manifest for every attempt. +- Support incremental iteration while keeping proprietary assets external. + +## Non-Goals +- Fully automated legal intake of retail assets. +- Replacing human review of subjective rendering issues. + +## Background +Validation depends on comparing a captured run against a reference video with user inputs. The project needs a stable automation loop so iteration is fast and reproducible while asset separation stays intact. + +## Requirements +- The loop must accept a config that points to: + - input artifacts (XCI, keyset, module.json, etc.) + - output roots (build, capture, validation) + - reference timeline and input script paths + - toolchain paths (hactool/hactoolnet, ffmpeg) +- The loop must: + - validate provenance and input formats before running + - run intake/lift/build steps and capture stdout/stderr per step + - execute the rebuilt binary with a deterministic runtime config + - capture video/audio output into an external artifact root + - generate frame/audio hashes and run validation + - emit a run manifest with step timings and artifact paths +- The loop must allow resuming from intermediate stages when inputs are unchanged. +- The loop must never copy proprietary assets into the repo or build outputs. + +## Interfaces and Data +- `automation.toml` (example fields): + - `[inputs]` paths for XCI, keyset, module.json, provenance. + - `[outputs]` build_root, capture_root, validation_root. + - `[tools]` hactool_path, ffmpeg_path. + - `[reference]` reference_video_toml, input_script_toml. + - `[run]` command overrides for build/run/capture. +- Output: + - `run-manifest.json` (step results, hashes, timings) + - `validation-report.json` from the validation step + +## Deliverables +- Automation config schema and validator. +- Orchestrator CLI command (or script) that runs the full loop. +- Run manifest format with deterministic ordering. + +## Open Questions +- How should caching be keyed (full input hash, partial stage hash)? +- How should partial failures be recorded for rerun? + +## Acceptance Criteria +- A single command runs intake, build, capture, and validation in sequence. +- The run manifest lists all artifacts with hashes and sizes. 
+- Re-running with identical inputs yields identical artifacts and validation results. + +## Risks +- External tool versions can break determinism. +- Capture timing jitter can cause false validation failures. + +## References +- SPEC-180 XCI Intake +- SPEC-190 Video-Based Validation +- SPEC-220 Input Replay diff --git a/specs/SPEC-220-INPUT-REPLAY.md b/specs/SPEC-220-INPUT-REPLAY.md new file mode 100644 index 0000000..9078f39 --- /dev/null +++ b/specs/SPEC-220-INPUT-REPLAY.md @@ -0,0 +1,71 @@ +# SPEC-220: Input Replay and Interaction Scripts + +## Status +Draft v0.2 + +## Rationale +- Added an input script parser/validator and deterministic playback queue in the runtime. +- Added sample input script data plus docs to align with reference timelines. +- Added unit tests for ordering and marker alignment. + +## Purpose +Define a deterministic input replay format and runtime integration so validation runs can mirror reference video interactions. + +## Goals +- Record or author input scripts that can be replayed deterministically. +- Support time-based and frame-based event scheduling. +- Keep input data separate from proprietary assets. + +## Non-Goals +- Full fidelity controller emulation for all hardware variants. +- Automated extraction of inputs from videos. + +## Background +Reference videos include user interactions. To compare recompiled output to the reference, we need repeatable input playback that can be aligned to the reference timeline. + +## Requirements +- Define a versioned input script format with: + - metadata (title, controller profile, timing mode) + - ordered input events with timestamps or frame indices + - optional markers for timeline alignment +- Support common input types: + - button press/release + - analog axis values + - system/menu button events (optional) +- Provide deterministic playback in the runtime: + - stable ordering for simultaneous events + - configurable timing base (ms or frame index) + - ability to pause, fast-forward, or rewind for debugging +- Emit a replay log for validation and debugging. + +## Interfaces and Data +- `input_script.toml`: + - `schema_version` + - `[metadata]` title, controller, timing_mode + - `[[events]]` time or frame, control, value + - `[[markers]]` name, time/frame +- Runtime integration: + - input script loader + - playback queue feeding the runtime input backend + +## Deliverables +- Input script parser and validator. +- Runtime playback module that feeds input events deterministically. +- Tests that confirm repeatable playback and alignment. + +## Open Questions +- Should input scripts support multiple controller sources? +- How to express analog deadzones and smoothing? + +## Acceptance Criteria +- A sample input script replays deterministically across two runs. +- Playback order is stable for simultaneous events. +- Markers can be aligned to reference video timecodes. + +## Risks +- Input timing drift can skew validation results. +- Games with dynamic input latency may require per-title tuning. + +## References +- SPEC-190 Video-Based Validation +- SPEC-210 Automated Recompilation Loop diff --git a/specs/SPEC-230-REFERENCE-MEDIA-NORMALIZATION.md b/specs/SPEC-230-REFERENCE-MEDIA-NORMALIZATION.md new file mode 100644 index 0000000..d8e34b8 --- /dev/null +++ b/specs/SPEC-230-REFERENCE-MEDIA-NORMALIZATION.md @@ -0,0 +1,65 @@ +# SPEC-230: Reference Media Normalization + +## Status +Draft v0.2 + +## Rationale +- Documented the canonical profile and normalization workflow. 
+- Added a normalization script plus sample reference metadata. +- Added tests that validate frame/audio hash stability. + +## Purpose +Define how reference videos and audio are normalized into comparable artifacts for validation. + +## Goals +- Normalize reference media into a canonical resolution, frame rate, and audio format. +- Record normalization metadata alongside reference timeline data. +- Ensure deterministic hash generation for frame and audio comparisons. + +## Non-Goals +- Storing copyrighted reference video files in the repo. +- Pixel-perfect matching against compressed sources. + +## Background +Reference videos may come from disparate sources (e.g., YouTube). Normalization ensures that comparisons are stable and that drift detection is meaningful. + +## Requirements +- Define a canonical media profile: + - resolution (e.g., 1280x720) + - frame rate (e.g., 30 fps) + - audio sample rate (e.g., 48 kHz, PCM) +- Provide a normalization pipeline that: + - trims to the first-level timeline + - exports normalized frames and audio + - records the normalization command and source metadata +- Store normalization metadata in `reference_video.toml`. +- Keep reference media outside the repo; only hashes and metadata are stored. + +## Interfaces and Data +- `reference_video.toml`: + - source path, normalized path + - canonical profile (width/height/fps/sample rate) + - timeline start/end and markers + - hash sources for frames and audio + +## Deliverables +- Normalization script or documented command sequence. +- Reference media metadata schema updates. +- Tests for hash generation stability on normalized assets. + +## Open Questions +- Should normalization include color space conversion metadata? +- How to handle variable frame rate sources? + +## Acceptance Criteria +- A reference clip can be normalized to the canonical profile. +- Hashes for the normalized clip are stable across two runs. +- Timeline markers align to normalized frames deterministically. + +## Risks +- Source compression artifacts may reduce similarity metrics. +- Variable frame rate sources can introduce drift. + +## References +- SPEC-190 Video-Based Validation +- SPEC-210 Automated Recompilation Loop diff --git a/specs/SPEC-240-VALIDATION-ORCHESTRATION.md b/specs/SPEC-240-VALIDATION-ORCHESTRATION.md new file mode 100644 index 0000000..38b1bd2 --- /dev/null +++ b/specs/SPEC-240-VALIDATION-ORCHESTRATION.md @@ -0,0 +1,69 @@ +# SPEC-240: Validation Orchestration and Triage + +## Status +Draft v0.2 + +## Rationale +- Extended validation reports with triage summaries and normalized metadata. +- Added optional validation override configs for threshold tuning. +- Added deterministic tests for report generation and hash stability. + +## Purpose +Define the orchestration of validation runs, reporting, and triage so regression detection is automated and actionable. + +## Goals +- Run validation steps automatically within the recompilation loop. +- Produce structured reports that highlight drift and likely causes. +- Enable iterative tuning of thresholds without losing provenance. + +## Non-Goals +- Automatic root-cause analysis for all failures. +- Replacing human judgment for subjective visual quality. + +## Background +Validation must be repeatable and consistent across runs. A dedicated orchestration layer can standardize comparison steps and surface failures clearly. + +## Requirements +- Accept reference and capture configs plus optional input script metadata. 
+- Generate a validation report with: + - frame and audio match ratios + - drift offsets and dropped frame counts + - threshold pass/fail results + - links to artifacts (hash lists, diff frames) +- Emit a triage summary with suggested next steps: + - re-run capture + - adjust thresholds + - check input alignment +- Store validation metadata alongside the run manifest. + +## Interfaces and Data +- `reference_video.toml` `[validation]` section: + - thresholds, notes, and audio requirements +- `validation-config.toml` (optional override): + - threshold overrides and drift tolerance windows +- `validation-report.json`: + - status, metrics, and failure details + - artifact references (paths and hashes) + +## Deliverables +- Validation runner that integrates with recomp-validation. +- Report schema and triage summary generator. +- Documentation for interpreting validation results. + +## Open Questions +- Should we emit frame diff image sets on failure by default? +- How should we encode threshold overrides in provenance? + +## Acceptance Criteria +- A validation run generates a report and triage summary in one command. +- Reports are deterministic for identical inputs and captures. +- Failures include enough context to reproduce and debug. + +## Risks +- Overly strict thresholds can generate false negatives. +- Poor capture quality can mask true regressions. + +## References +- SPEC-190 Video-Based Validation +- SPEC-210 Automated Recompilation Loop +- SPEC-230 Reference Media Normalization diff --git a/specs/SPEC-250-AUTOMATION-SERVICES.md b/specs/SPEC-250-AUTOMATION-SERVICES.md new file mode 100644 index 0000000..6654e3f --- /dev/null +++ b/specs/SPEC-250-AUTOMATION-SERVICES.md @@ -0,0 +1,86 @@ +# SPEC-250: Automation Services and Data Flow + +## Status +Draft v0.1 + +## Purpose +Define the service architecture and data flow for fully automated static recompilation across local and AWS environments. + +## Goals +- Describe the core services and their responsibilities. +- Define the run lifecycle and required data flow events. +- Provide minimal interface schemas for run submission and status. + +## Non-Goals +- Detailed runtime ABI or module formats (covered elsewhere). +- UI or operator console requirements. + +## Background +- The pipeline must be fully automated while preserving strict input and output separation. +- Hybrid deployment is required to support local testing and cloud scale. + +## Requirements +- The architecture MUST support both local-only and AWS-backed execution. +- Each run MUST be traceable from intake to output with immutable provenance records. +- Artifact storage MUST be content-addressed and immutable once written. +- The orchestration layer MUST support retries and resumable stages. +- Workers MUST be stateless and operate on explicit inputs and outputs. +- The model interface MUST be isolated behind a Model Gateway service. 
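+
+As a sketch of the content-addressing requirement above, an artifact ID can be derived from
+the artifact bytes with the workspace's existing `sha2` dependency. The `artifact://<sha256>`
+form matches the manifests in the next section; the helper itself is hypothetical.
+
+```rust
+use sha2::{Digest, Sha256};
+
+/// Map artifact bytes to a deterministic, immutable identifier.
+fn artifact_id(bytes: &[u8]) -> String {
+    let digest = Sha256::digest(bytes);
+    let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
+    format!("artifact://{hex}")
+}
+
+fn main() {
+    // Identical bytes always produce the identical ID, so stores can dedupe
+    // and verify artifacts without trusting paths or timestamps.
+    println!("{}", artifact_id(b"module manifest bytes"));
+}
+```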
+ +## Interfaces and Data +- Run submission request (minimal JSON schema): + +```json +{ + "run_id": "uuid", + "module_manifest": "artifact://hash", + "config_manifest": "artifact://hash", + "provenance_manifest": "artifact://hash", + "requested_by": "principal_id", + "priority": "standard", + "execution_mode": "local|cloud|hybrid" +} +``` + +- Run status record (minimal JSON schema): + +```json +{ + "run_id": "uuid", + "state": "queued|running|blocked|failed|succeeded", + "current_stage": "string", + "artifacts": ["artifact://hash"], + "started_at": "rfc3339", + "updated_at": "rfc3339" +} +``` + +Required events: +- `recomp.run.requested` +- `recomp.run.planned` +- `recomp.run.stage.completed` +- `recomp.run.validation.completed` +- `recomp.run.completed` + +## Deliverables +- Service inventory with ownership and run-time responsibilities. +- Run lifecycle state machine definition. +- Documented data flow with required events and artifacts. + +## Open Questions +- Should run state be sourced from a single metadata store or event log only? +- What is the minimum artifact retention policy for failed runs? + +## Acceptance Criteria +- A run can be submitted using the minimal schema and observed end-to-end. +- Every stage emits an event with deterministic artifacts and logs. +- The architecture supports running the same input locally or in AWS without changing manifests. + +## Risks +- Overly granular services could increase operational complexity. +- Divergent local and cloud behavior could reduce determinism. + +## References +- SPEC-030-RECOMP-PIPELINE.md +- SPEC-210-AUTOMATED-RECOMP-LOOP.md +- SPEC-240-VALIDATION-ORCHESTRATION.md diff --git a/specs/SPEC-260-AGENT-PIPELINE-SECURITY.md b/specs/SPEC-260-AGENT-PIPELINE-SECURITY.md new file mode 100644 index 0000000..8e8e2a0 --- /dev/null +++ b/specs/SPEC-260-AGENT-PIPELINE-SECURITY.md @@ -0,0 +1,83 @@ +# SPEC-260: Agent Pipeline Security and Automation + +## Status +Draft v0.1 + +## Purpose +Define security, governance, and automation requirements for the agent-managed recompilation pipeline using GPT-5.2-Codex. + +## Goals +- Establish security controls for model usage and artifact handling. +- Define automation triggers, approvals, and auditability. +- Provide guardrails for deterministic, policy-compliant agent behavior. + +## Non-Goals +- Network topology diagrams or detailed infrastructure templates. +- Model evaluation or benchmark methodology. + +## Background +- Automated recompilation requires using an LLM to plan and supervise stages. +- The pipeline must keep inputs and outputs cleanly separated while preserving provenance. + +## Requirements +- The Model Gateway MUST be the only egress path for model requests. +- The pipeline MUST use the OpenAI Responses API for GPT-5.2-Codex. +- Prompts and responses MUST be logged with run-id correlation. +- Inputs MUST be redacted to remove sensitive content before any model request. +- Model responses MUST be validated against schemas before execution. +- All agent actions MUST be reproducible from stored prompts and artifacts. +- Automation triggers MUST support both manual and scheduled execution. +- High-cost stages MUST support optional human approval gates. +- Secrets MUST be stored in managed secret stores and never in logs. +- Encryption MUST be enforced for all artifact storage and transport. 
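+
+As a hedged sketch of the schema-validation requirement above, the Model Gateway can
+deserialize each request into a typed envelope before any call leaves the environment. The
+struct mirrors the minimal JSON schema in the next section; the real gateway types may differ.
+
+```rust
+use serde::Deserialize;
+
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)] // reject fields the schema does not define
+struct ModelRequestEnvelope {
+    run_id: String,
+    stage: String,
+    model: String,
+    reasoning_effort: String, // "low" | "medium" | "high" | "xhigh"
+    input_artifacts: Vec<String>,
+    redaction_profile: String,
+    response_schema: String,
+}
+
+fn main() -> Result<(), serde_json::Error> {
+    let raw = r#"{
+        "run_id": "uuid",
+        "stage": "plan",
+        "model": "gpt-5.2-codex",
+        "reasoning_effort": "high",
+        "input_artifacts": ["artifact://hash"],
+        "redaction_profile": "policy-id",
+        "response_schema": "schema-id"
+    }"#;
+    let envelope: ModelRequestEnvelope = serde_json::from_str(raw)?;
+    // Reject requests that name a model outside the routing policy.
+    assert!(["gpt-5.2-codex", "gpt-5.2"].contains(&envelope.model.as_str()));
+    println!("validated request for stage {}", envelope.stage);
+    Ok(())
+}
+```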
+ +## Interfaces and Data +- Model request envelope (minimal JSON schema): + +```json +{ + "run_id": "uuid", + "stage": "string", + "model": "gpt-5.2-codex", + "reasoning_effort": "low|medium|high|xhigh", + "input_artifacts": ["artifact://hash"], + "redaction_profile": "policy-id", + "response_schema": "schema-id" +} +``` + +- Automation policy record (minimal JSON schema): + +```json +{ + "policy_id": "string", + "requires_approval": true, + "max_cost_usd": 500, + "allowed_models": ["gpt-5.2-codex", "gpt-5.2"], + "run_windows": ["weekday:09:00-18:00"] +} +``` + +## Deliverables +- Security control checklist for model usage and artifact handling. +- Automation policy definitions for scheduled and manual runs. +- Audit log format covering prompts, responses, and approvals. + +## Open Questions +- What redaction profiles are required for homebrew vs research inputs? +- What is the default reasoning_effort for each pipeline stage? + +## Acceptance Criteria +- Every model call is routed through the Model Gateway with a stored audit record. +- Every automated run can be paused for approval when policy requires. +- A complete run can be replayed with the same prompts and artifacts. + +## Risks +- Overly strict gating could slow iteration. +- Inconsistent redaction could leak sensitive data. + +## References +- SPEC-020-INPUTS-PROVENANCE.md +- SPEC-095-BUILD-MANIFEST-INTEGRITY.md +- SPEC-096-BUNDLE-MANIFEST-INTEGRITY.md +- SPEC-210-AUTOMATED-RECOMP-LOOP.md