From 2375fb9ceeb5257ce57ef8f456a21e39137d6315 Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 14:37:52 -0800 Subject: [PATCH 1/7] Add runtime memory model spec --- PLANS.md | 16 ++++++++ specs/README.md | 1 + specs/SPEC-045-RUNTIME-MEMORY.md | 65 ++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 specs/SPEC-045-RUNTIME-MEMORY.md diff --git a/PLANS.md b/PLANS.md index de559ad..507f4f0 100644 --- a/PLANS.md +++ b/PLANS.md @@ -6,6 +6,7 @@ This file tracks implementation work derived from specs that do not yet have a c - SPEC-000 Project Charter and Ethics - SPEC-010 Target Platform Baseline - SPEC-020 Inputs and Provenance +- SPEC-045 Runtime Memory Model and Load/Store Lowering - SPEC-090 Build, Packaging, and Distribution - SPEC-095 Build Manifest Integrity - SPEC-096 Bundle Manifest Integrity @@ -59,6 +60,21 @@ Exit criteria (from SPEC-020) - The toolchain refuses to build without provenance metadata. - A format detector identifies NCA/ExeFS/NSO0/NRO0/NRR0 inputs and logs the chosen path. +## SPEC-045: Runtime Memory Model and Load/Store Lowering +Outcome +- Block-based output can execute basic load/store instructions against a minimal runtime memory model. + +Work items +- [ ] Define a memory layout descriptor schema and emit it with outputs. +- [ ] Implement runtime memory regions with alignment, bounds, and permission checks. +- [ ] Lower ISA load/store ops to runtime memory helper calls. +- [ ] Add tests and sample blocks that validate load/store behavior and error handling. + +Exit criteria (from SPEC-045) +- Block-based output executes a test block with loads and stores using runtime helpers. +- Unaligned or out-of-bounds accesses return deterministic error codes. +- A sample pipeline output includes a memory layout descriptor that matches runtime regions. + ## SPEC-090: Build, Packaging, and Distribution Outcome - Produce a reproducible, policy-compliant bundle layout with a release checklist. 
diff --git a/specs/README.md b/specs/README.md index 9203213..aea077a 100644 --- a/specs/README.md +++ b/specs/README.md @@ -8,6 +8,7 @@ This folder contains the project specs for the Switch static recompilation prese - SPEC-020-INPUTS-PROVENANCE.md - SPEC-030-RECOMP-PIPELINE.md - SPEC-040-RUNTIME-ABI.md +- SPEC-045-RUNTIME-MEMORY.md - SPEC-050-CPU-ISA.md - SPEC-060-GPU-GRAPHICS.md - SPEC-070-OS-SERVICES.md diff --git a/specs/SPEC-045-RUNTIME-MEMORY.md b/specs/SPEC-045-RUNTIME-MEMORY.md new file mode 100644 index 0000000..2ded224 --- /dev/null +++ b/specs/SPEC-045-RUNTIME-MEMORY.md @@ -0,0 +1,65 @@ +# SPEC-045: Runtime Memory Model and Load/Store Lowering + +## Status +Draft v0.1 + +## Purpose +Define the minimal runtime memory model and the lowering rules needed so block-based output can execute load/store instructions beyond stub paths. + +## Goals +- Establish a byte-addressable, little-endian memory model shared by the runtime and lifted code. +- Specify the runtime ABI surface for minimal load/store operations. +- Define lowering rules for load/store instructions into runtime memory operations. +- Enable block-based output to execute simple memory-backed logic in tests and samples. + +## Non-Goals +- Full MMU emulation or virtual memory paging. +- Cache coherency modeling or performance tuning. +- Memory-mapped IO or GPU memory behavior. + +## Background +The current ISA execution and block-based output rely on stubbed or in-memory test scaffolds. A minimal runtime memory model is needed to execute lifted blocks with load/store instructions consistently across pipeline and runtime layers. + +## Requirements +- The runtime address space is 64-bit, byte-addressable, and little-endian. +- A minimal set of memory regions is defined: code, rodata, data, heap, and stack. +- Each region has a base address, size, and permissions (R/W/X) provided by config or manifest metadata. +- Runtime memory access enforces: + - Alignment rules per access size (1, 2, 4, 8 bytes). 
+ - Bounds checks against the owning region. + - Permission checks for read, write, and execute as applicable. +- Load/store semantics are deterministic: + - Loads return the zero-extended value of the addressed bytes. + - Stores write the least-significant bytes of the value. +- Errors are surfaced with explicit error codes that can be reported by the runtime and by test harnesses. + +## Interfaces and Data +- Runtime ABI exposes memory access helpers with C ABI stability: + - `recomp_mem_load_u8`, `recomp_mem_load_u16`, `recomp_mem_load_u32`, `recomp_mem_load_u64`. + - `recomp_mem_store_u8`, `recomp_mem_store_u16`, `recomp_mem_store_u32`, `recomp_mem_store_u64`. + - Each function returns a status code and writes the value via out-parameter (for loads). +- A memory layout descriptor is emitted in output metadata, listing each region's base, size, and permissions. +- Lowering rules map ISA load/store operations to the corresponding runtime helpers using computed effective addresses. + +## Deliverables +- A runtime memory module implementing region tracking and load/store helpers. +- Lowering logic that rewrites load/store instructions into runtime calls. +- Metadata schema updates for memory layout descriptors. +- Tests that execute blocks containing load/store instructions and validate memory effects and error paths. + +## Open Questions +- Which error codes should be standardized across runtime and pipeline layers? +- How should initial memory images be populated for code and data regions? +- When should the runtime trap versus return an error code to the caller? + +## Acceptance Criteria +- Block-based output executes a test block with loads and stores using the runtime helpers. +- Unaligned or out-of-bounds accesses return deterministic error codes. +- A sample pipeline output includes a memory layout descriptor that matches the runtime regions. + +## Risks +- Mismatched assumptions between ISA semantics and runtime helpers could cause subtle correctness bugs. 
+- Early memory model decisions may constrain future MMU or IO modeling. + +## References +- https://developer.arm.com/documentation/den0024/a From 7a2095d5d3bbcda2dff351b762fddf139d6ccfc6 Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 15:41:11 -0800 Subject: [PATCH 2/7] Implement runtime memory model and load/store lowering --- PLANS.md | 8 +- crates/recomp-pipeline/src/lib.rs | 1 + crates/recomp-pipeline/src/memory.rs | 86 +++++ crates/recomp-pipeline/src/output.rs | 20 ++ crates/recomp-pipeline/src/pipeline.rs | 71 +++- crates/recomp-pipeline/tests/pipeline.rs | 61 ++++ crates/recomp-runtime/src/lib.rs | 83 +++++ crates/recomp-runtime/src/memory.rs | 429 +++++++++++++++++++++++ specs/SPEC-045-RUNTIME-MEMORY.md | 2 +- 9 files changed, 740 insertions(+), 21 deletions(-) create mode 100644 crates/recomp-pipeline/src/memory.rs create mode 100644 crates/recomp-runtime/src/memory.rs diff --git a/PLANS.md b/PLANS.md index 507f4f0..46a6acf 100644 --- a/PLANS.md +++ b/PLANS.md @@ -65,10 +65,10 @@ Outcome - Block-based output can execute basic load/store instructions against a minimal runtime memory model. Work items -- [ ] Define a memory layout descriptor schema and emit it with outputs. -- [ ] Implement runtime memory regions with alignment, bounds, and permission checks. -- [ ] Lower ISA load/store ops to runtime memory helper calls. -- [ ] Add tests and sample blocks that validate load/store behavior and error handling. +- [x] Define a memory layout descriptor schema and emit it with outputs. +- [x] Implement runtime memory regions with alignment, bounds, and permission checks. +- [x] Lower ISA load/store ops to runtime memory helper calls. +- [x] Add tests and sample blocks that validate load/store behavior and error handling. Exit criteria (from SPEC-045) - Block-based output executes a test block with loads and stores using runtime helpers. 
diff --git a/crates/recomp-pipeline/src/lib.rs b/crates/recomp-pipeline/src/lib.rs index 4071d4c..fcd119a 100644 --- a/crates/recomp-pipeline/src/lib.rs +++ b/crates/recomp-pipeline/src/lib.rs @@ -2,6 +2,7 @@ pub mod bundle; pub mod config; pub mod homebrew; pub mod input; +pub mod memory; pub mod output; pub mod pipeline; pub mod provenance; diff --git a/crates/recomp-pipeline/src/memory.rs b/crates/recomp-pipeline/src/memory.rs new file mode 100644 index 0000000..37f63bd --- /dev/null +++ b/crates/recomp-pipeline/src/memory.rs @@ -0,0 +1,86 @@ +use serde::Serialize; + +#[derive(Debug, Serialize, Clone)] +pub struct MemoryLayoutDescriptor { + pub regions: Vec, +} + +impl MemoryLayoutDescriptor { + pub fn minimal_default() -> Self { + Self { + regions: vec![ + MemoryRegionDescriptor::new( + "code", + 0x1000_0000, + 0x0001_0000, + MemoryPermissionsDescriptor::new(true, false, true), + ), + MemoryRegionDescriptor::new( + "rodata", + 0x1001_0000, + 0x0001_0000, + MemoryPermissionsDescriptor::new(true, false, false), + ), + MemoryRegionDescriptor::new( + "data", + 0x1002_0000, + 0x0001_0000, + MemoryPermissionsDescriptor::new(true, true, false), + ), + MemoryRegionDescriptor::new( + "heap", + 0x2000_0000, + 0x0004_0000, + MemoryPermissionsDescriptor::new(true, true, false), + ), + MemoryRegionDescriptor::new( + "stack", + 0x3000_0000, + 0x0004_0000, + MemoryPermissionsDescriptor::new(true, true, false), + ), + ], + } + } +} + +#[derive(Debug, Serialize, Clone)] +pub struct MemoryRegionDescriptor { + pub name: String, + pub base: u64, + pub size: u64, + pub permissions: MemoryPermissionsDescriptor, +} + +impl MemoryRegionDescriptor { + pub fn new( + name: impl Into, + base: u64, + size: u64, + permissions: MemoryPermissionsDescriptor, + ) -> Self { + Self { + name: name.into(), + base, + size, + permissions, + } + } +} + +#[derive(Debug, Serialize, Clone, Copy)] +pub struct MemoryPermissionsDescriptor { + pub read: bool, + pub write: bool, + pub execute: bool, +} + 
+impl MemoryPermissionsDescriptor { + pub fn new(read: bool, write: bool, execute: bool) -> Self { + Self { + read, + write, + execute, + } + } +} diff --git a/crates/recomp-pipeline/src/output.rs b/crates/recomp-pipeline/src/output.rs index 69e7026..77e2d26 100644 --- a/crates/recomp-pipeline/src/output.rs +++ b/crates/recomp-pipeline/src/output.rs @@ -1,3 +1,4 @@ +use crate::memory::MemoryLayoutDescriptor; use crate::pipeline::{ensure_dir, FunctionBody, RustFunction, RustProgram, RustTerminator}; use serde::Serialize; use sha2::{Digest, Sha256}; @@ -16,6 +17,7 @@ pub struct BuildManifest { pub config_sha256: String, pub provenance_sha256: String, pub inputs: Vec, + pub memory_layout: MemoryLayoutDescriptor, pub manifest_self_hash_basis: String, pub generated_files: Vec, } @@ -175,6 +177,7 @@ fn emit_main_rs(program: &RustProgram) -> String { }); out.push_str(");\n"); out.push_str(" recomp_runtime::init(&runtime_config);\n"); + emit_memory_layout_init(&mut out, &program.memory_layout); out.push_str(&format!(" {}()?;\n", program.entry)); out.push_str( " Ok(())\n} @@ -189,6 +192,23 @@ fn emit_main_rs(program: &RustProgram) -> String { out } +fn emit_memory_layout_init(out: &mut String, layout: &MemoryLayoutDescriptor) { + out.push_str(" let memory_layout = recomp_runtime::MemoryLayout::new(vec![\n"); + for region in &layout.regions { + out.push_str(&format!( + " recomp_runtime::MemoryRegionSpec::new(\"{}\", {}, {}, recomp_runtime::MemoryPermissions::new({}, {}, {})),\n", + region.name, + format!("{:#x}", region.base), + format!("{:#x}", region.size), + region.permissions.read, + region.permissions.write, + region.permissions.execute + )); + } + out.push_str(" ]);\n"); + out.push_str(" recomp_runtime::init_memory(memory_layout)?;\n"); +} + fn emit_function(function: &RustFunction) -> String { let mut out = String::new(); out.push_str(&format!( diff --git a/crates/recomp-pipeline/src/pipeline.rs b/crates/recomp-pipeline/src/pipeline.rs index cd3afc5..c13187b 100644 
--- a/crates/recomp-pipeline/src/pipeline.rs +++ b/crates/recomp-pipeline/src/pipeline.rs @@ -1,6 +1,7 @@ use crate::config::{PerformanceMode, StubBehavior, TitleConfig}; use crate::homebrew::ModuleJson; use crate::input::{Block, Function, Module, Op, Terminator}; +use crate::memory::MemoryLayoutDescriptor; use crate::output::{emit_project, BuildManifest, GeneratedFile, InputSummary}; use crate::provenance::{ProvenanceManifest, ValidatedInput}; use pathdiff::diff_paths; @@ -104,6 +105,7 @@ pub fn run_pipeline(options: PipelineOptions) -> Result::new(), }; @@ -165,6 +167,7 @@ fn translate_module(module: &Module, config: &TitleConfig) -> Result { + Op::LoadI8 { dst, addr, offset } => { track_reg(regs, dst); track_reg(regs, addr); - lines.push(format!( - "panic!({});", - rust_string_literal("load op not supported in runtime") - )); + emit_load(lines, dst, addr, *offset, "mem_load_u8"); + } + Op::LoadI16 { dst, addr, offset } => { + track_reg(regs, dst); + track_reg(regs, addr); + emit_load(lines, dst, addr, *offset, "mem_load_u16"); + } + Op::LoadI32 { dst, addr, offset } => { + track_reg(regs, dst); + track_reg(regs, addr); + emit_load(lines, dst, addr, *offset, "mem_load_u32"); } - Op::StoreI8 { src, addr, .. } - | Op::StoreI16 { src, addr, .. } - | Op::StoreI32 { src, addr, .. } - | Op::StoreI64 { src, addr, .. 
} => { + Op::LoadI64 { dst, addr, offset } => { + track_reg(regs, dst); + track_reg(regs, addr); + emit_load(lines, dst, addr, *offset, "mem_load_u64"); + } + Op::StoreI8 { src, addr, offset } => { track_reg(regs, src); track_reg(regs, addr); - lines.push(format!( - "panic!({});", - rust_string_literal("store op not supported in runtime") - )); + emit_store(lines, src, addr, *offset, "mem_store_u8"); + } + Op::StoreI16 { src, addr, offset } => { + track_reg(regs, src); + track_reg(regs, addr); + emit_store(lines, src, addr, *offset, "mem_store_u16"); + } + Op::StoreI32 { src, addr, offset } => { + track_reg(regs, src); + track_reg(regs, addr); + emit_store(lines, src, addr, *offset, "mem_store_u32"); + } + Op::StoreI64 { src, addr, offset } => { + track_reg(regs, src); + track_reg(regs, addr); + emit_store(lines, src, addr, *offset, "mem_store_u64"); } Op::Br { target } => { lines.push(format!( @@ -470,6 +491,23 @@ fn render_call_line(target: &str) -> String { } } +fn emit_load(lines: &mut Vec, dst: &str, addr: &str, offset: i64, helper: &str) { + let address_expr = format!("({addr} as u64).wrapping_add({offset} as u64)"); + lines.push(format!("let __recomp_addr = {address_expr};")); + lines.push(format!( + "let __recomp_value = recomp_runtime::{helper}(__recomp_addr)?;" + )); + lines.push(format!("{dst} = __recomp_value as i64;")); +} + +fn emit_store(lines: &mut Vec, src: &str, addr: &str, offset: i64, helper: &str) { + let address_expr = format!("({addr} as u64).wrapping_add({offset} as u64)"); + lines.push(format!("let __recomp_addr = {address_expr};")); + lines.push(format!( + "recomp_runtime::{helper}(__recomp_addr, {src} as u64)?;" + )); +} + fn render_cond_expr(cond: &str) -> Option { let expr = match cond { "eq" => "flag_z", @@ -556,6 +594,7 @@ pub struct RustProgram { pub entry: String, pub functions: Vec, pub performance_mode: PerformanceMode, + pub memory_layout: MemoryLayoutDescriptor, } impl RustProgram { diff --git 
a/crates/recomp-pipeline/tests/pipeline.rs b/crates/recomp-pipeline/tests/pipeline.rs index fa7d791..5d8a2ea 100644 --- a/crates/recomp-pipeline/tests/pipeline.rs +++ b/crates/recomp-pipeline/tests/pipeline.rs @@ -92,6 +92,13 @@ fn pipeline_emits_project() { let manifest_json: serde_json::Value = serde_json::from_str(&manifest_src).expect("parse manifest.json"); assert!(manifest_json.get("module_sha256").is_some()); + assert!(manifest_json.get("memory_layout").is_some()); + let regions = manifest_json + .get("memory_layout") + .and_then(|value| value.get("regions")) + .and_then(|value| value.as_array()) + .expect("memory_layout.regions array"); + assert_eq!(regions.len(), 5); assert_eq!( manifest_json .get("manifest_self_hash_basis") @@ -109,6 +116,60 @@ fn pipeline_emits_project() { assert_eq!(report.detected_inputs.len(), 2); } +#[test] +fn pipeline_lowers_load_store_ops() { + let temp = tempfile::tempdir().expect("tempdir"); + let module_path = temp.path().join("module.json"); + let config_path = temp.path().join("title.toml"); + let provenance_path = temp.path().join("provenance.toml"); + let nso_path = temp.path().join("main.nso"); + let out_dir = temp.path().join("out"); + let runtime_path = PathBuf::from("../crates/recomp-runtime"); + + let module_json = r#"{ + "arch": "aarch64", + "functions": [ + { + "name": "entry", + "ops": [ + { "op": "const_i64", "dst": "x0", "imm": 4096 }, + { "op": "const_i64", "dst": "x1", "imm": 123 }, + { "op": "store_i32", "src": "x1", "addr": "x0", "offset": 0 }, + { "op": "load_i32", "dst": "x2", "addr": "x0", "offset": 0 }, + { "op": "ret" } + ] + } + ] +}"#; + + fs::write(&module_path, module_json).expect("write module"); + fs::write(&config_path, CONFIG_TOML).expect("write config"); + fs::write(&nso_path, b"NSO0").expect("write nso"); + + let module_hash = sha256_hex(module_json.as_bytes()); + let nso_hash = sha256_hex(b"NSO0"); + let provenance = format!( + "schema_version = \"1\"\n\n[title]\nname = \"Minimal 
Sample\"\ntitle_id = \"0100000000000000\"\nversion = \"1.0.0\"\nregion = \"US\"\n\n[collection]\ndevice = \"demo\"\ncollected_at = \"2026-01-30\"\n\n[collection.tool]\nname = \"manual\"\nversion = \"1.0\"\n\n[[inputs]]\npath = \"module.json\"\nformat = \"lifted_json\"\nsha256 = \"{module_hash}\"\nsize = {module_size}\nrole = \"lifted_module\"\n\n[[inputs]]\npath = \"main.nso\"\nformat = \"nso0\"\nsha256 = \"{nso_hash}\"\nsize = 4\nrole = \"main_executable\"\n", + module_hash = module_hash, + module_size = module_json.len() + ); + fs::write(&provenance_path, provenance).expect("write provenance"); + + run_pipeline(PipelineOptions { + module_path, + config_path, + provenance_path, + out_dir: out_dir.clone(), + runtime_path, + }) + .expect("pipeline runs"); + + let main_rs = out_dir.join("src/main.rs"); + let main_src = fs::read_to_string(main_rs).expect("read main.rs"); + assert!(main_src.contains("mem_store_u32")); + assert!(main_src.contains("mem_load_u32")); +} + #[test] fn pipeline_rejects_homebrew_module_json() { let temp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/recomp-runtime/src/lib.rs b/crates/recomp-runtime/src/lib.rs index d4e7171..4e427b5 100644 --- a/crates/recomp-runtime/src/lib.rs +++ b/crates/recomp-runtime/src/lib.rs @@ -1,6 +1,7 @@ use std::fmt; mod homebrew; +mod memory; pub const ABI_VERSION: &str = "0.1.0"; @@ -8,6 +9,11 @@ pub use homebrew::{ entrypoint_shim, DeterministicClock, InputEvent, InputQueue, LoaderConfig, LoaderConfigBuilder, LoaderConfigEntry, LoaderConfigKey, NroEntrypoint, RuntimeManifest, ServiceStub, NRO_ENTRY_X1, }; +pub use memory::{ + init_memory, recomp_mem_load_u16, recomp_mem_load_u32, recomp_mem_load_u64, recomp_mem_load_u8, + recomp_mem_store_u16, recomp_mem_store_u32, recomp_mem_store_u64, recomp_mem_store_u8, + MemoryLayout, MemoryLayoutError, MemoryPermissions, MemoryRegionSpec, MemoryStatus, +}; pub use recomp_gfx::{CommandStream, GraphicsBackend, GraphicsError, StubBackend}; pub use 
recomp_services::{ stub_handler, ServiceAccessControl, ServiceCall, ServiceError, ServiceLogger, ServiceRegistry, @@ -48,6 +54,14 @@ pub enum RuntimeError { MissingLoaderConfigKey { key: LoaderConfigKey }, #[error("runtime manifest serialization failed: {message}")] ManifestSerialize { message: String }, + #[error("memory layout error: {0}")] + MemoryLayout(#[from] MemoryLayoutError), + #[error("memory access error {status:?} at {address:#x} size {size}")] + MemoryAccess { + status: MemoryStatus, + address: u64, + size: usize, + }, } pub type RuntimeResult = Result; @@ -63,6 +77,75 @@ pub fn init(config: &RuntimeConfig) { ); } +pub fn init_default_memory(layout: MemoryLayout) -> RuntimeResult<()> { + init_memory(layout)?; + Ok(()) +} + +pub fn mem_load_u8(address: u64) -> RuntimeResult { + memory::mem_load_u8(address).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 1, + }) +} + +pub fn mem_load_u16(address: u64) -> RuntimeResult { + memory::mem_load_u16(address).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 2, + }) +} + +pub fn mem_load_u32(address: u64) -> RuntimeResult { + memory::mem_load_u32(address).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 4, + }) +} + +pub fn mem_load_u64(address: u64) -> RuntimeResult { + memory::mem_load_u64(address).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 8, + }) +} + +pub fn mem_store_u8(address: u64, value: u64) -> RuntimeResult<()> { + memory::mem_store_u8(address, value).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 1, + }) +} + +pub fn mem_store_u16(address: u64, value: u64) -> RuntimeResult<()> { + memory::mem_store_u16(address, value).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 2, + }) +} + +pub fn mem_store_u32(address: u64, value: u64) -> RuntimeResult<()> { + memory::mem_store_u32(address, value).map_err(|status| RuntimeError::MemoryAccess { + 
status, + address, + size: 4, + }) +} + +pub fn mem_store_u64(address: u64, value: u64) -> RuntimeResult<()> { + memory::mem_store_u64(address, value).map_err(|status| RuntimeError::MemoryAccess { + status, + address, + size: 8, + }) +} + pub fn syscall_log(name: &str, args: &[i64]) -> RuntimeResult<()> { println!("[recomp-runtime] syscall {name} args={}", ArgsDisplay(args)); Ok(()) diff --git a/crates/recomp-runtime/src/memory.rs b/crates/recomp-runtime/src/memory.rs new file mode 100644 index 0000000..a5341a7 --- /dev/null +++ b/crates/recomp-runtime/src/memory.rs @@ -0,0 +1,429 @@ +use std::sync::{Mutex, OnceLock}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(i32)] +pub enum MemoryStatus { + Ok = 0, + Unaligned = 1, + OutOfBounds = 2, + PermissionDenied = 3, + Unmapped = 4, + Uninitialized = 5, + InvalidOutPtr = 6, + Internal = 7, +} + +impl MemoryStatus { + pub fn is_ok(self) -> bool { + matches!(self, MemoryStatus::Ok) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct MemoryPermissions { + read: bool, + write: bool, + execute: bool, +} + +impl MemoryPermissions { + pub const fn new(read: bool, write: bool, execute: bool) -> Self { + Self { + read, + write, + execute, + } + } + + pub const fn read_only() -> Self { + Self::new(true, false, false) + } + + pub const fn read_write() -> Self { + Self::new(true, true, false) + } + + pub const fn read_execute() -> Self { + Self::new(true, false, true) + } + + pub fn allows_read(self) -> bool { + self.read + } + + pub fn allows_write(self) -> bool { + self.write + } + + pub fn allows_execute(self) -> bool { + self.execute + } +} + +#[derive(Debug, Clone)] +pub struct MemoryRegionSpec { + pub name: String, + pub base: u64, + pub size: u64, + pub permissions: MemoryPermissions, +} + +impl MemoryRegionSpec { + pub fn new( + name: impl Into, + base: u64, + size: u64, + permissions: MemoryPermissions, + ) -> Self { + Self { + name: name.into(), + base, + size, + permissions, + } + } +} + 
+#[derive(Debug, Clone)] +pub struct MemoryLayout { + pub regions: Vec, +} + +impl MemoryLayout { + pub fn new(regions: Vec) -> Self { + Self { regions } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum MemoryLayoutError { + #[error("memory region {name} has zero size")] + ZeroSizedRegion { name: String }, + #[error("memory region {name} overflows address space")] + RegionOverflow { name: String }, + #[error("memory regions {left} and {right} overlap")] + RegionOverlap { left: String, right: String }, +} + +#[derive(Debug, Clone, Copy)] +enum AccessKind { + Read, + Write, + #[allow(dead_code)] + Execute, +} + +#[derive(Debug)] +struct MemoryRegion { + spec: MemoryRegionSpec, + data: Vec, +} + +#[derive(Debug)] +struct RuntimeMemory { + regions: Vec, +} + +impl RuntimeMemory { + fn new(layout: &MemoryLayout) -> Result { + let mut regions = Vec::with_capacity(layout.regions.len()); + for spec in &layout.regions { + if spec.size == 0 { + return Err(MemoryLayoutError::ZeroSizedRegion { + name: spec.name.clone(), + }); + } + let end = spec.base.checked_add(spec.size).ok_or_else(|| { + MemoryLayoutError::RegionOverflow { + name: spec.name.clone(), + } + })?; + regions.push((spec.clone(), end)); + } + + regions.sort_by(|a, b| a.0.base.cmp(&b.0.base)); + for pair in regions.windows(2) { + let left = &pair[0].0; + let left_end = pair[0].1; + let right = &pair[1].0; + if left_end > right.base { + return Err(MemoryLayoutError::RegionOverlap { + left: left.name.clone(), + right: right.name.clone(), + }); + } + } + + let mapped = regions + .into_iter() + .map(|(spec, _)| MemoryRegion { + data: vec![0u8; spec.size as usize], + spec, + }) + .collect(); + + Ok(Self { regions: mapped }) + } + + fn load(&self, address: u64, size: usize) -> Result { + let region = self.resolve_region(address, size, AccessKind::Read)?; + let offset = (address - region.spec.base) as usize; + let mut value = 0u64; + for i in 0..size { + value |= (region.data[offset + i] as u64) << (i * 8); + } + 
Ok(value) + } + + fn store(&mut self, address: u64, size: usize, value: u64) -> Result<(), MemoryStatus> { + let region = self.resolve_region_mut(address, size, AccessKind::Write)?; + let offset = (address - region.spec.base) as usize; + for i in 0..size { + region.data[offset + i] = ((value >> (i * 8)) & 0xFF) as u8; + } + Ok(()) + } + + fn resolve_region( + &self, + address: u64, + size: usize, + access: AccessKind, + ) -> Result<&MemoryRegion, MemoryStatus> { + let index = self.resolve_region_inner(address, size, access)?; + Ok(&self.regions[index]) + } + + fn resolve_region_mut( + &mut self, + address: u64, + size: usize, + access: AccessKind, + ) -> Result<&mut MemoryRegion, MemoryStatus> { + let index = self.resolve_region_inner(address, size, access)?; + Ok(&mut self.regions[index]) + } + + fn resolve_region_inner( + &self, + address: u64, + size: usize, + access: AccessKind, + ) -> Result { + if size == 0 { + return Err(MemoryStatus::Internal); + } + let size_u64 = size as u64; + if address % size_u64 != 0 { + return Err(MemoryStatus::Unaligned); + } + let end = address + .checked_add(size_u64) + .ok_or(MemoryStatus::OutOfBounds)?; + + for (index, region) in self.regions.iter().enumerate() { + let region_end = region + .spec + .base + .checked_add(region.spec.size) + .ok_or(MemoryStatus::OutOfBounds)?; + if address < region.spec.base || address >= region_end { + continue; + } + if end > region_end { + return Err(MemoryStatus::OutOfBounds); + } + if !self.check_permissions(region, access) { + return Err(MemoryStatus::PermissionDenied); + } + return Ok(index); + } + + Err(MemoryStatus::Unmapped) + } + + fn check_permissions(&self, region: &MemoryRegion, access: AccessKind) -> bool { + match access { + AccessKind::Read => region.spec.permissions.allows_read(), + AccessKind::Write => region.spec.permissions.allows_write(), + AccessKind::Execute => region.spec.permissions.allows_execute(), + } + } +} + +static MEMORY: OnceLock> = OnceLock::new(); + +pub fn 
init_memory(layout: MemoryLayout) -> Result<(), MemoryLayoutError> { + let memory = RuntimeMemory::new(&layout)?; + let _ = MEMORY.set(Mutex::new(memory)); + Ok(()) +} + +fn with_memory_mut(mut f: F) -> MemoryStatus +where + F: FnMut(&mut RuntimeMemory) -> Result<(), MemoryStatus>, +{ + let memory = match MEMORY.get() { + Some(memory) => memory, + None => return MemoryStatus::Uninitialized, + }; + let mut guard = match memory.lock() { + Ok(guard) => guard, + Err(_) => return MemoryStatus::Internal, + }; + match f(&mut guard) { + Ok(()) => MemoryStatus::Ok, + Err(err) => err, + } +} + +fn with_memory(mut f: F) -> Result +where + F: FnMut(&RuntimeMemory) -> Result, +{ + let memory = match MEMORY.get() { + Some(memory) => memory, + None => return Err(MemoryStatus::Uninitialized), + }; + let guard = match memory.lock() { + Ok(guard) => guard, + Err(_) => return Err(MemoryStatus::Internal), + }; + f(&guard) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_load_u8(address: u64, out: *mut u64) -> MemoryStatus { + mem_load_raw(address, 1, out) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_load_u16(address: u64, out: *mut u64) -> MemoryStatus { + mem_load_raw(address, 2, out) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_load_u32(address: u64, out: *mut u64) -> MemoryStatus { + mem_load_raw(address, 4, out) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_load_u64(address: u64, out: *mut u64) -> MemoryStatus { + mem_load_raw(address, 8, out) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_store_u8(address: u64, value: u64) -> MemoryStatus { + mem_store_raw(address, 1, value) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_store_u16(address: u64, value: u64) -> MemoryStatus { + mem_store_raw(address, 2, value) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_store_u32(address: u64, value: u64) -> MemoryStatus { + mem_store_raw(address, 4, value) +} + +#[no_mangle] +pub extern "C" fn recomp_mem_store_u64(address: u64, value: u64) -> MemoryStatus { + 
mem_store_raw(address, 8, value) +} + +fn mem_load_raw(address: u64, size: usize, out: *mut u64) -> MemoryStatus { + if out.is_null() { + return MemoryStatus::InvalidOutPtr; + } + match with_memory(|memory| memory.load(address, size)) { + Ok(value) => unsafe { + *out = value; + MemoryStatus::Ok + }, + Err(err) => err, + } +} + +fn mem_store_raw(address: u64, size: usize, value: u64) -> MemoryStatus { + with_memory_mut(|memory| memory.store(address, size, value)) +} + +pub(crate) fn mem_load_u8(address: u64) -> Result { + mem_load(address, 1) +} + +pub(crate) fn mem_load_u16(address: u64) -> Result { + mem_load(address, 2) +} + +pub(crate) fn mem_load_u32(address: u64) -> Result { + mem_load(address, 4) +} + +pub(crate) fn mem_load_u64(address: u64) -> Result { + mem_load(address, 8) +} + +pub(crate) fn mem_store_u8(address: u64, value: u64) -> Result<(), MemoryStatus> { + mem_store(address, 1, value) +} + +pub(crate) fn mem_store_u16(address: u64, value: u64) -> Result<(), MemoryStatus> { + mem_store(address, 2, value) +} + +pub(crate) fn mem_store_u32(address: u64, value: u64) -> Result<(), MemoryStatus> { + mem_store(address, 4, value) +} + +pub(crate) fn mem_store_u64(address: u64, value: u64) -> Result<(), MemoryStatus> { + mem_store(address, 8, value) +} + +fn mem_load(address: u64, size: usize) -> Result { + with_memory(|memory| memory.load(address, size)) +} + +fn mem_store(address: u64, size: usize, value: u64) -> Result<(), MemoryStatus> { + let status = with_memory_mut(|memory| memory.store(address, size, value)); + if status.is_ok() { + Ok(()) + } else { + Err(status) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_layout() -> MemoryLayout { + MemoryLayout::new(vec![ + MemoryRegionSpec::new("data", 0x1000, 0x40, MemoryPermissions::read_write()), + MemoryRegionSpec::new("ro", 0x2000, 0x40, MemoryPermissions::read_only()), + ]) + } + + #[test] + fn load_store_round_trip() { + init_memory(test_layout()).expect("init memory"); + 
mem_store_u32(0x1004, 0xDEADBEEF).expect("store"); + let value = mem_load_u32(0x1004).expect("load"); + assert_eq!(value, 0xDEADBEEF); + } + + #[test] + fn unaligned_is_error() { + init_memory(test_layout()).expect("init memory"); + let err = mem_load_u32(0x1002).unwrap_err(); + assert_eq!(err, MemoryStatus::Unaligned); + } + + #[test] + fn permission_denied_for_write() { + init_memory(test_layout()).expect("init memory"); + let err = mem_store_u8(0x2000, 1).unwrap_err(); + assert_eq!(err, MemoryStatus::PermissionDenied); + } +} diff --git a/specs/SPEC-045-RUNTIME-MEMORY.md b/specs/SPEC-045-RUNTIME-MEMORY.md index 2ded224..ec92d42 100644 --- a/specs/SPEC-045-RUNTIME-MEMORY.md +++ b/specs/SPEC-045-RUNTIME-MEMORY.md @@ -1,7 +1,7 @@ # SPEC-045: Runtime Memory Model and Load/Store Lowering ## Status -Draft v0.1 +Draft v0.2 ## Purpose Define the minimal runtime memory model and the lowering rules needed so block-based output can execute load/store instructions beyond stub paths. From 213c6c3e76eeecf1e60b9cbef947f98579f08796 Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 15:42:13 -0800 Subject: [PATCH 3/7] Fix memory layout emission formatting --- crates/recomp-pipeline/src/output.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/recomp-pipeline/src/output.rs b/crates/recomp-pipeline/src/output.rs index 77e2d26..170fe2c 100644 --- a/crates/recomp-pipeline/src/output.rs +++ b/crates/recomp-pipeline/src/output.rs @@ -196,10 +196,10 @@ fn emit_memory_layout_init(out: &mut String, layout: &MemoryLayoutDescriptor) { out.push_str(" let memory_layout = recomp_runtime::MemoryLayout::new(vec![\n"); for region in &layout.regions { out.push_str(&format!( - " recomp_runtime::MemoryRegionSpec::new(\"{}\", {}, {}, recomp_runtime::MemoryPermissions::new({}, {}, {})),\n", + " recomp_runtime::MemoryRegionSpec::new(\"{}\", {:#x}, {:#x}, recomp_runtime::MemoryPermissions::new({}, {}, {})),\n", region.name, - format!("{:#x}", region.base), - 
format!("{:#x}", region.size), + region.base, + region.size, region.permissions.read, region.permissions.write, region.permissions.execute From 8ec723e4ffc83a9797c1f0a17af819da2d2af224 Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 16:08:11 -0800 Subject: [PATCH 4/7] Add configurable memory layout to title config --- crates/recomp-pipeline/src/config.rs | 52 ++++++++++++++++++++++++ crates/recomp-pipeline/src/pipeline.rs | 2 +- crates/recomp-pipeline/tests/pipeline.rs | 31 +++++++++++++- samples/minimal/title.toml | 31 ++++++++++++++ 4 files changed, 114 insertions(+), 2 deletions(-) diff --git a/crates/recomp-pipeline/src/config.rs b/crates/recomp-pipeline/src/config.rs index d7f08e2..fea6f1b 100644 --- a/crates/recomp-pipeline/src/config.rs +++ b/crates/recomp-pipeline/src/config.rs @@ -1,3 +1,4 @@ +use crate::memory::{MemoryLayoutDescriptor, MemoryPermissionsDescriptor, MemoryRegionDescriptor}; use serde::Deserialize; use std::collections::BTreeMap; use std::str::FromStr; @@ -46,6 +47,26 @@ struct RawRuntimeConfig { performance_mode: Option, } +#[derive(Debug, Deserialize)] +struct RawMemoryLayoutConfig { + regions: Vec, +} + +#[derive(Debug, Deserialize)] +struct RawMemoryRegionConfig { + name: String, + base: u64, + size: u64, + permissions: RawMemoryPermissionsConfig, +} + +#[derive(Debug, Deserialize)] +struct RawMemoryPermissionsConfig { + read: bool, + write: bool, + execute: bool, +} + #[derive(Debug)] pub struct RuntimeConfig { pub performance_mode: PerformanceMode, @@ -60,6 +81,8 @@ struct RawTitleConfig { stubs: BTreeMap, #[serde(default)] runtime: Option, + #[serde(default)] + memory_layout: Option, } #[derive(Debug)] @@ -69,6 +92,7 @@ pub struct TitleConfig { pub abi_version: String, pub stubs: BTreeMap, pub runtime: RuntimeConfig, + pub memory_layout: MemoryLayoutDescriptor, } impl TitleConfig { @@ -85,12 +109,40 @@ impl TitleConfig { .and_then(|runtime| runtime.performance_mode) .unwrap_or_else(|| "handheld".to_string()); let 
performance_mode = PerformanceMode::from_str(&runtime_mode)?; + let memory_layout = match raw.memory_layout { + Some(layout) => parse_memory_layout(layout)?, + None => MemoryLayoutDescriptor::minimal_default(), + }; Ok(TitleConfig { title: raw.title, entry: raw.entry, abi_version: raw.abi_version, stubs, runtime: RuntimeConfig { performance_mode }, + memory_layout, }) } } + +fn parse_memory_layout(layout: RawMemoryLayoutConfig) -> Result { + if layout.regions.is_empty() { + return Err("memory_layout.regions must include at least one region".to_string()); + } + let regions = layout + .regions + .into_iter() + .map(|region| { + MemoryRegionDescriptor::new( + region.name, + region.base, + region.size, + MemoryPermissionsDescriptor::new( + region.permissions.read, + region.permissions.write, + region.permissions.execute, + ), + ) + }) + .collect(); + Ok(MemoryLayoutDescriptor { regions }) +} diff --git a/crates/recomp-pipeline/src/pipeline.rs b/crates/recomp-pipeline/src/pipeline.rs index c13187b..99b8800 100644 --- a/crates/recomp-pipeline/src/pipeline.rs +++ b/crates/recomp-pipeline/src/pipeline.rs @@ -167,7 +167,7 @@ fn translate_module(module: &Module, config: &TitleConfig) -> Result Date: Mon, 2 Feb 2026 16:28:39 -0800 Subject: [PATCH 5/7] Add configurable memory layout and init images --- PLANS.md | 32 ++++++ crates/recomp-pipeline/src/config.rs | 21 ++-- crates/recomp-pipeline/src/homebrew/lift.rs | 2 + crates/recomp-pipeline/src/input.rs | 23 ++++ crates/recomp-pipeline/src/memory.rs | 63 +++++++++++ crates/recomp-pipeline/src/output.rs | 55 +++++++++- crates/recomp-pipeline/src/pipeline.rs | 110 ++++++++++++++++++- crates/recomp-pipeline/tests/pipeline.rs | 96 +++++++++++++++-- crates/recomp-runtime/src/lib.rs | 18 +++- crates/recomp-runtime/src/memory.rs | 113 ++++++++++++++++++-- specs/README.md | 2 + specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md | 53 +++++++++ specs/SPEC-047-MEMORY-IMAGE-INIT.md | 54 ++++++++++ 13 files changed, 612 insertions(+), 30 deletions(-) 
create mode 100644 specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md create mode 100644 specs/SPEC-047-MEMORY-IMAGE-INIT.md diff --git a/PLANS.md b/PLANS.md index 46a6acf..fc6d9ca 100644 --- a/PLANS.md +++ b/PLANS.md @@ -7,6 +7,8 @@ This file tracks implementation work derived from specs that do not yet have a c - SPEC-010 Target Platform Baseline - SPEC-020 Inputs and Provenance - SPEC-045 Runtime Memory Model and Load/Store Lowering +- SPEC-046 Runtime Memory Layout Configuration +- SPEC-047 Memory Image Initialization - SPEC-090 Build, Packaging, and Distribution - SPEC-095 Build Manifest Integrity - SPEC-096 Bundle Manifest Integrity @@ -75,6 +77,36 @@ Exit criteria (from SPEC-045) - Unaligned or out-of-bounds accesses return deterministic error codes. - A sample pipeline output includes a memory layout descriptor that matches runtime regions. +## SPEC-046: Runtime Memory Layout Configuration +Outcome +- Runtime memory layout is configurable via `title.toml` while preserving a safe default. + +Work items +- [ ] Extend `title.toml` schema to include `runtime.memory_layout` regions. +- [ ] Validate region overlap, zero sizes, and overflow errors. +- [ ] Emit configured memory layout in `manifest.json` and generated runtime init. +- [ ] Add tests for default layout and custom layout parsing. + +Exit criteria (from SPEC-046) +- Custom memory layout in `title.toml` is parsed and emitted in `manifest.json`. +- Invalid layouts fail the pipeline with clear errors. +- Default behavior remains unchanged when no layout is provided. + +## SPEC-047: Memory Image Initialization +Outcome +- Runtime memory is initialized from module segment metadata (code/rodata/data/bss). + +Work items +- [ ] Define segment descriptor schema and carry it through pipeline output metadata. +- [ ] Populate runtime memory regions with initial segment bytes and zeroed bss. +- [ ] Validate init sizes and bounds during initialization. +- [ ] Add tests covering initialized load/store behavior and error paths. 
+ +Exit criteria (from SPEC-047) +- A sample module with init bytes executes a load/store path against initialized memory. +- BSS regions are zeroed deterministically. +- Invalid init sizes or region mismatches fail with clear errors. + ## SPEC-090: Build, Packaging, and Distribution Outcome - Produce a reproducible, policy-compliant bundle layout with a release checklist. diff --git a/crates/recomp-pipeline/src/config.rs b/crates/recomp-pipeline/src/config.rs index fea6f1b..0f83af1 100644 --- a/crates/recomp-pipeline/src/config.rs +++ b/crates/recomp-pipeline/src/config.rs @@ -41,10 +41,12 @@ impl FromStr for PerformanceMode { } } -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Default)] struct RawRuntimeConfig { #[serde(default)] performance_mode: Option, + #[serde(default)] + memory_layout: Option, } #[derive(Debug, Deserialize)] @@ -81,8 +83,6 @@ struct RawTitleConfig { stubs: BTreeMap, #[serde(default)] runtime: Option, - #[serde(default)] - memory_layout: Option, } #[derive(Debug)] @@ -104,12 +104,12 @@ impl TitleConfig { let parsed = StubBehavior::from_str(&behavior)?; stubs.insert(name, parsed); } - let runtime_mode = raw - .runtime - .and_then(|runtime| runtime.performance_mode) + let runtime = raw.runtime.unwrap_or_default(); + let runtime_mode = runtime + .performance_mode .unwrap_or_else(|| "handheld".to_string()); let performance_mode = PerformanceMode::from_str(&runtime_mode)?; - let memory_layout = match raw.memory_layout { + let memory_layout = match runtime.memory_layout { Some(layout) => parse_memory_layout(layout)?, None => MemoryLayoutDescriptor::minimal_default(), }; @@ -125,9 +125,6 @@ impl TitleConfig { } fn parse_memory_layout(layout: RawMemoryLayoutConfig) -> Result { - if layout.regions.is_empty() { - return Err("memory_layout.regions must include at least one region".to_string()); - } let regions = layout .regions .into_iter() @@ -144,5 +141,7 @@ fn parse_memory_layout(layout: RawMemoryLayoutConfig) -> Result, pub functions: 
Vec, } +#[derive(Debug, Deserialize, Serialize)] +pub struct ModuleSegment { + pub name: String, + pub base: u64, + pub size: u64, + pub permissions: ModuleSegmentPermissions, + #[serde(default)] + pub init_path: Option, + #[serde(default)] + pub init_size: Option, + #[serde(default)] + pub zero_fill: bool, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ModuleSegmentPermissions { + pub read: bool, + pub write: bool, + pub execute: bool, +} + #[derive(Debug, Deserialize, Serialize)] pub struct Function { pub name: String, diff --git a/crates/recomp-pipeline/src/memory.rs b/crates/recomp-pipeline/src/memory.rs index 37f63bd..9c16bbb 100644 --- a/crates/recomp-pipeline/src/memory.rs +++ b/crates/recomp-pipeline/src/memory.rs @@ -42,6 +42,35 @@ impl MemoryLayoutDescriptor { ], } } + + pub fn validate(&self) -> Result<(), String> { + if self.regions.is_empty() { + return Err("memory layout must define at least one region".to_string()); + } + + let mut ranges = Vec::with_capacity(self.regions.len()); + for region in &self.regions { + if region.size == 0 { + return Err(format!("memory region {} has zero size", region.name)); + } + let end = region + .base + .checked_add(region.size) + .ok_or_else(|| format!("memory region {} overflows address space", region.name))?; + ranges.push((region.name.as_str(), region.base, end)); + } + + ranges.sort_by(|a, b| a.1.cmp(&b.1)); + for window in ranges.windows(2) { + let left = window[0]; + let right = window[1]; + if left.2 > right.1 { + return Err(format!("memory regions {} and {} overlap", left.0, right.0)); + } + } + + Ok(()) + } } #[derive(Debug, Serialize, Clone)] @@ -68,6 +97,40 @@ impl MemoryRegionDescriptor { } } +#[derive(Debug, Serialize, Clone)] +pub struct MemoryImageDescriptor { + pub init_segments: Vec, + pub zero_segments: Vec, +} + +impl MemoryImageDescriptor { + pub fn empty() -> Self { + Self { + init_segments: Vec::new(), + zero_segments: Vec::new(), + } + } + + pub fn is_empty(&self) -> bool { + 
self.init_segments.is_empty() && self.zero_segments.is_empty() + } +} + +#[derive(Debug, Serialize, Clone)] +pub struct MemoryInitSegmentDescriptor { + pub name: String, + pub base: u64, + pub size: u64, + pub init_path: String, +} + +#[derive(Debug, Serialize, Clone)] +pub struct MemoryZeroSegmentDescriptor { + pub name: String, + pub base: u64, + pub size: u64, +} + #[derive(Debug, Serialize, Clone, Copy)] pub struct MemoryPermissionsDescriptor { pub read: bool, diff --git a/crates/recomp-pipeline/src/output.rs b/crates/recomp-pipeline/src/output.rs index 170fe2c..f7dc718 100644 --- a/crates/recomp-pipeline/src/output.rs +++ b/crates/recomp-pipeline/src/output.rs @@ -1,5 +1,7 @@ -use crate::memory::MemoryLayoutDescriptor; -use crate::pipeline::{ensure_dir, FunctionBody, RustFunction, RustProgram, RustTerminator}; +use crate::memory::{MemoryImageDescriptor, MemoryLayoutDescriptor}; +use crate::pipeline::{ + ensure_dir, FunctionBody, MemoryImageSource, RustFunction, RustProgram, RustTerminator, +}; use serde::Serialize; use sha2::{Digest, Sha256}; use std::fs; @@ -18,6 +20,8 @@ pub struct BuildManifest { pub provenance_sha256: String, pub inputs: Vec, pub memory_layout: MemoryLayoutDescriptor, + #[serde(skip_serializing_if = "Option::is_none")] + pub memory_image: Option, pub manifest_self_hash_basis: String, pub generated_files: Vec, } @@ -44,6 +48,7 @@ pub fn emit_project( runtime_rel: &Path, program: &RustProgram, manifest: &BuildManifest, + memory_sources: &[MemoryImageSource], ) -> Result<(Vec, BuildManifest), String> { ensure_dir(out_dir).map_err(|err| err.to_string())?; @@ -71,6 +76,23 @@ pub fn emit_project( size: main_rs.len() as u64, }); + if !memory_sources.is_empty() { + for source in memory_sources { + let dest_path = out_dir.join(&source.dest_path); + if let Some(parent) = dest_path.parent() { + ensure_dir(parent).map_err(|err| err.to_string())?; + } + let bytes = fs::read(&source.source_path).map_err(|err| err.to_string())?; + fs::write(&dest_path, 
&bytes).map_err(|err| err.to_string())?; + written.push(dest_path.clone()); + generated_files.push(GeneratedFile { + path: source.dest_path.to_string_lossy().to_string(), + sha256: sha256_bytes(&bytes), + size: bytes.len() as u64, + }); + } + } + let manifest_path = out_dir.join("manifest.json"); let mut updated_manifest = manifest.clone(); updated_manifest.generated_files = generated_files; @@ -178,6 +200,7 @@ fn emit_main_rs(program: &RustProgram) -> String { out.push_str(");\n"); out.push_str(" recomp_runtime::init(&runtime_config);\n"); emit_memory_layout_init(&mut out, &program.memory_layout); + emit_memory_image_init(&mut out, program.memory_image.as_ref()); out.push_str(&format!(" {}()?;\n", program.entry)); out.push_str( " Ok(())\n} @@ -209,6 +232,34 @@ fn emit_memory_layout_init(out: &mut String, layout: &MemoryLayoutDescriptor) { out.push_str(" recomp_runtime::init_memory(memory_layout)?;\n"); } +fn emit_memory_image_init(out: &mut String, image: Option<&MemoryImageDescriptor>) { + let Some(image) = image else { + return; + }; + + out.push_str(" let mut init_segments = Vec::new();\n"); + for segment in &image.init_segments { + out.push_str(&format!( + " let bytes = std::fs::read(\"{}\").map_err(|err| recomp_runtime::RuntimeError::Io {{ message: err.to_string() }})?;\n", + segment.init_path + )); + out.push_str(&format!( + " init_segments.push(recomp_runtime::MemoryInitSegment::new(\"{}\", {:#x}, {:#x}, bytes));\n", + segment.name, segment.base, segment.size + )); + } + + out.push_str(" let zero_segments = vec![\n"); + for segment in &image.zero_segments { + out.push_str(&format!( + " recomp_runtime::MemoryZeroSegment::new(\"{}\", {:#x}, {:#x}),\n", + segment.name, segment.base, segment.size + )); + } + out.push_str(" ];\n"); + out.push_str(" recomp_runtime::apply_memory_image(&init_segments, &zero_segments)?;\n"); +} + fn emit_function(function: &RustFunction) -> String { let mut out = String::new(); out.push_str(&format!( diff --git 
a/crates/recomp-pipeline/src/pipeline.rs b/crates/recomp-pipeline/src/pipeline.rs index 99b8800..5301a4a 100644 --- a/crates/recomp-pipeline/src/pipeline.rs +++ b/crates/recomp-pipeline/src/pipeline.rs @@ -1,7 +1,10 @@ use crate::config::{PerformanceMode, StubBehavior, TitleConfig}; use crate::homebrew::ModuleJson; use crate::input::{Block, Function, Module, Op, Terminator}; -use crate::memory::MemoryLayoutDescriptor; +use crate::memory::{ + MemoryImageDescriptor, MemoryInitSegmentDescriptor, MemoryLayoutDescriptor, + MemoryZeroSegmentDescriptor, +}; use crate::output::{emit_project, BuildManifest, GeneratedFile, InputSummary}; use crate::provenance::{ProvenanceManifest, ValidatedInput}; use pathdiff::diff_paths; @@ -25,6 +28,12 @@ pub struct PipelineReport { pub detected_inputs: Vec, } +#[derive(Debug)] +pub struct MemoryImageSource { + pub source_path: PathBuf, + pub dest_path: PathBuf, +} + #[derive(Debug, thiserror::Error)] pub enum PipelineError { #[error("io error: {0}")] @@ -80,7 +89,8 @@ pub fn run_pipeline(options: PipelineOptions) -> Result Result::new(), }; let (files_written, _manifest) = - emit_project(&out_dir, &runtime_rel, &program, &manifest).map_err(PipelineError::Emit)?; + emit_project(&out_dir, &runtime_rel, &program, &manifest, &memory_sources) + .map_err(PipelineError::Emit)?; Ok(PipelineReport { out_dir, @@ -154,7 +166,95 @@ fn looks_like_homebrew_module(value: &serde_json::Value) -> bool { .is_some() } -fn translate_module(module: &Module, config: &TitleConfig) -> Result { +fn build_memory_image( + module: &Module, + module_path: &Path, +) -> Result<(Option, Vec), PipelineError> { + if module.segments.is_empty() { + return Ok((None, Vec::new())); + } + + let base_dir = module_path.parent().unwrap_or_else(|| Path::new(".")); + let mut descriptor = MemoryImageDescriptor::empty(); + let mut sources = Vec::new(); + + for (index, segment) in module.segments.iter().enumerate() { + let mut wrote_init = false; + if let Some(init_path) = 
&segment.init_path { + let source_path = base_dir.join(init_path); + let metadata = fs::metadata(&source_path)?; + let file_size = metadata.len(); + let init_size = segment.init_size.unwrap_or(file_size); + if init_size != file_size { + return Err(PipelineError::Module(format!( + "segment {} init_size {} does not match file size {}", + segment.name, init_size, file_size + ))); + } + if init_size > segment.size { + return Err(PipelineError::Module(format!( + "segment {} init_size {} exceeds segment size {}", + segment.name, init_size, segment.size + ))); + } + let name = sanitize_segment_name(&segment.name, index); + let dest_path = PathBuf::from(format!("segments/{name}.bin")); + descriptor.init_segments.push(MemoryInitSegmentDescriptor { + name: segment.name.clone(), + base: segment.base, + size: init_size, + init_path: dest_path.to_string_lossy().to_string(), + }); + sources.push(MemoryImageSource { + source_path, + dest_path, + }); + wrote_init = true; + + if segment.zero_fill && segment.size > init_size { + descriptor.zero_segments.push(MemoryZeroSegmentDescriptor { + name: format!("{}_bss", segment.name), + base: segment.base + init_size, + size: segment.size - init_size, + }); + } + } + + if segment.zero_fill && !wrote_init { + descriptor.zero_segments.push(MemoryZeroSegmentDescriptor { + name: segment.name.clone(), + base: segment.base, + size: segment.size, + }); + } + } + + if descriptor.is_empty() { + return Ok((None, Vec::new())); + } + + Ok((Some(descriptor), sources)) +} + +fn sanitize_segment_name(name: &str, index: usize) -> String { + let mut sanitized: String = name + .chars() + .map(|ch| match ch { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' => ch, + _ => '-', + }) + .collect(); + if sanitized.trim_matches('-').is_empty() { + sanitized = "segment".to_string(); + } + format!("{sanitized}-{index}") +} + +fn translate_module( + module: &Module, + config: &TitleConfig, + memory_image: Option, +) -> Result { let entry = config.entry.clone(); let 
mut functions = Vec::new(); for function in &module.functions { @@ -168,6 +268,7 @@ fn translate_module(module: &Module, config: &TitleConfig) -> Result, pub performance_mode: PerformanceMode, pub memory_layout: MemoryLayoutDescriptor, + pub memory_image: Option, } impl RustProgram { diff --git a/crates/recomp-pipeline/tests/pipeline.rs b/crates/recomp-pipeline/tests/pipeline.rs index dc62df8..15af8de 100644 --- a/crates/recomp-pipeline/tests/pipeline.rs +++ b/crates/recomp-pipeline/tests/pipeline.rs @@ -26,14 +26,17 @@ title = "Minimal Sample" entry = "entry" abi_version = "0.1.0" -[memory_layout] -[[memory_layout.regions]] +[runtime] +performance_mode = "handheld" + +[runtime.memory_layout] +[[runtime.memory_layout.regions]] name = "code" base = 0x1000_0000 size = 0x0001_0000 permissions = { read = true, write = false, execute = true } -[[memory_layout.regions]] +[[runtime.memory_layout.regions]] name = "data" base = 0x2000_0000 size = 0x0004_0000 @@ -87,7 +90,7 @@ fn pipeline_emits_project() { let runtime_path = PathBuf::from("../crates/recomp-runtime"); fs::write(&module_path, MODULE_JSON).expect("write module"); - fs::write(&config_path, CONFIG_TOML).expect("write config"); + fs::write(&config_path, CONFIG_TOML_DEFAULT_LAYOUT).expect("write config"); fs::write(&nso_path, b"NSO0").expect("write nso"); let module_hash = sha256_hex(MODULE_JSON.as_bytes()); @@ -127,7 +130,7 @@ fn pipeline_emits_project() { .and_then(|value| value.get("regions")) .and_then(|value| value.as_array()) .expect("memory_layout.regions array"); - assert_eq!(regions.len(), 2); + assert_eq!(regions.len(), 5); assert_eq!( manifest_json .get("manifest_self_hash_basis") @@ -172,7 +175,7 @@ fn pipeline_lowers_load_store_ops() { }"#; fs::write(&module_path, module_json).expect("write module"); - fs::write(&config_path, CONFIG_TOML).expect("write config"); + fs::write(&config_path, CONFIG_TOML_DEFAULT_LAYOUT).expect("write config"); fs::write(&nso_path, b"NSO0").expect("write nso"); let 
module_hash = sha256_hex(module_json.as_bytes()); @@ -199,6 +202,87 @@ fn pipeline_lowers_load_store_ops() { assert!(main_src.contains("mem_load_u32")); } +#[test] +fn pipeline_emits_memory_image() { + let temp = tempfile::tempdir().expect("tempdir"); + let module_path = temp.path().join("module.json"); + let config_path = temp.path().join("title.toml"); + let provenance_path = temp.path().join("provenance.toml"); + let nso_path = temp.path().join("main.nso"); + let data_path = temp.path().join("data.bin"); + let out_dir = temp.path().join("out"); + let runtime_path = PathBuf::from("../crates/recomp-runtime"); + + let module_json = r#"{ + "arch": "aarch64", + "segments": [ + { + "name": "data", + "base": 4096, + "size": 8, + "permissions": { "read": true, "write": true, "execute": false }, + "init_path": "data.bin", + "init_size": 4, + "zero_fill": true + } + ], + "functions": [ + { "name": "entry", "ops": [ { "op": "ret" } ] } + ] +}"#; + + fs::write(&module_path, module_json).expect("write module"); + fs::write(&config_path, CONFIG_TOML_DEFAULT_LAYOUT).expect("write config"); + fs::write(&nso_path, b"NSO0").expect("write nso"); + fs::write(&data_path, [1u8, 2, 3, 4]).expect("write data"); + + let module_hash = sha256_hex(module_json.as_bytes()); + let nso_hash = sha256_hex(b"NSO0"); + let provenance = format!( + "schema_version = \"1\"\n\n[title]\nname = \"Minimal Sample\"\ntitle_id = \"0100000000000000\"\nversion = \"1.0.0\"\nregion = \"US\"\n\n[collection]\ndevice = \"demo\"\ncollected_at = \"2026-01-30\"\n\n[collection.tool]\nname = \"manual\"\nversion = \"1.0\"\n\n[[inputs]]\npath = \"module.json\"\nformat = \"lifted_json\"\nsha256 = \"{module_hash}\"\nsize = {module_size}\nrole = \"lifted_module\"\n\n[[inputs]]\npath = \"main.nso\"\nformat = \"nso0\"\nsha256 = \"{nso_hash}\"\nsize = 4\nrole = \"main_executable\"\n", + module_hash = module_hash, + module_size = module_json.len() + ); + fs::write(&provenance_path, provenance).expect("write provenance"); + + 
run_pipeline(PipelineOptions { + module_path, + config_path, + provenance_path, + out_dir: out_dir.clone(), + runtime_path, + }) + .expect("pipeline runs"); + + let segment_path = out_dir.join("segments/data-0.bin"); + assert!(segment_path.exists(), "segment file emitted"); + let segment_bytes = fs::read(&segment_path).expect("read segment file"); + assert_eq!(segment_bytes, [1u8, 2, 3, 4]); + + let manifest = out_dir.join("manifest.json"); + let manifest_src = fs::read_to_string(manifest).expect("read manifest.json"); + let manifest_json: serde_json::Value = + serde_json::from_str(&manifest_src).expect("parse manifest.json"); + let memory_image = manifest_json + .get("memory_image") + .and_then(|value| value.as_object()) + .expect("memory_image object"); + let init_segments = memory_image + .get("init_segments") + .and_then(|value| value.as_array()) + .expect("init_segments array"); + let zero_segments = memory_image + .get("zero_segments") + .and_then(|value| value.as_array()) + .expect("zero_segments array"); + assert_eq!(init_segments.len(), 1); + assert_eq!(zero_segments.len(), 1); + + let main_rs = out_dir.join("src/main.rs"); + let main_src = fs::read_to_string(main_rs).expect("read main.rs"); + assert!(main_src.contains("apply_memory_image")); +} + #[test] fn pipeline_rejects_homebrew_module_json() { let temp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/recomp-runtime/src/lib.rs b/crates/recomp-runtime/src/lib.rs index 4e427b5..b47cfcb 100644 --- a/crates/recomp-runtime/src/lib.rs +++ b/crates/recomp-runtime/src/lib.rs @@ -12,7 +12,8 @@ pub use homebrew::{ pub use memory::{ init_memory, recomp_mem_load_u16, recomp_mem_load_u32, recomp_mem_load_u64, recomp_mem_load_u8, recomp_mem_store_u16, recomp_mem_store_u32, recomp_mem_store_u64, recomp_mem_store_u8, - MemoryLayout, MemoryLayoutError, MemoryPermissions, MemoryRegionSpec, MemoryStatus, + MemoryInitSegment, MemoryLayout, MemoryLayoutError, MemoryPermissions, MemoryRegionSpec, + 
MemoryStatus, MemoryZeroSegment, }; pub use recomp_gfx::{CommandStream, GraphicsBackend, GraphicsError, StubBackend}; pub use recomp_services::{ @@ -54,6 +55,8 @@ pub enum RuntimeError { MissingLoaderConfigKey { key: LoaderConfigKey }, #[error("runtime manifest serialization failed: {message}")] ManifestSerialize { message: String }, + #[error("io error: {message}")] + Io { message: String }, #[error("memory layout error: {0}")] MemoryLayout(#[from] MemoryLayoutError), #[error("memory access error {status:?} at {address:#x} size {size}")] @@ -82,6 +85,19 @@ pub fn init_default_memory(layout: MemoryLayout) -> RuntimeResult<()> { Ok(()) } +pub fn apply_memory_image( + init_segments: &[MemoryInitSegment], + zero_segments: &[MemoryZeroSegment], +) -> RuntimeResult<()> { + memory::apply_memory_image(init_segments, zero_segments).map_err(|status| { + RuntimeError::MemoryAccess { + status, + address: 0, + size: 0, + } + }) +} + pub fn mem_load_u8(address: u64) -> RuntimeResult { memory::mem_load_u8(address).map_err(|status| RuntimeError::MemoryAccess { status, diff --git a/crates/recomp-runtime/src/memory.rs b/crates/recomp-runtime/src/memory.rs index a5341a7..bfdeee6 100644 --- a/crates/recomp-runtime/src/memory.rs +++ b/crates/recomp-runtime/src/memory.rs @@ -10,7 +10,8 @@ pub enum MemoryStatus { Unmapped = 4, Uninitialized = 5, InvalidOutPtr = 6, - Internal = 7, + SizeMismatch = 7, + Internal = 8, } impl MemoryStatus { @@ -111,6 +112,7 @@ enum AccessKind { Write, #[allow(dead_code)] Execute, + Init, } #[derive(Debug)] @@ -166,7 +168,7 @@ impl RuntimeMemory { } fn load(&self, address: u64, size: usize) -> Result { - let region = self.resolve_region(address, size, AccessKind::Read)?; + let region = self.resolve_region(address, size, AccessKind::Read, true)?; let offset = (address - region.spec.base) as usize; let mut value = 0u64; for i in 0..size { @@ -176,7 +178,7 @@ impl RuntimeMemory { } fn store(&mut self, address: u64, size: usize, value: u64) -> Result<(), 
MemoryStatus> { - let region = self.resolve_region_mut(address, size, AccessKind::Write)?; + let region = self.resolve_region_mut(address, size, AccessKind::Write, true)?; let offset = (address - region.spec.base) as usize; for i in 0..size { region.data[offset + i] = ((value >> (i * 8)) & 0xFF) as u8; @@ -184,13 +186,40 @@ impl RuntimeMemory { Ok(()) } + fn write_bytes_init( + &mut self, + address: u64, + size: u64, + bytes: &[u8], + ) -> Result<(), MemoryStatus> { + if bytes.len() as u64 != size { + return Err(MemoryStatus::SizeMismatch); + } + let region = self.resolve_region_mut(address, size as usize, AccessKind::Init, false)?; + let offset = (address - region.spec.base) as usize; + let end = offset + bytes.len(); + region.data[offset..end].copy_from_slice(bytes); + Ok(()) + } + + fn zero_fill_init(&mut self, address: u64, size: u64) -> Result<(), MemoryStatus> { + let region = self.resolve_region_mut(address, size as usize, AccessKind::Init, false)?; + let offset = (address - region.spec.base) as usize; + let end = offset + size as usize; + for value in &mut region.data[offset..end] { + *value = 0; + } + Ok(()) + } + fn resolve_region( &self, address: u64, size: usize, access: AccessKind, + require_alignment: bool, ) -> Result<&MemoryRegion, MemoryStatus> { - let index = self.resolve_region_inner(address, size, access)?; + let index = self.resolve_region_inner(address, size, access, require_alignment)?; Ok(&self.regions[index]) } @@ -199,8 +228,9 @@ impl RuntimeMemory { address: u64, size: usize, access: AccessKind, + require_alignment: bool, ) -> Result<&mut MemoryRegion, MemoryStatus> { - let index = self.resolve_region_inner(address, size, access)?; + let index = self.resolve_region_inner(address, size, access, require_alignment)?; Ok(&mut self.regions[index]) } @@ -209,12 +239,13 @@ impl RuntimeMemory { address: u64, size: usize, access: AccessKind, + require_alignment: bool, ) -> Result { if size == 0 { return Err(MemoryStatus::Internal); } let size_u64 
= size as u64; - if address % size_u64 != 0 { + if require_alignment && address % size_u64 != 0 { return Err(MemoryStatus::Unaligned); } let end = address @@ -247,6 +278,7 @@ impl RuntimeMemory { AccessKind::Read => region.spec.permissions.allows_read(), AccessKind::Write => region.spec.permissions.allows_write(), AccessKind::Execute => region.spec.permissions.allows_execute(), + AccessKind::Init => true, } } } @@ -259,6 +291,62 @@ pub fn init_memory(layout: MemoryLayout) -> Result<(), MemoryLayoutError> { Ok(()) } +#[derive(Debug, Clone)] +pub struct MemoryInitSegment { + pub name: String, + pub base: u64, + pub size: u64, + pub bytes: Vec, +} + +impl MemoryInitSegment { + pub fn new(name: impl Into, base: u64, size: u64, bytes: Vec) -> Self { + Self { + name: name.into(), + base, + size, + bytes, + } + } +} + +#[derive(Debug, Clone)] +pub struct MemoryZeroSegment { + pub name: String, + pub base: u64, + pub size: u64, +} + +impl MemoryZeroSegment { + pub fn new(name: impl Into, base: u64, size: u64) -> Self { + Self { + name: name.into(), + base, + size, + } + } +} + +pub fn apply_memory_image( + init_segments: &[MemoryInitSegment], + zero_segments: &[MemoryZeroSegment], +) -> Result<(), MemoryStatus> { + let status = with_memory_mut(|memory| { + for segment in init_segments { + memory.write_bytes_init(segment.base, segment.size, &segment.bytes)?; + } + for segment in zero_segments { + memory.zero_fill_init(segment.base, segment.size)?; + } + Ok(()) + }); + if status.is_ok() { + Ok(()) + } else { + Err(status) + } +} + fn with_memory_mut(mut f: F) -> MemoryStatus where F: FnMut(&mut RuntimeMemory) -> Result<(), MemoryStatus>, @@ -426,4 +514,17 @@ mod tests { let err = mem_store_u8(0x2000, 1).unwrap_err(); assert_eq!(err, MemoryStatus::PermissionDenied); } + + #[test] + fn apply_memory_image_writes_and_zeros() { + init_memory(test_layout()).expect("init memory"); + let init = MemoryInitSegment::new("data", 0x1000, 4, vec![1, 2, 3, 4]); + let zero = 
MemoryZeroSegment::new("bss", 0x1004, 4); + apply_memory_image(&[init], &[zero]).expect("apply image"); + + let value = mem_load_u32(0x1000).expect("load"); + assert_eq!(value, 0x04030201); + let zeroed = mem_load_u32(0x1004).expect("load zero"); + assert_eq!(zeroed, 0); + } } diff --git a/specs/README.md b/specs/README.md index aea077a..bf0f8eb 100644 --- a/specs/README.md +++ b/specs/README.md @@ -9,6 +9,8 @@ This folder contains the project specs for the Switch static recompilation prese - SPEC-030-RECOMP-PIPELINE.md - SPEC-040-RUNTIME-ABI.md - SPEC-045-RUNTIME-MEMORY.md +- SPEC-046-RUNTIME-MEMORY-CONFIG.md +- SPEC-047-MEMORY-IMAGE-INIT.md - SPEC-050-CPU-ISA.md - SPEC-060-GPU-GRAPHICS.md - SPEC-070-OS-SERVICES.md diff --git a/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md b/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md new file mode 100644 index 0000000..002072e --- /dev/null +++ b/specs/SPEC-046-RUNTIME-MEMORY-CONFIG.md @@ -0,0 +1,53 @@ +# SPEC-046: Runtime Memory Layout Configuration + +## Status +Draft v0.1 + +## Purpose +Allow runtime memory layout to be configured via `title.toml`, with safe defaults when omitted. + +## Goals +- Extend `title.toml` to define memory regions and permissions. +- Keep existing default layout when no config is provided. +- Validate region definitions deterministically. +- Emit configured layout in build metadata. + +## Non-Goals +- Per-title MMU or virtual memory modeling. +- Dynamic resizing or runtime reconfiguration. + +## Background +The current runtime memory layout is hardcoded in the pipeline. A config-driven layout enables per-title tuning while preserving deterministic output. + +## Requirements +- `title.toml` supports a `runtime.memory_layout` section with explicit regions. +- Each region defines: `name`, `base`, `size`, and `permissions` (read/write/execute). +- Regions must be non-overlapping, non-zero size, and not overflow `u64`. +- If no memory layout is provided, the pipeline uses the existing minimal default. 
+- The build manifest records the configured memory layout. + +## Interfaces and Data +- TOML schema: + - `[runtime.memory_layout]` contains an array of `[[runtime.memory_layout.regions]]`. + - Each region uses `{ name, base, size, permissions = { read, write, execute } }`. +- Validation errors are surfaced during pipeline parsing. + +## Deliverables +- Config parser extensions for memory layout. +- Deterministic validation and defaulting logic. +- Updated manifest emission and generated runtime initialization. +- Tests covering default layout and custom layout parsing. + +## Open Questions +- Should region permissions support compact string notation (e.g., "rwx") in addition to booleans? + +## Acceptance Criteria +- Custom memory layout in `title.toml` is parsed and emitted in `manifest.json`. +- Invalid layouts (overlaps, zero size) fail the pipeline with clear errors. +- Default behavior is unchanged when no layout is specified. + +## Risks +- User-provided layouts may diverge from actual module segment expectations if not validated together. + +## References +- SPEC-045 Runtime Memory Model and Load/Store Lowering diff --git a/specs/SPEC-047-MEMORY-IMAGE-INIT.md b/specs/SPEC-047-MEMORY-IMAGE-INIT.md new file mode 100644 index 0000000..e93c7c5 --- /dev/null +++ b/specs/SPEC-047-MEMORY-IMAGE-INIT.md @@ -0,0 +1,54 @@ +# SPEC-047: Memory Image Initialization From Module Segments + +## Status +Draft v0.1 + +## Purpose +Populate runtime memory regions with initial data derived from module segments (code/rodata/data/bss) so lifted output can execute meaningful memory-backed logic. + +## Goals +- Define how segment metadata and initial bytes are captured in module inputs. +- Populate runtime memory regions at startup with code/rodata/data images and zero-initialized bss. +- Keep asset separation explicit; no proprietary bytes are embedded in specs or tests. + +## Non-Goals +- Full relocation or dynamic loader behavior. 
+- Complete NSO/NRO loader coverage beyond minimal segment mapping. + +## Background +The runtime memory model currently initializes regions as zeroed buffers. To execute non-trivial lifted code, the runtime must load initial segment bytes into memory based on module metadata. + +## Requirements +- The pipeline records module segment descriptors with: + - `name`, `base`, `size`, `permissions`. + - `init_path` and `init_size` for segments with initial bytes (code/rodata/data). + - `zero_fill = true` for bss regions and for the uninitialized remainder of a segment (bytes beyond `init_size`). +- Runtime initialization loads initial bytes into their mapped regions and zero-fills bss. +- Initialization validates that init data fits within the target region. +- Initialization errors are surfaced deterministically (bad size, out of bounds, overlap). + +## Interfaces and Data +- Module metadata carries segment descriptors (either in `module.json` or a sidecar manifest referenced by it). +- The build manifest records the segment descriptors and initial image paths. +- Runtime exposes an initialization helper that accepts descriptors plus byte slices for each init segment. + +## Deliverables +- Segment descriptor data model. +- Pipeline support to emit segment descriptors and copy initial segment bytes into output metadata or assets. +- Runtime memory initialization logic with validation and tests. + +## Open Questions +- Should init bytes be embedded as separate files in output or packed into a single image blob? + +## Acceptance Criteria +- A sample module with code/data init bytes executes a load/store path against initialized memory. +- BSS regions are zeroed deterministically. +- Invalid init sizes or region mismatches fail with clear errors. + +## Risks +- Early segment mapping decisions may need to be revisited when loader/relocation support expands.
+ +## References +- SPEC-045 Runtime Memory Model and Load/Store Lowering +- SPEC-120 Homebrew Intake +- SPEC-130 Homebrew Module Extraction From ddab1351af3fc6b3d902cb4732b396c1d676fa9f Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 16:37:43 -0800 Subject: [PATCH 6/7] Add memory image sample and recompilation flow doc --- docs/exploratory-pipeline.md | 3 + docs/static-recompilation-flow.md | 113 +++++++++++++++++++++++++++ samples/memory-image/README.md | 26 ++++++ samples/memory-image/data.bin | 1 + samples/memory-image/module.json | 26 ++++++ samples/memory-image/provenance.toml | 22 ++++++ samples/memory-image/title.toml | 13 +++ 7 files changed, 204 insertions(+) create mode 100644 docs/static-recompilation-flow.md create mode 100644 samples/memory-image/README.md create mode 100644 samples/memory-image/data.bin create mode 100644 samples/memory-image/module.json create mode 100644 samples/memory-image/provenance.toml create mode 100644 samples/memory-image/title.toml diff --git a/docs/exploratory-pipeline.md b/docs/exploratory-pipeline.md index 2c021b8..4568ff5 100644 --- a/docs/exploratory-pipeline.md +++ b/docs/exploratory-pipeline.md @@ -44,3 +44,6 @@ performance_mode = "handheld" - Add a real input parser for Switch binaries. - Expand the lifter to cover more AArch64 instructions and control flow. - Expand runtime services and ABI validation. + +## Further Reading +- `docs/static-recompilation-flow.md` for a hypothetical end-to-end flow and verification plan. diff --git a/docs/static-recompilation-flow.md b/docs/static-recompilation-flow.md new file mode 100644 index 0000000..4c60ffb --- /dev/null +++ b/docs/static-recompilation-flow.md @@ -0,0 +1,113 @@ +# Hypothetical Static Recompilation Flow (macOS) + +This document describes a hypothetical end-to-end static recompilation flow for +preservation work. It is not a recipe for distributing proprietary assets, and +assumes lawful access to original data. 
The steps call out where the current +repo provides scaffolding versus where future tooling is required. + +## Scope And Assumptions +- Inputs are legally obtained by the operator and remain separated from outputs. +- The pipeline emits a macOS-hosted, statically recompiled binary. +- Many steps are exploratory and require additional implementation work. + +## End-To-End Flow +1. Legal acquisition and provenance capture. +2. Input ingest and validation. +3. Segment extraction and labeling. +4. Lifting to a stable, config-driven IR. +5. Configuration of runtime, memory layout, and stubs. +6. Emission of a recompiled Rust project. +7. macOS build and packaging. +8. Verification pipeline. + +## Step Details + +1. Legal acquisition and provenance capture. +- Acquire inputs from lawful sources. +- Record provenance metadata: title, version, region, device, collection tool, + and cryptographic hashes for each input. +- Maintain strict separation between code outputs and proprietary data. + +2. Input ingest and validation. +- Use the intake tooling to detect and validate formats (NCA, NSO, NRO, NPDM). +- Store extracted data under a clean, deterministic directory layout. +- Reject inputs that fail hash or format validation. + +3. Segment extraction and labeling. +- Extract executable segments (text, rodata, data, bss) and record: + - Base address, size, permissions. + - Paths to initial segment bytes where applicable. +- Emit a `module.json` (or sidecar manifest) describing segments and metadata. + +4. Lifting to a stable, config-driven IR. +- Lift instructions into a deterministic IR (current repo uses a JSON module + with ops and blocks as a stand-in). +- Preserve control-flow and data-flow metadata as needed for correctness. +- Record unsupported instructions or gaps for later coverage expansion. + +5. Configuration of runtime, memory layout, and stubs. +- Write `title.toml` to define: + - Entry function. + - Stub behaviors for syscalls. + - Runtime mode and memory layout.
+- Define a memory layout that covers the extracted segments. +- Ensure layout is validated for overlap, size, and overflow errors. + +6. Emission of a recompiled Rust project. +- Run the pipeline to generate: + - `src/main.rs` with runtime initialization. + - `manifest.json` and build metadata. + - Segment blob copies used for memory initialization. +- Confirm the emitted manifest captures input hashes and generated file hashes. + +7. macOS build and packaging. +- Build the emitted project for the intended macOS target: + - `cargo build --release --target aarch64-apple-darwin` + - or `cargo build --release --target x86_64-apple-darwin` +- Package outputs so that proprietary assets remain external. +- Document required runtime assets separately from the binary output. + +## Suggested Verification Pipeline + +### 1. Data Integrity And Reproducibility +- Verify that `manifest.json` hashes match on-disk outputs. +- Re-run the pipeline and confirm identical manifests for identical inputs. +- Store a signed build manifest for each verification run. + +### 2. Functional Correctness Against Original Data +- Capture a reference trace from the original execution environment. +- Normalize the trace into deterministic events: + - CPU-visible register snapshots at stable sync points. + - Syscall events and return values. + - Memory checksums at segment boundaries. +- Run the recompiled output with equivalent inputs and compare traces. +- Flag and investigate divergences with minimal, reproducible test cases. + +### 3. Video-Based Validation (Public Gameplay Videos) +This step is a heuristic and should be used only for coarse validation. + +- Select a publicly available video with clear, stable footage. +- Capture gameplay from the recompiled output using consistent settings. +- Align the two videos using audio fingerprints or a known sync marker. +- Compute a per-frame perceptual hash or structural similarity score. 
+- Track divergence windows and correlate with in-game events. +- Treat this as supporting evidence, not proof of correctness. + +### 4. Combined Acceptance Checks +- Require all deterministic trace checks to pass for critical paths. +- Use video-based checks to detect gross mismatches or timing drift. +- Record all verification artifacts alongside the build manifest: + - Trace logs. + - Frame hash summaries. + - Verification report and environment metadata. + +## Outputs And Artifacts +- `manifest.json` and `bundle-manifest.json` for integrity tracking. +- A deterministic verification report (JSON + human-readable summary). +- A reproducible command log for each verification run. + +## Notes +- This flow is intentionally conservative to preserve legal compliance and + reproducibility. +- Do not embed or distribute proprietary assets as part of the recompiled + output. diff --git a/samples/memory-image/README.md b/samples/memory-image/README.md new file mode 100644 index 0000000..c0953ff --- /dev/null +++ b/samples/memory-image/README.md @@ -0,0 +1,26 @@ +# Memory Image Sample + +This sample demonstrates the memory image initialization flow by pairing a lifted +`module.json` with an initial data segment blob. + +## Files +- `module.json`: Declares a data segment that is initialized from `data.bin` and + zero-fills the remaining bytes. +- `data.bin`: Four bytes of initial data (0x01 0x02 0x03 0x04). +- `title.toml`: Configures a memory layout that maps the data segment region. +- `provenance.toml`: Minimal provenance metadata for the sample module. 
+ +## How To Run +From the repo root: + +```bash +cargo run -p recomp-cli -- run \ + --module samples/memory-image/module.json \ + --config samples/memory-image/title.toml \ + --provenance samples/memory-image/provenance.toml \ + --out-dir out/memory-image +``` + +The emitted `manifest.json` will include a `memory_image` section describing the +segment blob, and the generated `main.rs` will apply the memory image before +calling `entry`. diff --git a/samples/memory-image/data.bin b/samples/memory-image/data.bin new file mode 100644 index 0000000..82090ee --- /dev/null +++ b/samples/memory-image/data.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/samples/memory-image/module.json b/samples/memory-image/module.json new file mode 100644 index 0000000..e718c71 --- /dev/null +++ b/samples/memory-image/module.json @@ -0,0 +1,26 @@ +{ + "arch": "aarch64", + "segments": [ + { + "name": "data", + "base": 4096, + "size": 8, + "permissions": { "read": true, "write": true, "execute": false }, + "init_path": "data.bin", + "init_size": 4, + "zero_fill": true + } + ], + "functions": [ + { + "name": "entry", + "ops": [ + { "op": "const_i64", "dst": "x0", "imm": 4096 }, + { "op": "load_i32", "dst": "x1", "addr": "x0", "offset": 0 }, + { "op": "add_i64", "dst": "x2", "lhs": "x1", "rhs": "x1" }, + { "op": "store_i32", "src": "x2", "addr": "x0", "offset": 4 }, + { "op": "ret" } + ] + } + ] +} diff --git a/samples/memory-image/provenance.toml b/samples/memory-image/provenance.toml new file mode 100644 index 0000000..f0bd7db --- /dev/null +++ b/samples/memory-image/provenance.toml @@ -0,0 +1,22 @@ +schema_version = "1" + +[title] +name = "Memory Image Sample" +title_id = "0100000000000000" +version = "1.0.0" +region = "US" + +[collection] +device = "demo" +collected_at = "2026-02-03" + +[collection.tool] +name = "manual" +version = "1.0" + +[[inputs]] +path = "module.json" +format = "lifted_json" +sha256 = "f65421a9c7eb28523992c99ae2a4cb690ff375d599f820ff8a1697f91944f89b" 
+size = 635 +role = "lifted_module" diff --git a/samples/memory-image/title.toml b/samples/memory-image/title.toml new file mode 100644 index 0000000..dde4cff --- /dev/null +++ b/samples/memory-image/title.toml @@ -0,0 +1,13 @@ +title = "Memory Image Sample" +entry = "entry" +abi_version = "0.1.0" + +[runtime] +performance_mode = "handheld" + +[runtime.memory_layout] +[[runtime.memory_layout.regions]] +name = "data" +base = 0x1000 +size = 0x100 +permissions = { read = true, write = true, execute = false } From dcd6e75727d11bbc391f501483cc4209f27f5db2 Mon Sep 17 00:00:00 2001 From: Brian Gyss Date: Mon, 2 Feb 2026 16:39:02 -0800 Subject: [PATCH 7/7] Document new sample and flow docs --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d0196fa..366d7df 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ Legal and provenance policy: - The dev environment is managed with Nix + devenv. - See `docs/DEVELOPMENT.md` for commands and sample usage. +## Samples and Flow Docs +- `samples/memory-image/` shows the memory image initialization flow (segment blob + lifted module). +- `docs/static-recompilation-flow.md` outlines a hypothetical macOS static recompilation flow and verification pipeline. + ## Back Pressure Hooks These hooks add fast, consistent feedback to keep the repo autonomous and reduce review churn. Hooks are defined in `.pre-commit-config.yaml` and can be run with `prek` (preferred) or `pre-commit`.