diff --git a/crates/perry-api-manifest/src/entries.rs b/crates/perry-api-manifest/src/entries.rs index f41eff4d62..853ce6a2c2 100644 --- a/crates/perry-api-manifest/src/entries.rs +++ b/crates/perry-api-manifest/src/entries.rs @@ -115,6 +115,9 @@ pub const NATIVE_MODULES: &[&str] = &[ "v8", "vm", "process", + // Bare `perry` builtin — embedded-asset introspection (#5731): + // `embeddedFiles`, `readEmbedded`, `isStandaloneExecutable`. + "perry", "perry/tui", "perry/yoga", "perry/ui", @@ -206,6 +209,9 @@ pub const RUNTIME_ONLY_MODULES: &[&str] = &[ "dns", "dns/promises", "process", + // #5731 — `perry` embed API is served entirely from perry-runtime + // (registry + fs interception); no perry-stdlib surface needed. + "perry", "perry/ui", "perry/system", "perry/widget", diff --git a/crates/perry-api-manifest/src/entries/part_1.rs b/crates/perry-api-manifest/src/entries/part_1.rs index de37345640..f675aad765 100644 --- a/crates/perry-api-manifest/src/entries/part_1.rs +++ b/crates/perry-api-manifest/src/entries/part_1.rs @@ -1347,6 +1347,20 @@ pub(crate) const API_MANIFEST_PART_1: &[ApiEntry] = &[ ], TypeSpec::Any, ), + // #5731 — `perry` embedded-asset API for standalone executables. + // `readEmbedded(path)` returns the bytes of an asset baked in via + // `perry compile --embed`; `embeddedFiles` / `isStandaloneExecutable` + // are value (property) exports resolved at read time. + method_sig( + "perry", + "readEmbedded", + false, + None, + &[p_str("path")], + TypeSpec::Buffer, + ), + method_sig("perry", "embeddedFiles", false, None, &[], TypeSpec::Any), + property("perry", "isStandaloneExecutable"), method_sig( "perry/thread", "parallelMap", diff --git a/crates/perry-codegen/src/lower_call/native_table/thread_lodash.rs b/crates/perry-codegen/src/lower_call/native_table/thread_lodash.rs index 097ef74d86..df159c2236 100644 --- a/crates/perry-codegen/src/lower_call/native_table/thread_lodash.rs +++ b/crates/perry-codegen/src/lower_call/native_table/thread_lodash.rs @@ -1,6 +1,30 @@ use super::*; pub(super) const THREAD_LODASH_ROWS: &[NativeModSig] = &[ + // ========== perry (embedded-asset API, #5731) ========== + // `readEmbedded(path)` takes the NaN-boxed path string as-is (decoded + // runtime-side via `decode_path_value`, like the fs APIs) and returns a + // `*mut BufferHeader` (NaN-boxed POINTER → a `Buffer`). + NativeModSig { + module: "perry", + has_receiver: false, + method: "readEmbedded", + class_filter: None, + runtime: "js_perry_read_embedded", + args: &[NA_F64], + ret: NR_PTR, + }, + // `embeddedFiles()` — zero-arg, returns a fresh `*mut ArrayHeader` of + // `{ name, size, type }` objects (NaN-boxed POINTER by NR_PTR). + NativeModSig { + module: "perry", + has_receiver: false, + method: "embeddedFiles", + class_filter: None, + runtime: "js_perry_embedded_files", + args: &[], + ret: NR_PTR, + }, // ========== perry/thread (parallelMap, parallelFilter, spawn) ========== // Runtime expects both args as NaN-boxed f64 values and returns the same // — no unboxing/reboxing needed on either side. Closure is a POINTER_TAG'd diff --git a/crates/perry-codegen/src/runtime_decls/stdlib_ffi/third_party.rs b/crates/perry-codegen/src/runtime_decls/stdlib_ffi/third_party.rs index 5f15542f7a..cb943dd56c 100644 --- a/crates/perry-codegen/src/runtime_decls/stdlib_ffi/third_party.rs +++ b/crates/perry-codegen/src/runtime_decls/stdlib_ffi/third_party.rs @@ -39,6 +39,12 @@ pub(crate) fn declare_third_party(module: &mut LlModule) { module.declare_function("js_ads_banner_destroy", VOID, &[DOUBLE]); module.declare_function("js_ads_request_consent", I64, &[]); + // ========== perry embedded-asset API (#5731) ========== + // readEmbedded(path) → *mut BufferHeader (returned as I64 pointer). + module.declare_function("js_perry_read_embedded", I64, &[DOUBLE]); + // embeddedFiles() → *mut ArrayHeader (returned as I64 pointer). + module.declare_function("js_perry_embedded_files", I64, &[]); + // ========== perry/thread (parallelMap, parallelFilter, spawn) ========== module.declare_function("js_thread_parallel_map", DOUBLE, &[DOUBLE, DOUBLE]); module.declare_function("js_thread_parallel_filter", DOUBLE, &[DOUBLE, DOUBLE]); diff --git a/crates/perry-runtime/src/embedded.rs b/crates/perry-runtime/src/embedded.rs new file mode 100644 index 0000000000..5de15f94b2 --- /dev/null +++ b/crates/perry-runtime/src/embedded.rs @@ -0,0 +1,324 @@ +//! Embedded-asset registry for standalone executables (#5731). +//! +//! `perry compile --embed "./dist/**"` (or `perry.embed` in package.json / +//! `[compile] embed` in perry.toml) bakes the matched files' bytes into the +//! binary. The compiler emits a generated C object whose `constructor` calls +//! [`js_register_embedded_asset`] once per file before `main` runs, populating +//! a process-global registry. The bytes themselves live in the binary's +//! read-only data (static C literals), so the registry only stores +//! `&'static [u8]` slices into them — no copy, no per-asset heap allocation. +//! +//! Three consumers read the registry at runtime: +//! * `fs.readFileSync` / `fs.readFile` — a `$perryfs/...` virtual path (or a +//! bare key that matches an embedded asset) resolves to the embedded bytes +//! instead of touching disk (see `crate::fs::read_file_bytes_with_options`). +//! * `import { embeddedFiles } from "perry"` — [`js_perry_embedded_files`] +//! builds the introspection array (`{ name, size, type }` per asset). +//! * `import { readEmbedded } from "perry"` — [`js_perry_read_embedded`] +//! returns the bytes as a `Buffer`. +//! +//! The global never frees (matching Perry's "embedded data lives for the life of +//! the process" model), mirroring the `crate::shared_sab` registry pattern. + +use std::sync::{Mutex, OnceLock}; + +use crate::object::{js_object_alloc, js_object_set_field_by_name, ObjectHeader}; +use crate::string::js_string_from_bytes; +use crate::value::{js_nanbox_pointer, JSValue, TAG_TRUE}; + +/// Virtual-path prefix that marks a path as an embedded asset, mirroring +/// Bun's `$bunfs/`. `fs` and `readEmbedded` strip it before lookup; the +/// import-attribute lowering hands user code a `$perryfs/` string. +pub const VIRTUAL_PREFIX: &str = "$perryfs/"; + +/// One embedded file. `bytes` points into the binary's read-only data and is +/// valid for the life of the process. +struct EmbeddedAsset { + /// Registry key — the embed-relative path, e.g. `dist/index.html`. + name: String, + bytes: &'static [u8], +} + +static EMBEDDED_ASSETS: OnceLock>> = OnceLock::new(); + +fn registry() -> &'static Mutex> { + EMBEDDED_ASSETS.get_or_init(|| Mutex::new(Vec::new())) +} + +/// Strip the `$perryfs/` prefix (and a single leading `./`) so `$perryfs/x`, +/// `./x`, and `x` all resolve to the same registry key. Backslashes are folded +/// to `/` first so Windows-style inputs (`$perryfs\dist\index.html`, +/// `dist\index.html`) match the always-`/`-joined registry keys. +fn normalize_key(path: &str) -> String { + let unified = path.replace('\\', "/"); + let p = unified.strip_prefix(VIRTUAL_PREFIX).unwrap_or(&unified); + p.strip_prefix("./").unwrap_or(p).to_string() +} + +/// Register an embedded asset. Called once per file from the generated +/// `__attribute__((constructor))` before the runtime starts. Both `name_ptr` +/// and `bytes_ptr` point at static literals in the binary, so the recorded +/// slices are `'static`. +/// +/// # Safety +/// `name_ptr`/`bytes_ptr` must point at valid, immortal byte ranges of the +/// given lengths (they always do — the compiler emits binary `.rodata`). +#[no_mangle] +pub unsafe extern "C" fn js_register_embedded_asset( + name_ptr: *const u8, + name_len: usize, + bytes_ptr: *const u8, + bytes_len: usize, +) { + if name_ptr.is_null() || (bytes_ptr.is_null() && bytes_len != 0) { + return; + } + let name_bytes = std::slice::from_raw_parts(name_ptr, name_len); + let name = String::from_utf8_lossy(name_bytes).into_owned(); + let bytes: &'static [u8] = if bytes_len == 0 { + &[] + } else { + std::slice::from_raw_parts(bytes_ptr, bytes_len) + }; + registry() + .lock() + .unwrap_or_else(|e| e.into_inner()) + .push(EmbeddedAsset { name, bytes }); +} + +/// Look up an embedded asset's bytes by virtual path (`$perryfs/...`) or by its +/// embed-relative key. Returns the `'static` slice into the binary. This is the +/// authoritative presence test — a path is "embedded" iff this returns `Some`. +pub fn lookup(path: &str) -> Option<&'static [u8]> { + let key = normalize_key(path); + let reg = registry().lock().unwrap_or_else(|e| e.into_inner()); + reg.iter().find(|a| a.name == key).map(|a| a.bytes) +} + +/// True if `path` is an embedded-asset *virtual* path (carries the `$perryfs/` +/// prefix), independent of whether it actually resolves. `fs` uses this to treat +/// an unresolved `$perryfs/...` path as missing rather than attempting a real +/// disk read of the literal string. Actual presence is [`lookup`]. +pub fn is_virtual_path(path: &str) -> bool { + path.replace('\\', "/").starts_with(VIRTUAL_PREFIX) +} + +/// Snapshot of `(name, size)` for every embedded asset, in registration order. +fn snapshot() -> Vec<(String, usize)> { + let reg = registry().lock().unwrap_or_else(|e| e.into_inner()); + reg.iter() + .map(|a| (a.name.clone(), a.bytes.len())) + .collect() +} + +/// Best-effort MIME type from a file extension, covering the asset classes a +/// static file server commonly emits. Defaults to `application/octet-stream`. +fn mime_for(name: &str) -> &'static str { + let ext = name.rsplit('.').next().unwrap_or("").to_ascii_lowercase(); + match ext.as_str() { + "html" | "htm" => "text/html; charset=utf-8", + "css" => "text/css; charset=utf-8", + "js" | "mjs" | "cjs" => "text/javascript; charset=utf-8", + "json" | "map" => "application/json; charset=utf-8", + "xml" => "application/xml; charset=utf-8", + "txt" => "text/plain; charset=utf-8", + "csv" => "text/csv; charset=utf-8", + "svg" => "image/svg+xml", + "png" => "image/png", + "jpg" | "jpeg" => "image/jpeg", + "gif" => "image/gif", + "webp" => "image/webp", + "avif" => "image/avif", + "ico" => "image/x-icon", + "bmp" => "image/bmp", + "woff" => "font/woff", + "woff2" => "font/woff2", + "ttf" => "font/ttf", + "otf" => "font/otf", + "eot" => "application/vnd.ms-fontobject", + "wasm" => "application/wasm", + "pdf" => "application/pdf", + "mp4" => "video/mp4", + "webm" => "video/webm", + "mp3" => "audio/mpeg", + "wav" => "audio/wav", + "ogg" => "audio/ogg", + _ => "application/octet-stream", + } +} + +fn string_value(value: &str) -> f64 { + // Must carry STRING_TAG (not POINTER_TAG) so `typeof`/`console.log`/property + // reads see a string, not an opaque object. + let ptr = js_string_from_bytes(value.as_ptr(), value.len() as u32); + f64::from_bits(JSValue::string_ptr(ptr).bits()) +} + +fn set_field(obj: *mut ObjectHeader, name: &str, value: f64) { + let key = js_string_from_bytes(name.as_ptr(), name.len() as u32); + js_object_set_field_by_name(obj, key, value); +} + +/// `import { embeddedFiles } from "perry"`. Returns a fresh array with one +/// `{ name, size, type }` object per embedded asset. Assets are registered (and +/// therefore listed) sorted by their embed-relative path — deterministic across +/// builds, after de-duplication. +/// +/// Exposed as a (zero-arg) function rather than a bare value: member calls on a +/// native-module *value* binding (`embeddedFiles.map(...)`) are lowered as a +/// namespace dispatch (`perry.map`), so a callable that returns a real array — +/// on which normal array methods then dispatch — is the robust shape. Returns +/// the raw `*mut ArrayHeader`; the native dispatch layer NaN-boxes it (NR_PTR). +#[no_mangle] +pub extern "C" fn js_perry_embedded_files() -> *mut crate::array::ArrayHeader { + let assets = snapshot(); + let scope = crate::gc::RuntimeHandleScope::new(); + let arr = crate::array::js_array_alloc_with_length(assets.len() as u32); + let arr_handle = scope.root_raw_mut_ptr(arr); + + for (i, (name, size)) in assets.iter().enumerate() { + // Root the per-asset object across the string allocations below, then + // splice it into the already-rooted array (which makes it reachable). + let obj = js_object_alloc(0, 3); + let obj_handle = scope.root_raw_mut_ptr(obj); + + let name_h = scope.root_nanbox_f64(string_value(name)); + set_field( + obj_handle.get_raw_mut_ptr::(), + "name", + name_h.get_nanbox_f64(), + ); + set_field( + obj_handle.get_raw_mut_ptr::(), + "size", + *size as f64, + ); + let type_h = scope.root_nanbox_f64(string_value(mime_for(name))); + set_field( + obj_handle.get_raw_mut_ptr::(), + "type", + type_h.get_nanbox_f64(), + ); + + let obj_value = js_nanbox_pointer(obj_handle.get_raw_mut_ptr::() as i64); + crate::array::js_array_set_f64( + arr_handle.get_raw_mut_ptr::(), + i as u32, + obj_value, + ); + } + + arr_handle.get_raw_mut_ptr::() +} + +/// `Perry.isStandaloneExecutable`. Any Perry-compiled binary is standalone +/// (there is no interpreter mode at runtime), so this is always `true`. +pub fn is_standalone_executable_value() -> f64 { + f64::from_bits(TAG_TRUE) +} + +/// Throw a catchable `Error` from `readEmbedded`. The native call's return ABI +/// (NR_PTR) NaN-boxes the raw pointer, so a null/garbage return would surface as +/// a bogus object rather than a thrown error — throwing keeps the +/// `readEmbedded(): Buffer` contract honest. +fn throw_embed_error(message: &str) -> ! { + let msg = crate::string::js_string_from_bytes(message.as_ptr(), message.len() as u32); + let err = crate::error::js_error_new_with_message(msg); + crate::exception::js_throw(js_nanbox_pointer(err as i64)) +} + +/// Throw for a `readEmbedded` miss — matches Node's `fs` "not found" semantics. +fn throw_embed_not_found(path: &str) -> ! { + throw_embed_error(&format!("No embedded asset found for path: {path}")) +} + +/// `import { readEmbedded } from "perry"`. Reads an embedded asset by virtual +/// path (`$perryfs/...`) or embed-relative key and returns its bytes as a +/// `Buffer`. Throws an `Error` when the asset is not found. +#[no_mangle] +pub extern "C" fn js_perry_read_embedded(path_value: f64) -> *mut crate::buffer::BufferHeader { + let path = match unsafe { crate::fs::decode_path_value(path_value) } { + Some(p) => p, + None => throw_embed_not_found(""), + }; + let Some(bytes) = lookup(&path) else { + throw_embed_not_found(&path); + }; + // `js_buffer_alloc` takes an i32 length; an asset ≥2 GiB would wrap to a + // negative/garbage size. Reject it explicitly rather than corrupt memory. + if bytes.len() > i32::MAX as usize { + throw_embed_error(&format!( + "Embedded asset too large to read into a Buffer ({} bytes): {path}", + bytes.len() + )); + } + unsafe { + let buf = crate::buffer::js_buffer_alloc(bytes.len() as i32, 0); + if !buf.is_null() { + let buf_data = (buf as *mut u8).add(std::mem::size_of::()); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), buf_data, bytes.len()); + (*buf).length = bytes.len() as u32; + } + buf + } +} + +// Keep the FFI symbols external under the thin-LTO + `strip=true` release +// profile. A `#[no_mangle] pub extern "C"` alone is internalized and +// dead-stripped; only individual `#[used]` typed fn-pointer statics survive +// (see the note in `typed_feedback/trace.rs`). `js_register_embedded_asset` is +// called only from the generated C constructor, and `js_perry_read_embedded` +// only from codegen-emitted callsites — both are invisible to Rust's reachability. +#[rustfmt::skip] +mod keep_embedded { + use super::*; + #[used] static K0: unsafe extern "C" fn(*const u8, usize, *const u8, usize) = js_register_embedded_asset; + #[used] static K1: extern "C" fn(f64) -> *mut crate::buffer::BufferHeader = js_perry_read_embedded; + #[used] static K2: extern "C" fn() -> *mut crate::array::ArrayHeader = js_perry_embedded_files; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_strips_virtual_prefix_and_dot_slash() { + assert_eq!(normalize_key("$perryfs/dist/index.html"), "dist/index.html"); + assert_eq!(normalize_key("./dist/index.html"), "dist/index.html"); + assert_eq!(normalize_key("dist/index.html"), "dist/index.html"); + // Windows-style separators fold to `/` (before and after the prefix). + assert_eq!(normalize_key("dist\\index.html"), "dist/index.html"); + assert_eq!( + normalize_key("$perryfs\\dist\\index.html"), + "dist/index.html" + ); + } + + #[test] + fn register_and_lookup_by_both_paths() { + const NAME: &[u8] = b"embed-test/asset.txt"; + const DATA: &[u8] = b"embedded-bytes"; + unsafe { + js_register_embedded_asset(NAME.as_ptr(), NAME.len(), DATA.as_ptr(), DATA.len()); + } + // Found by bare key, by `$perryfs/` virtual path, and via backslashes. + assert_eq!(lookup("embed-test/asset.txt"), Some(DATA)); + assert_eq!(lookup("$perryfs/embed-test/asset.txt"), Some(DATA)); + assert_eq!(lookup("$perryfs\\embed-test\\asset.txt"), Some(DATA)); + // `is_virtual_path` is a pure prefix test; presence is `lookup`. + assert!(is_virtual_path("$perryfs/anything")); + assert!(!is_virtual_path("not/registered.txt")); + assert!(lookup("not/registered.txt").is_none()); + assert!(lookup("$perryfs/not-registered").is_none()); + } + + #[test] + fn mime_table_covers_common_web_assets() { + assert_eq!(mime_for("index.html"), "text/html; charset=utf-8"); + assert_eq!(mime_for("app.JS"), "text/javascript; charset=utf-8"); + assert_eq!(mime_for("logo.png"), "image/png"); + assert_eq!(mime_for("font.woff2"), "font/woff2"); + assert_eq!(mime_for("data.bin"), "application/octet-stream"); + assert_eq!(mime_for("noext"), "application/octet-stream"); + } +} diff --git a/crates/perry-runtime/src/fs/mod.rs b/crates/perry-runtime/src/fs/mod.rs index 0d63e465bf..7ab6138bb4 100644 --- a/crates/perry-runtime/src/fs/mod.rs +++ b/crates/perry-runtime/src/fs/mod.rs @@ -432,6 +432,18 @@ fn read_file_bytes_with_options(path_value: f64, options_value: f64) -> Option i32 { None => return 0, }; + // #5731 — a registered embedded asset exists for the life of the + // process; an unresolved `$perryfs/...` path does not (and must not + // fall through to a disk check of the literal virtual path). + if crate::embedded::lookup(&path_str).is_some() { + return 1; + } + if crate::embedded::is_virtual_path(&path_str) { + return 0; + } + if Path::new(&path_str).exists() { 1 } else { diff --git a/crates/perry-runtime/src/lib.rs b/crates/perry-runtime/src/lib.rs index e1e203dab0..e392eaa8f9 100644 --- a/crates/perry-runtime/src/lib.rs +++ b/crates/perry-runtime/src/lib.rs @@ -50,6 +50,7 @@ pub mod dgram_reactor; pub mod disposable; pub mod dns; pub mod dns_resolver; +pub mod embedded; pub mod error; pub mod event_pump; pub mod event_target; diff --git a/crates/perry-runtime/src/object/native_module.rs b/crates/perry-runtime/src/object/native_module.rs index ebf26c170a..5f49698e35 100644 --- a/crates/perry-runtime/src/object/native_module.rs +++ b/crates/perry-runtime/src/object/native_module.rs @@ -766,6 +766,13 @@ pub unsafe extern "C" fn js_native_module_property_by_name( }); } + // #5731 — `perry.isStandaloneExecutable` value export (always `true` at + // runtime). `embeddedFiles` / `readEmbedded` are callable exports dispatched + // via the native call table, not value reads. + if module_name == "perry" && property_name == "isStandaloneExecutable" { + return crate::embedded::is_standalone_executable_value(); + } + if module_name == "util" && property_name == "debug" { return bound_native_callable_export_value("util", "debuglog"); } diff --git a/crates/perry/src/commands/compile.rs b/crates/perry/src/commands/compile.rs index 52071916e9..0bc2f54e01 100644 --- a/crates/perry/src/commands/compile.rs +++ b/crates/perry/src/commands/compile.rs @@ -25,6 +25,7 @@ mod cjs_wrap; mod codegen_steps; mod collect_modules; mod compressed_libs; +mod embed; mod env_fold; mod harmonyos_shim; mod host_config; diff --git a/crates/perry/src/commands/compile/build_cache.rs b/crates/perry/src/commands/compile/build_cache.rs index e0ec5012f2..092f58284e 100644 --- a/crates/perry/src/commands/compile/build_cache.rs +++ b/crates/perry/src/commands/compile/build_cache.rs @@ -107,7 +107,7 @@ impl BuildCacheProbe { .join(manifest_name); let eligible = eligibility(args, project_root); Self { - args_key: args_key(args, &output_path), + args_key: args_key(args, &output_path, project_root), manifest_path, output_path, target_name: args.target.clone().unwrap_or_else(|| "native".to_string()), @@ -437,7 +437,7 @@ fn entry_uses_precompile(input: &Path) -> bool { .unwrap_or(true) } -fn args_key(args: &CompileArgs, output_path: &Path) -> String { +fn args_key(args: &CompileArgs, output_path: &Path, project_root: &Path) -> String { let mut hasher = Sha256::new(); hash_field(&mut hasher, "args-debug", &format!("{args:?}")); hash_field(&mut hasher, "input", &absolute_identity(&args.input)); @@ -453,6 +453,51 @@ fn args_key(args: &CompileArgs, output_path: &Path) -> String { "features", args.features.as_deref().unwrap_or(""), ); + // #5731 — fold the resolved embedded-asset set into the key. `{args:?}` + // covers `--embed` patterns but not `perry.embed` / `[compile] embed` + // config nor the files' state, so without this an edit to an embedded file + // (with no pattern change) would reuse a stale cached binary. Key on each + // asset's name + size + mtime rather than re-reading and hashing the full + // contents here — the bytes are already streamed into the binary at embed + // time, and size+mtime is the conventional, cheap freshness signal (a fresh + // checkout bumps mtime → safe rebuild; the only miss is a content change + // that preserves both size and mtime, which real edits don't do). + // + // Fail closed on resolution / per-asset stat failures. `run_pipeline` + // treats `resolve_embedded_assets` as fatal (the `?` at its embed step), so + // the cache must not let a broken `perry.embed` / `[compile] embed` config — + // or a file that vanished or can't be stat'd — silently drop the embed + // inputs and fall back to the non-embed key, which could reuse a stale + // manifest and mask the error. Folding a sentinel field on every error path + // makes the key diverge from any successful build (which never emits these + // field names), so the probe misses, `run_pipeline` re-runs, and the real + // error surfaces instead of a stale binary. + match super::embed::resolve_embedded_assets(&args.embed, project_root) { + Ok(assets) => { + for (name, path) in &assets { + hash_field(&mut hasher, "embed-name", name); + match fs::metadata(path) { + Ok(meta) => { + hash_field(&mut hasher, "embed-size", &meta.len().to_string()); + match meta + .modified() + .ok() + .and_then(|m| m.duration_since(std::time::UNIX_EPOCH).ok()) + { + Some(dur) => hash_field( + &mut hasher, + "embed-mtime", + &format!("{}.{:09}", dur.as_secs(), dur.subsec_nanos()), + ), + None => hash_field(&mut hasher, "embed-mtime-unavailable", name), + } + } + Err(e) => hash_field(&mut hasher, "embed-stat-error", &format!("{name}: {e}")), + } + } + } + Err(e) => hash_field(&mut hasher, "embed-resolve-error", &e.to_string()), + } hex::encode(hasher.finalize()) } diff --git a/crates/perry/src/commands/compile/embed.rs b/crates/perry/src/commands/compile/embed.rs new file mode 100644 index 0000000000..0b6e769a7d --- /dev/null +++ b/crates/perry/src/commands/compile/embed.rs @@ -0,0 +1,512 @@ +//! Embed static assets/files into the standalone executable (#5731). +//! +//! Patterns come from three sources, unioned: the `--embed` CLI flag, +//! `perry.embed` in package.json, and `[compile] embed` in perry.toml. Each +//! pattern is a file, a directory (embedded recursively), or a `*`/`**` glob, +//! resolved relative to the project root. The matched files become +//! `(name, abs_path)` pairs where `name` is the project-root-relative path with +//! `/` separators (e.g. `dist/index.html`) — the runtime registry key and the +//! `$perryfs/` virtual-path suffix. +//! +//! [`generate_embedded_asset_object`] emits a C source whose +//! `__attribute__((constructor))` calls `js_register_embedded_asset` once per +//! file before `main` runs (mirroring the embedded-JS object generator), then +//! compiles it to a `.o` that the caller appends to the link line. Each asset's +//! bytes are pulled straight into read-only data with a module-level `.incbin` +//! assembler directive — the file is *referenced*, not re-encoded, so a 5 MB +//! image costs ~6 lines of source instead of ~20 MB of octal-escaped string +//! literal. This sidesteps both the quadratic-ish cost of compiling a huge +//! single string literal and the per-literal size caps some toolchains impose, +//! and it keeps binary assets byte-exact. The runtime keeps `&'static` slices +//! into the resulting `.rodata` (no copy). +//! +//! Embedding runs the host `cc` over the generated translation unit and links +//! its `.o` into the executable, so it is supported only on Unix-like hosts +//! (macOS/Linux). On Windows Perry links with MSVC `link.exe`, which neither +//! consumes a `cc`-produced object nor supports `__attribute__((constructor))` +//! / `.incbin`; [`generate_embedded_asset_object`] fails loudly there rather +//! than emitting an object that silently won't link. Cross-target / Windows +//! embedding is a tracked follow-up to #5731. + +use anyhow::{anyhow, Result}; +use std::fmt::Write as _; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +/// Collect the embed patterns from the CLI flag plus `perry.embed` +/// (package.json) and `[compile] embed` (perry.toml) under `project_root`, +/// expand them against `project_root`, and return de-duplicated, sorted +/// `(name, absolute_path)` pairs. +pub(super) fn resolve_embedded_assets( + cli_patterns: &[String], + project_root: &Path, +) -> Result> { + let mut patterns: Vec = cli_patterns.to_vec(); + patterns.extend(read_package_json_embed(project_root)); + patterns.extend(read_perry_toml_embed(project_root)); + + let mut assets: Vec<(String, PathBuf)> = Vec::new(); + let mut seen = std::collections::HashSet::new(); + for pattern in &patterns { + for path in expand_pattern(pattern, project_root)? { + let name = match relative_name(&path, project_root) { + Some(n) => n, + None => continue, + }; + if seen.insert(name.clone()) { + assets.push((name, path)); + } + } + } + assets.sort_by(|a, b| a.0.cmp(&b.0)); + Ok(assets) +} + +/// `perry.embed` from `/package.json` (array of strings). +fn read_package_json_embed(project_root: &Path) -> Vec { + let path = project_root.join("package.json"); + let Ok(text) = fs::read_to_string(&path) else { + return Vec::new(); + }; + let Ok(json) = serde_json::from_str::(&text) else { + return Vec::new(); + }; + json.get("perry") + .and_then(|p| p.get("embed")) + .and_then(|e| e.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(str::to_string)) + .collect() + }) + .unwrap_or_default() +} + +/// `[compile] embed` from `/perry.toml` (array of strings). +fn read_perry_toml_embed(project_root: &Path) -> Vec { + let path = project_root.join("perry.toml"); + let Ok(text) = fs::read_to_string(&path) else { + return Vec::new(); + }; + let Ok(toml) = text.parse::() else { + return Vec::new(); + }; + toml.get("compile") + .and_then(|c| c.get("embed")) + .and_then(|e| e.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(str::to_string)) + .collect() + }) + .unwrap_or_default() +} + +/// Expand one pattern (file / directory / glob) into a list of concrete files. +fn expand_pattern(pattern: &str, project_root: &Path) -> Result> { + let trimmed = pattern.trim_start_matches("./"); + let abs = if Path::new(pattern).is_absolute() { + PathBuf::from(pattern) + } else { + project_root.join(trimmed) + }; + + // Plain path (no wildcards): a file embeds itself; a directory embeds all + // files beneath it. + if !has_wildcard(pattern) { + if abs.is_file() { + return Ok(vec![abs]); + } + if abs.is_dir() { + return Ok(walk_files(&abs)); + } + // Missing path — not fatal; skip with no match (a glob that matches + // nothing is also non-fatal). Caller-visible "embedded N files" makes + // an empty result obvious. + return Ok(Vec::new()); + } + + // Glob: split into the longest wildcard-free base dir and the wildcard + // remainder, walk the base, and match each file's relative segments. + let (base, rest) = split_glob_base(trimmed); + let base_dir = project_root.join(&base); + if !base_dir.is_dir() { + return Ok(Vec::new()); + } + let pat_segments: Vec<&str> = rest.split('/').filter(|s| !s.is_empty()).collect(); + let mut out = Vec::new(); + for file in walk_files(&base_dir) { + let Ok(rel) = file.strip_prefix(&base_dir) else { + continue; + }; + let rel_segments: Vec = rel + .components() + .map(|c| c.as_os_str().to_string_lossy().into_owned()) + .collect(); + let rel_refs: Vec<&str> = rel_segments.iter().map(String::as_str).collect(); + if glob_match(&pat_segments, &rel_refs) { + out.push(file); + } + } + Ok(out) +} + +fn has_wildcard(s: &str) -> bool { + s.contains('*') || s.contains('?') +} + +/// Split a glob pattern into `(base_without_wildcards, remainder)`. e.g. +/// `dist/assets/**/*.png` → (`dist/assets`, `**/*.png`). +fn split_glob_base(pattern: &str) -> (String, String) { + let segments: Vec<&str> = pattern.split('/').collect(); + let mut base = Vec::new(); + let mut idx = 0; + while idx < segments.len() && !has_wildcard(segments[idx]) { + base.push(segments[idx]); + idx += 1; + } + let rest: Vec<&str> = segments[idx..].to_vec(); + (base.join("/"), rest.join("/")) +} + +/// Recursively collect all regular files under `dir`, sorted for determinism. +fn walk_files(dir: &Path) -> Vec { + let mut out: Vec = walkdir::WalkDir::new(dir) + .follow_links(false) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .map(|e| e.into_path()) + .collect(); + out.sort(); + out +} + +/// Match glob pattern segments against path segments. `**` matches zero or more +/// path segments; within a segment, `*` matches any run of non-`/` chars and +/// `?` matches a single char. +fn glob_match(pat: &[&str], path: &[&str]) -> bool { + match pat.split_first() { + None => path.is_empty(), + Some((&"**", rest)) => { + // `**` consumes 0..=path.len() leading segments. + (0..=path.len()).any(|i| glob_match(rest, &path[i..])) + } + Some((seg, rest)) => match path.split_first() { + Some((first, prest)) if segment_match(seg, first) => glob_match(rest, prest), + _ => false, + }, + } +} + +/// Match a single path segment against a single pattern segment containing +/// `*` / `?` wildcards. +fn segment_match(pat: &str, text: &str) -> bool { + let p: Vec = pat.chars().collect(); + let t: Vec = text.chars().collect(); + fn rec(p: &[char], t: &[char]) -> bool { + match p.split_first() { + None => t.is_empty(), + Some(('*', prest)) => (0..=t.len()).any(|i| rec(prest, &t[i..])), + Some(('?', prest)) => !t.is_empty() && rec(prest, &t[1..]), + Some((c, prest)) => match t.split_first() { + Some((tc, trest)) if tc == c => rec(prest, trest), + _ => false, + }, + } + } + rec(&p, &t) +} + +/// Project-root-relative name with `/` separators, e.g. `dist/index.html`. +/// +/// Canonicalizes both the candidate and the project root before stripping so a +/// pattern that escapes the root (`--embed ../secret.txt`, or a symlink out of +/// the tree) is rejected (`None`) rather than producing a `../`-laden key. +fn relative_name(path: &Path, project_root: &Path) -> Option { + let root = project_root.canonicalize().ok()?; + let abs = path.canonicalize().ok()?; + let rel = abs.strip_prefix(&root).ok()?; + let name = rel + .components() + .map(|c| c.as_os_str().to_string_lossy()) + .collect::>() + .join("/"); + if name.is_empty() { + None + } else { + Some(name) + } +} + +/// Emit and compile the embedded-asset registration object. Returns `Ok(None)` +/// when there are no assets. The `.o` calls `js_register_embedded_asset` (a +/// perry-runtime symbol) once per file from a startup constructor. +pub(super) fn generate_embedded_asset_object( + assets: &[(String, PathBuf)], + output_dir: &Path, +) -> Result> { + if assets.is_empty() { + return Ok(None); + } + // The generated object is compiled with `cc` and linked into the executable. + // On a Windows host Perry links with MSVC `link.exe`, which is ABI- + // incompatible with a `cc`-produced object (and `cl.exe` supports neither + // `__attribute__((constructor))` nor `.incbin`). Fail loudly here rather + // than emit an object that silently won't link — Windows / cross-target + // embedding is a tracked follow-up to #5731. + if cfg!(windows) { + return Err(anyhow!( + "`--embed` is currently supported only on Unix-like hosts (macOS/Linux); \ + Windows embedding is a follow-up to #5731" + )); + } + + let c_path = output_dir.join("__perry_embedded_assets.c"); + let obj_path = output_dir.join("__perry_embedded_assets.o"); + + // Mach-O prefixes C symbols with `_` and names its read-only-const section + // `__TEXT,__const`; ELF uses the bare symbol and `.rodata`. Perry runs on + // the host it targets here (cross-target embedding is a follow-up), so the + // host `cfg` picks the right pair. + let sym_prefix = if cfg!(target_os = "macos") { "_" } else { "" }; + let rodata_section = if cfg!(target_os = "macos") { + "__TEXT,__const" + } else { + ".rodata" + }; + + let mut c = String::new(); + c.push_str("// Auto-generated by Perry — embedded asset table (#5731).\n"); + c.push_str("// Each asset's bytes are pulled straight into read-only data via a\n"); + c.push_str("// module-level `.incbin` (no source-level byte expansion, so multi-MB\n"); + c.push_str("// binary assets compile in constant time). A startup constructor then\n"); + c.push_str("// registers them into the runtime registry before `main`.\n"); + c.push_str("#include \n\n"); + c.push_str("extern void js_register_embedded_asset(const char *name, size_t name_len, const char *bytes, size_t bytes_len);\n\n"); + + for (idx, (name, path)) in assets.iter().enumerate() { + // Names are tiny — keep them as ASCII-clean C string literals. + let name_lit = c_byte_literal(name.as_bytes()); + writeln!( + c, + "static const char PERRY_ASSET_NAME_{idx}[] = {name_lit};" + ) + .ok(); + writeln!( + c, + "static const size_t PERRY_ASSET_NAME_LEN_{idx} = {};", + name.len() + ) + .ok(); + + // Bytes come from the file itself via `.incbin`, bracketed by a start + // and end label so the C side recovers the length as a link-time + // constant (end − start). `.incbin` needs an unambiguous path, so feed + // it the canonical absolute path. + let abs = path + .canonicalize() + .map_err(|e| anyhow!("failed to resolve embed asset {}: {}", path.display(), e))?; + let start = format!("{sym_prefix}PERRY_ASSET_DATA_{idx}"); + let end = format!("{sym_prefix}PERRY_ASSET_END_{idx}"); + // Assembler-level escape for the path inside `.incbin "..."`; `asm_line` + // adds the C-string-literal escaping on top. + let asm_path = abs + .to_string_lossy() + .replace('\\', "\\\\") + .replace('"', "\\\""); + c.push_str("__asm__(\n"); + c.push_str(&asm_line(&format!(".section {rodata_section}"))); + c.push_str(&asm_line(&format!(".globl {start}"))); + c.push_str(&asm_line(&format!("{start}:"))); + c.push_str(&asm_line(&format!(".incbin \"{asm_path}\""))); + c.push_str(&asm_line(&format!(".globl {end}"))); + c.push_str(&asm_line(&format!("{end}:"))); + c.push_str(");\n"); + writeln!(c, "extern const char PERRY_ASSET_DATA_{idx}[];").ok(); + writeln!(c, "extern const char PERRY_ASSET_END_{idx}[];").ok(); + } + + // Constructor priority 101: runs before `main`'s `js_runtime_init`, so the + // registry is populated by the time any user code or fs read consults it. + c.push_str("__attribute__((constructor(101)))\n"); + c.push_str("static void perry_register_embedded_assets(void) {\n"); + for idx in 0..assets.len() { + writeln!( + c, + " js_register_embedded_asset(PERRY_ASSET_NAME_{idx}, PERRY_ASSET_NAME_LEN_{idx}, PERRY_ASSET_DATA_{idx}, (size_t)(PERRY_ASSET_END_{idx} - PERRY_ASSET_DATA_{idx}));" + ) + .ok(); + } + c.push_str("}\n"); + + fs::write(&c_path, &c)?; + + let status = Command::new("cc") + .arg("-c") + .arg(&c_path) + .arg("-O0") + .arg("-o") + .arg(&obj_path) + .status() + .map_err(|e| anyhow!("failed to invoke cc for embedded assets: {}", e))?; + if !status.success() { + return Err(anyhow!( + "cc failed to compile embedded asset table ({})", + c_path.display() + )); + } + Ok(Some(obj_path)) +} + +/// Render one line of assembler as a C string literal for a module-level +/// `__asm__(...)` block: C-escape `"`/`\`, wrap in quotes, and append the +/// assembler newline (`\n`). e.g. `.globl foo` → ` ".globl foo\n"`. +fn asm_line(line: &str) -> String { + let mut out = String::with_capacity(line.len() + 8); + out.push_str(" \""); + for ch in line.chars() { + match ch { + '\\' => out.push_str("\\\\"), + '"' => out.push_str("\\\""), + _ => out.push(ch), + } + } + out.push_str("\\n\"\n"); + out +} + +/// Render arbitrary bytes as a C string literal. Printable ASCII passes +/// through; quotes/backslash/control bytes and everything ≥0x80 use octal +/// escapes, keeping the generated source ASCII-clean and binary-safe (the +/// length is tracked separately, so embedded NULs are fine). Mirrors the +/// `c_string_literal` helper in `targets.rs`. +fn c_byte_literal(bytes: &[u8]) -> String { + let mut out = String::with_capacity(bytes.len() + 2); + out.push('"'); + for &b in bytes { + match b { + b'"' => out.push_str("\\\""), + b'\\' => out.push_str("\\\\"), + b'\n' => out.push_str("\\n"), + b'\r' => out.push_str("\\r"), + b'\t' => out.push_str("\\t"), + b'?' => out.push_str("\\?"), + 0x20..=0x7E => out.push(b as char), + _ => { + let _ = write!(out, "\\{:03o}", b); + } + } + } + out.push('"'); + out +} + +#[cfg(test)] +mod tests { + use super::*; + + fn seg(s: &str) -> Vec<&str> { + s.split('/').filter(|x| !x.is_empty()).collect() + } + + #[test] + fn double_star_matches_any_depth() { + assert!(glob_match(&seg("**"), &seg("index.html"))); + assert!(glob_match(&seg("**"), &seg("assets/app.js"))); + assert!(glob_match(&seg("**"), &seg("a/b/c/d.png"))); + assert!(glob_match(&seg("**"), &[])); // ** matches zero segments + } + + #[test] + fn double_star_with_extension_filter() { + let p = seg("**/*.png"); + assert!(glob_match(&p, &seg("logo.png"))); + assert!(glob_match(&p, &seg("assets/logo.png"))); + assert!(glob_match(&p, &seg("a/b/logo.png"))); + assert!(!glob_match(&p, &seg("logo.jpg"))); + assert!(!glob_match(&p, &seg("assets/app.js"))); + } + + #[test] + fn single_star_is_one_segment_only() { + let p = seg("*.css"); + assert!(glob_match(&p, &seg("main.css"))); + assert!(!glob_match(&p, &seg("nested/main.css"))); + } + + #[test] + fn question_mark_matches_single_char() { + assert!(segment_match("a?c", "abc")); + assert!(!segment_match("a?c", "ac")); + assert!(!segment_match("a?c", "abbc")); + } + + #[test] + fn split_glob_base_separates_static_prefix() { + assert_eq!( + split_glob_base("dist/assets/**/*.png"), + ("dist/assets".to_string(), "**/*.png".to_string()) + ); + assert_eq!( + split_glob_base("dist/**"), + ("dist".to_string(), "**".to_string()) + ); + assert_eq!( + split_glob_base("*.html"), + (String::new(), "*.html".to_string()) + ); + } + + #[test] + fn expand_directory_and_glob_resolve_relative_names() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + fs::create_dir_all(root.join("dist/assets")).unwrap(); + fs::write(root.join("dist/index.html"), b"").unwrap(); + fs::write(root.join("dist/assets/app.js"), b"1").unwrap(); + fs::write(root.join("dist/assets/logo.png"), b"PNG").unwrap(); + + // Whole directory. + let all = resolve_embedded_assets(&["./dist".into()], root).unwrap(); + let names: Vec<&str> = all.iter().map(|(n, _)| n.as_str()).collect(); + assert_eq!( + names, + vec![ + "dist/assets/app.js", + "dist/assets/logo.png", + "dist/index.html" + ] + ); + + // Glob with extension filter. + let pngs = resolve_embedded_assets(&["./dist/**/*.png".into()], root).unwrap(); + let names: Vec<&str> = pngs.iter().map(|(n, _)| n.as_str()).collect(); + assert_eq!(names, vec!["dist/assets/logo.png"]); + + // De-dup across overlapping patterns. + let merged = + resolve_embedded_assets(&["./dist/**".into(), "./dist/index.html".into()], root) + .unwrap(); + assert_eq!(merged.len(), 3); + } + + #[test] + fn asm_line_escapes_and_appends_newline() { + assert_eq!(asm_line(".globl foo"), " \".globl foo\\n\"\n"); + // A path with a quote/backslash gets C-escaped (one more layer on top + // of the assembler-level escaping the caller already applied). + assert_eq!( + asm_line(r#".incbin "a\b""#), + " \".incbin \\\"a\\\\b\\\"\\n\"\n" + ); + } + + #[test] + fn binary_literal_is_ascii_clean_and_escapes_specials() { + let lit = c_byte_literal(&[0x00, b'"', b'\\', 0x41, 0xFF]); + assert_eq!(lit, "\"\\000\\\"\\\\A\\377\""); + assert!(lit.is_ascii()); + } +} diff --git a/crates/perry/src/commands/compile/run_pipeline.rs b/crates/perry/src/commands/compile/run_pipeline.rs index f72617a9a4..83c8344e16 100644 --- a/crates/perry/src/commands/compile/run_pipeline.rs +++ b/crates/perry/src/commands/compile/run_pipeline.rs @@ -4926,6 +4926,32 @@ pub fn run_with_parse_cache( }); } + // #5731 — embed static assets into the binary. Merge `--embed` with + // `perry.embed` / `[compile] embed`, expand globs/directories relative to + // the project root, and emit a registration object linked alongside the + // user objects. The runtime serves these via `perry` / node:fs at runtime. + let embedded_assets = embed::resolve_embedded_assets(&args.embed, &project_root)?; + if !embedded_assets.is_empty() { + if let Some(obj) = + embed::generate_embedded_asset_object(&embedded_assets, &object_output_dir)? + { + obj_cleanup_paths.push(obj.clone()); + obj_paths.push(obj); + obj_fingerprints.push(None); + } + let total: u64 = embedded_assets + .iter() + .filter_map(|(_, p)| fs::metadata(p).ok().map(|m| m.len())) + .sum(); + if let OutputFormat::Text = format { + println!( + "Embedding {} asset(s) ({} bytes) into the executable", + embedded_assets.len(), + total + ); + } + } + match format { OutputFormat::Text => { if ctx.needs_stdlib { diff --git a/crates/perry/src/commands/compile/types.rs b/crates/perry/src/commands/compile/types.rs index 739039a2d4..dda3071457 100644 --- a/crates/perry/src/commands/compile/types.rs +++ b/crates/perry/src/commands/compile/types.rs @@ -138,6 +138,16 @@ pub struct CompileArgs { #[arg(long)] pub bundle_extensions: Option, + /// Embed static assets/files into the standalone executable (#5731). + /// Accepts files, directories, or `**`/`*`-style glob patterns relative to + /// the project root, e.g. `--embed "./dist/**" --embed ./logo.png`. The + /// matched bytes are baked into the binary; at runtime they are readable + /// via `import { embeddedFiles, readEmbedded } from "perry"` and through + /// `node:fs` at their `$perryfs/` virtual path. Merged with + /// `perry.embed` (package.json) / `[compile] embed` (perry.toml). Repeatable. + #[arg(long)] + pub embed: Vec, + /// Enable type checking via tsgo (Microsoft's native TypeScript checker). /// Resolves cross-file types, interfaces, and generics for better optimization. /// Requires: npm install -g @typescript/native-preview @@ -527,6 +537,13 @@ pub struct CompilationContext { pub package_aliases: HashMap, /// Packages to compile natively instead of routing to V8 (from perry.compilePackages) pub compile_packages: HashSet, + /// #5731 — assets to embed into the standalone executable, as + /// `(embed-relative name, absolute source path)` pairs. Populated by + /// merging the `--embed` flag with `perry.embed` / `[compile] embed` and + /// expanding globs/directories. The embed-relative name (e.g. + /// `dist/index.html`) is the runtime registry key and `$perryfs/` virtual + /// path suffix. + pub embedded_assets: Vec<(String, PathBuf)>, /// #1681 (Phase 3 of #1677): true when this is the build-time capture /// stage (the `current_exe` subprocess), so `precompile(EXPR)` sites /// emit their build-time value instead of substituting. Re-installed on @@ -954,6 +971,7 @@ impl CompilationContext { native_libraries: Vec::new(), package_aliases: HashMap::new(), compile_packages: HashSet::new(), + embedded_assets: Vec::new(), precompile_capture: false, precompile_results: HashMap::new(), fast_math: false, diff --git a/crates/perry/src/commands/dev.rs b/crates/perry/src/commands/dev.rs index d6641ff287..442860937d 100644 --- a/crates/perry/src/commands/dev.rs +++ b/crates/perry/src/commands/dev.rs @@ -293,6 +293,7 @@ fn build_once( app_bundle_id: None, output_type: "executable".to_string(), bundle_extensions: None, + embed: Vec::new(), type_check: false, minify: false, features: None, diff --git a/crates/perry/src/commands/run/mod.rs b/crates/perry/src/commands/run/mod.rs index f0f917a455..3b87a26a37 100644 --- a/crates/perry/src/commands/run/mod.rs +++ b/crates/perry/src/commands/run/mod.rs @@ -205,6 +205,7 @@ pub fn run(args: RunArgs, format: OutputFormat, use_color: bool, verbose: u8) -> app_bundle_id: Some(bundle_id), output_type: "executable".to_string(), bundle_extensions: None, + embed: Vec::new(), type_check: args.type_check, minify: target.as_deref() == Some("web"), features: None, diff --git a/crates/perry/tests/issue_5731_embedded_assets.rs b/crates/perry/tests/issue_5731_embedded_assets.rs new file mode 100644 index 0000000000..7b2874a949 --- /dev/null +++ b/crates/perry/tests/issue_5731_embedded_assets.rs @@ -0,0 +1,96 @@ +//! #5731 — embed static assets/files into standalone executables. +//! +//! `perry compile --embed "./dist/**"` bakes matched files into the binary. +//! At runtime they are reachable three ways, all exercised here: +//! * `import { embeddedFiles } from "perry"` — `{ name, size, type }` per asset +//! * `import { readEmbedded } from "perry"` — bytes as a `Buffer` +//! * `node:fs` (`readFileSync` / `existsSync`) via the `$perryfs/` path +//! plus `isStandaloneExecutable` (always `true` in a compiled binary). +//! +//! Asset embedding is host-only (Unix-like): it compiles a `cc` object that +//! MSVC `link.exe` can't consume, so the feature errors on a Windows host and +//! this end-to-end test is skipped there. +#![cfg(not(windows))] + +use std::path::PathBuf; +use std::process::Command; + +fn perry_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_perry")) +} + +#[test] +fn embeds_assets_and_reads_them_back() { + let dir = tempfile::tempdir().expect("tempdir"); + let root = dir.path(); + std::fs::create_dir_all(root.join("dist/assets")).expect("mkdir dist/assets"); + std::fs::write(root.join("dist/index.html"), b"HELLO_EMBED").expect("write index.html"); + std::fs::write(root.join("dist/assets/app.js"), b"console.log(1)").expect("write app.js"); + + let entry = root.join("main.ts"); + std::fs::write( + &entry, + r#" +import { embeddedFiles, readEmbedded, isStandaloneExecutable } from "perry"; +import * as fs from "fs"; + +console.log("standalone:", isStandaloneExecutable); +const files = embeddedFiles(); +console.log("count:", files.length); +console.log("names:", files.map(f => f.name).sort().join(",")); +console.log("readEmbedded:", readEmbedded("dist/index.html").toString()); +console.log("viaFs:", fs.readFileSync("$perryfs/dist/index.html", "utf8")); +const html = files.find(f => f.name === "dist/index.html"); +console.log("type:", html.type, "size:", html.size); +console.log("exists:", fs.existsSync("$perryfs/dist/assets/app.js")); +console.log("existsMissing:", fs.existsSync("$perryfs/nope.txt")); +try { readEmbedded("nope.txt"); console.log("throwMissing: no"); } +catch (e) { console.log("throwMissing: yes"); } +"#, + ) + .expect("write entry"); + + let output = root.join("app"); + let compile = Command::new(perry_bin()) + .current_dir(root) + .arg("compile") + .arg(&entry) + .arg("--embed") + .arg("./dist/**") + .arg("-o") + .arg(&output) + .output() + .expect("run perry compile"); + assert!( + compile.status.success(), + "perry compile failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&compile.stdout), + String::from_utf8_lossy(&compile.stderr) + ); + + let run = Command::new(&output) + .current_dir(root) + .output() + .expect("run compiled binary"); + assert!( + run.status.success(), + "compiled binary failed\nstatus: {:?}\nstdout:\n{}\nstderr:\n{}", + run.status, + String::from_utf8_lossy(&run.stdout), + String::from_utf8_lossy(&run.stderr) + ); + let stdout = String::from_utf8_lossy(&run.stdout); + assert_eq!( + stdout, + "standalone: true\n\ + count: 2\n\ + names: dist/assets/app.js,dist/index.html\n\ + readEmbedded: HELLO_EMBED\n\ + viaFs: HELLO_EMBED\n\ + type: text/html; charset=utf-8 size: 11\n\ + exists: true\n\ + existsMissing: false\n\ + throwMissing: yes\n", + "unexpected runtime output" + ); +} diff --git a/docs/api/perry.d.ts b/docs/api/perry.d.ts index 78b0ac1ab8..cf1e2dd8d0 100644 --- a/docs/api/perry.d.ts +++ b/docs/api/perry.d.ts @@ -1,6 +1,6 @@ // Auto-generated from Perry's API manifest (#465). Do not edit by hand. // Source: perry-api-manifest::API_MANIFEST -// Coverage: 1947 entries across 113 modules +// Coverage: 1950 entries across 114 modules type PerryU32 = number & { readonly __perryU32?: never }; type PerryU64 = number & { readonly __perryU64?: never }; @@ -2320,6 +2320,15 @@ declare module "perf_hooks" { export function timerify(...args: any[]): any; } +declare module "perry" { + /** stdlib */ + export const isStandaloneExecutable: any; + /** stdlib */ + export function embeddedFiles(...args: any[]): any; + /** stdlib */ + export function readEmbedded(path: string): Buffer; +} + declare module "perry/ads" { /** stdlib */ export function js_ads_banner_create(...args: any[]): any; diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index ffb31fdae9..e4fe3da501 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -2,7 +2,7 @@ This page is auto-generated from Perry's compile-time API manifest (`perry-api-manifest::API_MANIFEST`). It is the source of truth for what `perry compile` accepts; references to symbols not listed here produce `R005 UnimplementedApi` (issue #463). Stubs (#464) are flagged ⚠ — they link cleanly but no-op at runtime on the chosen target. -Total: 2821 entries across 115 modules. +Total: 2824 entries across 116 modules. ## Modules @@ -66,6 +66,7 @@ Total: 2821 entries across 115 modules. - [`path/posix`](#path-posix) - [`path/win32`](#path-win32) - [`perf_hooks`](#perf-hooks) +- [`perry`](#perry) - [`perry/ads`](#perry-ads) - [`perry/audio`](#perry-audio) - [`perry/background`](#perry-background) @@ -2235,6 +2236,17 @@ Total: 2821 entries across 115 modules. - `constants` - `performance` +## `perry` + +### Methods + +- `embeddedFiles` — module +- `readEmbedded` — module + +### Properties + +- `isStandaloneExecutable` + ## `perry/ads` ### Methods diff --git a/docs/src/cli/flags.md b/docs/src/cli/flags.md index 2cd0e6ec13..8757660fac 100644 --- a/docs/src/cli/flags.md +++ b/docs/src/cli/flags.md @@ -46,6 +46,51 @@ Use `--output-type` to change what's produced: | `executable` | Standalone binary (default) | | `dylib` | Shared library (`.dylib`/`.so`) for [plugins](../plugins/overview.md) | +## Embedding Assets + +Bake static files (an SPA `dist/`, images, JSON, fonts, …) into the standalone +executable so it runs with no external files on disk (#5731). + +| Flag | Description | +|------|-------------| +| `--embed ` | Embed a file, directory, or `*`/`**` glob (relative to the project root). Repeatable. Merged with `perry.embed` (package.json) and `[compile] embed` (perry.toml). | + +```bash +vite build +perry compile server.ts --embed "./dist/**" -o myapp +./myapp # serves dist/ from memory — no dist/ folder needed +``` + +Embedded files are reachable at runtime three ways: + +```ts +import { embeddedFiles, readEmbedded, isStandaloneExecutable } from "perry"; +import { readFileSync } from "fs"; + +for (const f of embeddedFiles()) { + // f.name (e.g. "dist/index.html"), f.size, f.type (MIME) + app.get("/" + f.name, (_, reply) => reply.type(f.type).send(readEmbedded(f.name))); +} + +// or via node:fs at the `$perryfs/` virtual path: +const html = readFileSync("$perryfs/dist/index.html", "utf8"); +``` + +`embeddedFiles()` is a function (not a bare value like Bun's `embeddedFiles`) so +that array methods dispatch on its result. `readEmbedded(path)` and `node:fs` +accept either the `$perryfs/` virtual path or the embed-relative key. + +> **Note** +> `node:fs` consults the embedded registry *before* disk, and a bare +> embed-relative key matches too — so `readFileSync("dist/index.html")` returns +> the **embedded** bytes even if a `dist/index.html` exists on disk next to the +> binary. Read a real on-disk file by absolute path, and use the explicit +> `$perryfs/` form when you specifically mean the embedded copy. +> +> Embedding currently requires a Unix-like host toolchain (macOS/Linux); on a +> Windows host `--embed` errors out. Cross-target / Windows embedding is a +> tracked follow-up. + ## Debug Flags | Flag | Description | @@ -113,6 +158,10 @@ version = "1.0.0" [build] out_dir = "build" +[compile] +# Embed static assets into the standalone executable (same as repeated --embed). +embed = ["./dist/**"] + [app] name = "My App" description = "A Perry application" diff --git a/types/perry/index.d.ts b/types/perry/index.d.ts new file mode 100644 index 0000000000..77f4ba7e91 --- /dev/null +++ b/types/perry/index.d.ts @@ -0,0 +1,43 @@ +// Type declarations for the bare `perry` module — Perry's standalone-executable +// embedded-asset API (#5731). These let `tsc` / IDEs resolve +// `import { embeddedFiles, readEmbedded, isStandaloneExecutable } from "perry"`. +// +// Assets are baked into the binary at compile time via +// `perry compile --embed "./dist/**"` (or `perry.embed` in package.json / +// `[compile] embed` in perry.toml). At runtime they are also readable through +// `node:fs` via their `$perryfs/` virtual path. + +/** Metadata for a single asset embedded into the standalone executable. */ +export interface EmbeddedFile { + /** Embed-relative path, e.g. `dist/index.html`. Use as the route key. */ + readonly name: string; + /** Size of the asset in bytes. */ + readonly size: number; + /** Best-effort MIME type inferred from the file extension. */ + readonly type: string; +} + +/** + * All files embedded into this executable, sorted by their embed-relative path + * (deterministic across builds, after de-duplication). Returns a fresh array on + * each call (empty for a non-embedded build). + * + * Note: exposed as a function (not a bare value like Bun's `embeddedFiles`) so + * that array methods dispatch correctly on the result — + * `embeddedFiles().map(f => f.name)`. + */ +export function embeddedFiles(): ReadonlyArray; + +/** + * `true` when running as a Perry-compiled standalone executable. Always `true` + * at runtime (Perry has no interpreter mode); useful as a dev-vs-compiled guard + * in code shared with a Node/tsx dev workflow. + */ +export const isStandaloneExecutable: boolean; + +/** + * Read an embedded asset's bytes. Accepts either the `$perryfs/` virtual + * path or the embed-relative key (`dist/index.html`). Returns the bytes as a + * `Buffer` (a `Uint8Array`); throws an `Error` when the asset is not found. + */ +export function readEmbedded(path: string): Buffer;