diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75b643f..ab52afd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,12 @@ on: push: branches: - main + # The long-lived `migration` branch (PR #71) sits behind `main` and is + # frequently in a conflicting/dirty merge state. `pull_request` runs only + # against the test-merge commit, which GitHub cannot create while the PR + # conflicts — so CI never starts. A `push` trigger runs against the branch + # head directly and is unaffected by conflicts, keeping CI alive here. + - migration pull_request: schedule: - cron: "0 4 * * 0,3" # 4 a.m. UTC every Sun and Wed, keep actions-cache available diff --git a/Cargo.lock b/Cargo.lock index 7fe7883..5ebbd0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -831,6 +831,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "sha2 0.11.0", "tempfile", ] @@ -1024,6 +1025,19 @@ dependencies = [ "wasip3", ] +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "group" version = "0.13.0" @@ -1997,6 +2011,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + [[package]] name = "lalrpop" version = "0.20.2" @@ -3092,15 +3117,19 @@ dependencies = [ "brotli", "candid", "flate2", + "globset", "hex", "http", + "json5", "mime", "mime_guess", "serde", "serde_bytes", + "serde_json", "sha2 0.11.0", "tempfile", "url", + "walkdir", ] [[package]] diff --git a/crates/e2e/Cargo.toml b/crates/e2e/Cargo.toml index c2de19f..e099a83 100644 --- a/crates/e2e/Cargo.toml +++ b/crates/e2e/Cargo.toml @@ -15,3 +15,6 @@ reqwest = { version = "0.12", default-features = false, features = ["blocking"] serde.workspace = true serde_json.workspace = true tempfile.workspace = true + +[build-dependencies] +sha2.workspace = true diff --git a/crates/e2e/build.rs b/crates/e2e/build.rs index 7f5ad4c..3a446ae 100644 --- a/crates/e2e/build.rs +++ b/crates/e2e/build.rs @@ -1,8 +1,16 @@ +use sha2::{Digest, Sha256}; use std::{env, path::Path, path::PathBuf, process::Command}; +/// The legacy `assetstorage` canister shipped with dfx 0.32.0. The migration +/// plugin targets this canister, so the e2e suite deploys it (gzipped — icp-cli +/// installs `.wasm.gz` directly) rather than this repo's canister. +const LEGACY_WASM_URL: &str = + "https://github.com/dfinity/sdk/releases/download/0.32.0/assetstorage.wasm.gz"; +/// sha256 published on the GH release page for `assetstorage.wasm.gz`. +const LEGACY_WASM_SHA256: &str = "04e565b3425fe7510ee16b02adcfe3f01abc9a2725c82a21cb08969241debd62"; + fn main() { - println!("cargo:rerun-if-changed=../canister/src"); - println!("cargo:rerun-if-changed=../canister-core/src"); + println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-changed=../sync-plugin/src"); println!("cargo:rerun-if-changed=../sync-core/src"); @@ -12,16 +20,12 @@ fn main() { .parent() .and_then(Path::parent) .expect("crates/e2e/ must have a workspace root two levels up"); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - build_wasm(workspace_root, "canister", "wasm32-unknown-unknown"); + let canister_wasm = fetch_legacy_canister(&out_dir); build_wasm(workspace_root, "sync-plugin", "wasm32-wasip2"); - println!( - "cargo:rustc-env=CANISTER_WASM={}", - workspace_root - .join("target/wasm32-unknown-unknown/release/canister.wasm") - .display() - ); + println!("cargo:rustc-env=CANISTER_WASM={}", canister_wasm.display()); println!( "cargo:rustc-env=PLUGIN_WASM={}", workspace_root @@ -30,6 +34,35 @@ fn main() { ); } +/// Downloads (and caches in `OUT_DIR`) the legacy `assetstorage.wasm.gz`, +/// verifying it against the pinned sha256. Returns the path to the `.gz`. +fn fetch_legacy_canister(out_dir: &Path) -> PathBuf { + let dest = out_dir.join("assetstorage.wasm.gz"); + if dest.exists() && sha256_hex(&dest) == LEGACY_WASM_SHA256 { + return dest; + } + let status = Command::new("curl") + .args(["-sSL", "-o"]) + .arg(&dest) + .arg(LEGACY_WASM_URL) + .status() + .unwrap_or_else(|e| panic!("failed to spawn curl for {LEGACY_WASM_URL}: {e}")); + assert!(status.success(), "curl {LEGACY_WASM_URL} failed"); + + let got = sha256_hex(&dest); + assert_eq!( + got, LEGACY_WASM_SHA256, + "sha256 mismatch for assetstorage.wasm.gz (got {got}, expected {LEGACY_WASM_SHA256})" + ); + dest +} + +fn sha256_hex(path: &Path) -> String { + let bytes = std::fs::read(path).unwrap_or_else(|e| panic!("read {}: {e}", path.display())); + let digest = Sha256::digest(&bytes); + digest.iter().map(|b| format!("{b:02x}")).collect() +} + fn build_wasm(workspace_root: &Path, package: &str, target: &str) { let status = Command::new("cargo") .args(["build", "-p", package, "--target", target, "--release"]) diff --git a/crates/e2e/src/lib.rs b/crates/e2e/src/lib.rs index 339f29b..3677d36 100644 --- a/crates/e2e/src/lib.rs +++ b/crates/e2e/src/lib.rs @@ -81,9 +81,10 @@ pub fn copy_dir_contents(src: &Path, dst: &Path) -> std::io::Result<()> { Ok(()) } -/// Set up an isolated copy of a fixture in a temporary directory, with -/// pre-built WASM modules placed at `wasms/canister.wasm` and -/// `wasms/plugin.wasm` (paths supplied by the build script). +/// Set up an isolated copy of a fixture in a temporary directory, with the +/// pre-built modules placed at `wasms/canister.wasm.gz` (the legacy dfx 0.32.0 +/// `assetstorage`, installed gzipped) and `wasms/plugin.wasm` (paths supplied +/// by the build script). /// /// `fixture_path` is relative to the e2e crate root (e.g. `"tests/fixture/basic"`). /// The returned `TempDir` must be kept alive for the duration of the test. @@ -97,8 +98,8 @@ pub fn setup_project(fixture_path: &str) -> tempfile::TempDir { let wasms_dir = tmp.path().join("wasms"); fs::create_dir_all(&wasms_dir).expect("failed to create wasms/ dir"); - fs::copy(env!("CANISTER_WASM"), wasms_dir.join("canister.wasm")) - .expect("failed to copy canister.wasm"); + fs::copy(env!("CANISTER_WASM"), wasms_dir.join("canister.wasm.gz")) + .expect("failed to copy canister.wasm.gz"); fs::copy(env!("PLUGIN_WASM"), wasms_dir.join("plugin.wasm")) .expect("failed to copy plugin.wasm"); diff --git a/crates/e2e/tests/config.rs b/crates/e2e/tests/config.rs new file mode 100644 index 0000000..ddeff87 --- /dev/null +++ b/crates/e2e/tests/config.rs @@ -0,0 +1,70 @@ +//! End-to-end checks for `.ic-assets.json5` handling against the legacy +//! dfx 0.32.0 `assetstorage` canister: per-asset headers, `cache.max_age`, +//! `enable_aliasing`, and the `security_policy` CSP preset are all applied, and +//! the config file itself is never uploaded as an asset. + +use e2e::{get_asset_properties, http_fetch, icp_cmd, list_assets, setup_project, LocalNetwork}; + +#[test] +fn config_applies_properties_headers_and_aliasing() { + let tmp = setup_project("tests/fixture/config"); + let project = tmp.path(); + let _network = LocalNetwork::start(project); + + icp_cmd(project).arg("deploy").assert().success(); + + // The `.ic-assets.json5` file is configuration, not an asset. + let assets = list_assets(project); + assert!( + !assets.iter().any(|a| a.key == "/.ic-assets.json5"), + "the config file must not be uploaded as an asset; got: {assets:#?}", + ); + assert!( + assets.iter().any(|a| a.key == "/index.html"), + "expected /index.html in asset list; got: {assets:#?}", + ); + + // index.html resolves both rules: max_age 600 + aliasing from the *.html + // rule, plus the standard security policy + custom header from **/*. + let props = get_asset_properties(project, "/index.html"); + assert_eq!(props.max_age, Some(600), "max_age from cache config"); + assert_eq!(props.is_aliased, Some(true), "enable_aliasing from config"); + + let headers = props.headers.expect("index.html should have headers"); + let has = |name: &str| headers.iter().any(|(k, _)| k.eq_ignore_ascii_case(name)); + assert!( + headers + .iter() + .any(|(k, v)| k.eq_ignore_ascii_case("X-Custom") && v == "yes"), + "custom header from config missing; got: {headers:#?}", + ); + assert!( + has("Content-Security-Policy"), + "standard security policy CSP header missing; got: {headers:#?}", + ); + + // style.css gets only the **/* rule: custom header + CSP, but no max_age. + let css_props = get_asset_properties(project, "/style.css"); + assert_eq!(css_props.max_age, None, "css has no cache rule"); + let css_headers = css_props.headers.unwrap_or_default(); + assert!( + css_headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("X-Custom")), + "css should still carry the **/* custom header; got: {css_headers:#?}", + ); + + // The certified HTTP response carries the custom header (going through the + // gateway implicitly validates certification). + let resp = http_fetch(project, "/index.html"); + assert!( + resp.status().is_success(), + "GET /index.html: {}", + resp.status() + ); + assert_eq!( + resp.headers().get("x-custom").and_then(|v| v.to_str().ok()), + Some("yes"), + "custom header missing from HTTP response", + ); +} diff --git a/crates/e2e/tests/fixture/basic/icp.yaml b/crates/e2e/tests/fixture/basic/icp.yaml index 9afec8c..b521398 100644 --- a/crates/e2e/tests/fixture/basic/icp.yaml +++ b/crates/e2e/tests/fixture/basic/icp.yaml @@ -9,7 +9,7 @@ canisters: build: steps: - type: pre-built - path: wasms/canister.wasm + path: wasms/canister.wasm.gz sync: steps: diff --git a/crates/e2e/tests/fixture/config/dist/.ic-assets.json5 b/crates/e2e/tests/fixture/config/dist/.ic-assets.json5 new file mode 100644 index 0000000..71fa6bf --- /dev/null +++ b/crates/e2e/tests/fixture/config/dist/.ic-assets.json5 @@ -0,0 +1,16 @@ +[ + // Applies a standard security policy and a custom header to every asset. + { + "match": "**/*", + "security_policy": "standard", + "headers": { + "X-Custom": "yes", + }, + }, + // HTML files additionally get a cache max-age and explicit aliasing. + { + "match": "*.html", + "cache": { "max_age": 600 }, + "enable_aliasing": true, + }, +] diff --git a/crates/e2e/tests/fixture/config/dist/index.html b/crates/e2e/tests/fixture/config/dist/index.html new file mode 100644 index 0000000..5d4ee47 --- /dev/null +++ b/crates/e2e/tests/fixture/config/dist/index.html @@ -0,0 +1,5 @@ + + + config fixture +

config fixture index

+ diff --git a/crates/e2e/tests/fixture/config/dist/style.css b/crates/e2e/tests/fixture/config/dist/style.css new file mode 100644 index 0000000..693ce9b --- /dev/null +++ b/crates/e2e/tests/fixture/config/dist/style.css @@ -0,0 +1 @@ +body { font-family: sans-serif; } diff --git a/crates/e2e/tests/fixture/html-handling/icp.yaml b/crates/e2e/tests/fixture/config/icp.yaml similarity index 87% rename from crates/e2e/tests/fixture/html-handling/icp.yaml rename to crates/e2e/tests/fixture/config/icp.yaml index 9afec8c..b521398 100644 --- a/crates/e2e/tests/fixture/html-handling/icp.yaml +++ b/crates/e2e/tests/fixture/config/icp.yaml @@ -9,7 +9,7 @@ canisters: build: steps: - type: pre-built - path: wasms/canister.wasm + path: wasms/canister.wasm.gz sync: steps: diff --git a/crates/e2e/tests/fixture/headers-content-type/dist/_headers b/crates/e2e/tests/fixture/headers-content-type/dist/_headers deleted file mode 100644 index 6f88e84..0000000 --- a/crates/e2e/tests/fixture/headers-content-type/dist/_headers +++ /dev/null @@ -1,4 +0,0 @@ -# Content-Type overrides for files mime_guess doesn't classify usefully. - -/*.did - Content-Type: text/plain; charset=utf-8 diff --git a/crates/e2e/tests/fixture/headers-content-type/dist/ic.did b/crates/e2e/tests/fixture/headers-content-type/dist/ic.did deleted file mode 100644 index 1563fdb..0000000 --- a/crates/e2e/tests/fixture/headers-content-type/dist/ic.did +++ /dev/null @@ -1,3 +0,0 @@ -service : { - greet : (text) -> (text); -} diff --git a/crates/e2e/tests/fixture/headers-content-type/dist/index.html b/crates/e2e/tests/fixture/headers-content-type/dist/index.html deleted file mode 100644 index dd000f5..0000000 --- a/crates/e2e/tests/fixture/headers-content-type/dist/index.html +++ /dev/null @@ -1,2 +0,0 @@ - -hello diff --git a/crates/e2e/tests/fixture/headers-content-type/icp.yaml b/crates/e2e/tests/fixture/headers-content-type/icp.yaml deleted file mode 100644 index 9afec8c..0000000 --- a/crates/e2e/tests/fixture/headers-content-type/icp.yaml +++ /dev/null @@ -1,19 +0,0 @@ -networks: - - name: local - mode: managed - gateway: - port: 0 - -canisters: - - name: frontend - build: - steps: - - type: pre-built - path: wasms/canister.wasm - - sync: - steps: - - type: plugin - path: wasms/plugin.wasm - dirs: - - dist diff --git a/crates/e2e/tests/fixture/headers/dist/_astro/app.js b/crates/e2e/tests/fixture/headers/dist/_astro/app.js deleted file mode 100644 index 702645f..0000000 --- a/crates/e2e/tests/fixture/headers/dist/_astro/app.js +++ /dev/null @@ -1 +0,0 @@ -console.log("app"); diff --git a/crates/e2e/tests/fixture/headers/dist/_headers b/crates/e2e/tests/fixture/headers/dist/_headers deleted file mode 100644 index 34d9524..0000000 --- a/crates/e2e/tests/fixture/headers/dist/_headers +++ /dev/null @@ -1,10 +0,0 @@ -# integration fixture: cover exact, subtree, and global header rules -/index.html - X-Frame-Options: DENY - X-Content-Type-Options: nosniff - -/_astro/* - Cache-Control: public, max-age=31536000, immutable - -/* - X-Robots-Tag: noindex diff --git a/crates/e2e/tests/fixture/headers/dist/index.html b/crates/e2e/tests/fixture/headers/dist/index.html deleted file mode 100644 index 7211fc4..0000000 --- a/crates/e2e/tests/fixture/headers/dist/index.html +++ /dev/null @@ -1 +0,0 @@ -home diff --git a/crates/e2e/tests/fixture/headers/icp.yaml b/crates/e2e/tests/fixture/headers/icp.yaml deleted file mode 100644 index 9afec8c..0000000 --- a/crates/e2e/tests/fixture/headers/icp.yaml +++ /dev/null @@ -1,19 +0,0 @@ -networks: - - name: local - mode: managed - gateway: - port: 0 - -canisters: - - name: frontend - build: - steps: - - type: pre-built - path: wasms/canister.wasm - - sync: - steps: - - type: plugin - path: wasms/plugin.wasm - dirs: - - dist diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/404.html b/crates/e2e/tests/fixture/html-handling-with-catchall/dist/404.html deleted file mode 100644 index 09798d8..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/404.html +++ /dev/null @@ -1 +0,0 @@ -custom 404 diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/_redirects b/crates/e2e/tests/fixture/html-handling-with-catchall/dist/_redirects deleted file mode 100644 index aa33418..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/_redirects +++ /dev/null @@ -1 +0,0 @@ -/* /404.html 404 diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/blog/index.html b/crates/e2e/tests/fixture/html-handling-with-catchall/dist/blog/index.html deleted file mode 100644 index 5b0bc7e..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/blog/index.html +++ /dev/null @@ -1 +0,0 @@ -blog index body diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/foo.html b/crates/e2e/tests/fixture/html-handling-with-catchall/dist/foo.html deleted file mode 100644 index fbd08a2..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/foo.html +++ /dev/null @@ -1 +0,0 @@ -foo.html body diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/index.html b/crates/e2e/tests/fixture/html-handling-with-catchall/dist/index.html deleted file mode 100644 index 7e47fbe..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/dist/index.html +++ /dev/null @@ -1 +0,0 @@ -root index body diff --git a/crates/e2e/tests/fixture/html-handling-with-catchall/icp.yaml b/crates/e2e/tests/fixture/html-handling-with-catchall/icp.yaml deleted file mode 100644 index 9afec8c..0000000 --- a/crates/e2e/tests/fixture/html-handling-with-catchall/icp.yaml +++ /dev/null @@ -1,19 +0,0 @@ -networks: - - name: local - mode: managed - gateway: - port: 0 - -canisters: - - name: frontend - build: - steps: - - type: pre-built - path: wasms/canister.wasm - - sync: - steps: - - type: plugin - path: wasms/plugin.wasm - dirs: - - dist diff --git a/crates/e2e/tests/fixture/html-handling/dist/blog/index.html b/crates/e2e/tests/fixture/html-handling/dist/blog/index.html deleted file mode 100644 index 5b0bc7e..0000000 --- a/crates/e2e/tests/fixture/html-handling/dist/blog/index.html +++ /dev/null @@ -1 +0,0 @@ -blog index body diff --git a/crates/e2e/tests/fixture/html-handling/dist/foo.html b/crates/e2e/tests/fixture/html-handling/dist/foo.html deleted file mode 100644 index fbd08a2..0000000 --- a/crates/e2e/tests/fixture/html-handling/dist/foo.html +++ /dev/null @@ -1 +0,0 @@ -foo.html body diff --git a/crates/e2e/tests/fixture/html-handling/dist/index.html b/crates/e2e/tests/fixture/html-handling/dist/index.html deleted file mode 100644 index 7e47fbe..0000000 --- a/crates/e2e/tests/fixture/html-handling/dist/index.html +++ /dev/null @@ -1 +0,0 @@ -root index body diff --git a/crates/e2e/tests/fixture/multi-dir/dist-a/page.html b/crates/e2e/tests/fixture/multi-dir/dist-a/page.html deleted file mode 100644 index 550afa3..0000000 --- a/crates/e2e/tests/fixture/multi-dir/dist-a/page.html +++ /dev/null @@ -1,5 +0,0 @@ - - - Page A -

Page from dist-a

- diff --git a/crates/e2e/tests/fixture/multi-dir/dist-b/app.js b/crates/e2e/tests/fixture/multi-dir/dist-b/app.js deleted file mode 100644 index bc3439d..0000000 --- a/crates/e2e/tests/fixture/multi-dir/dist-b/app.js +++ /dev/null @@ -1 +0,0 @@ -console.log("app from dist-b"); diff --git a/crates/e2e/tests/fixture/multi-dir/icp.yaml b/crates/e2e/tests/fixture/multi-dir/icp.yaml deleted file mode 100644 index abb5bb0..0000000 --- a/crates/e2e/tests/fixture/multi-dir/icp.yaml +++ /dev/null @@ -1,20 +0,0 @@ -networks: - - name: local - mode: managed - gateway: - port: 0 - -canisters: - - name: frontend - build: - steps: - - type: pre-built - path: wasms/canister.wasm - - sync: - steps: - - type: plugin - path: wasms/plugin.wasm - dirs: - - dist-a - - dist-b diff --git a/crates/e2e/tests/fixture/nested/icp.yaml b/crates/e2e/tests/fixture/nested/icp.yaml new file mode 100644 index 0000000..28d3b15 --- /dev/null +++ b/crates/e2e/tests/fixture/nested/icp.yaml @@ -0,0 +1,22 @@ +networks: + - name: local + mode: managed + gateway: + port: 0 + +canisters: + - name: frontend + build: + steps: + - type: pre-built + path: wasms/canister.wasm.gz + + sync: + steps: + - type: plugin + path: wasms/plugin.wasm + # A multi-component (nested) directory. The host preopens it under a + # multi-segment WASI guest name, which broke `canonicalize`/`realpath` + # in the plugin's scan step (regression: forum post #97). + dirs: + - src/frontend/dist diff --git a/crates/e2e/tests/fixture/nested/src/frontend/dist/assets/style.css b/crates/e2e/tests/fixture/nested/src/frontend/dist/assets/style.css new file mode 100644 index 0000000..a918121 --- /dev/null +++ b/crates/e2e/tests/fixture/nested/src/frontend/dist/assets/style.css @@ -0,0 +1 @@ +body { color: rebeccapurple; } diff --git a/crates/e2e/tests/fixture/nested/src/frontend/dist/index.html b/crates/e2e/tests/fixture/nested/src/frontend/dist/index.html new file mode 100644 index 0000000..ebed777 --- /dev/null +++ b/crates/e2e/tests/fixture/nested/src/frontend/dist/index.html @@ -0,0 +1 @@ +

nested dir

diff --git a/crates/e2e/tests/fixture/redirects/dist/404.html b/crates/e2e/tests/fixture/redirects/dist/404.html deleted file mode 100644 index 1db48fa..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/404.html +++ /dev/null @@ -1 +0,0 @@ -custom not found diff --git a/crates/e2e/tests/fixture/redirects/dist/410.html b/crates/e2e/tests/fixture/redirects/dist/410.html deleted file mode 100644 index d5885b3..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/410.html +++ /dev/null @@ -1 +0,0 @@ -tombstone diff --git a/crates/e2e/tests/fixture/redirects/dist/_redirects b/crates/e2e/tests/fixture/redirects/dist/_redirects deleted file mode 100644 index 7e3c13f..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/_redirects +++ /dev/null @@ -1,7 +0,0 @@ -# integration fixture covering all three response kinds -/old /new.html 301 -/legacy https://example.com/ 302 -/missing-page /404.html 404 -/gone-page /410.html 410 -/about /about.html 200 -/blog/* /blog/index.html 200 diff --git a/crates/e2e/tests/fixture/redirects/dist/about.html b/crates/e2e/tests/fixture/redirects/dist/about.html deleted file mode 100644 index d7befca..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/about.html +++ /dev/null @@ -1 +0,0 @@ -about us diff --git a/crates/e2e/tests/fixture/redirects/dist/blog/index.html b/crates/e2e/tests/fixture/redirects/dist/blog/index.html deleted file mode 100644 index a735d3b..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/blog/index.html +++ /dev/null @@ -1 +0,0 @@ -blog index diff --git a/crates/e2e/tests/fixture/redirects/dist/new.html b/crates/e2e/tests/fixture/redirects/dist/new.html deleted file mode 100644 index 2608e27..0000000 --- a/crates/e2e/tests/fixture/redirects/dist/new.html +++ /dev/null @@ -1 +0,0 @@ -new page diff --git a/crates/e2e/tests/fixture/redirects/icp.yaml b/crates/e2e/tests/fixture/redirects/icp.yaml deleted file mode 100644 index 9afec8c..0000000 --- a/crates/e2e/tests/fixture/redirects/icp.yaml +++ /dev/null @@ -1,19 +0,0 @@ -networks: - - name: local - mode: managed - gateway: - port: 0 - -canisters: - - name: frontend - build: - steps: - - type: pre-built - path: wasms/canister.wasm - - sync: - steps: - - type: plugin - path: wasms/plugin.wasm - dirs: - - dist diff --git a/crates/e2e/tests/headers.rs b/crates/e2e/tests/headers.rs deleted file mode 100644 index 8e78a65..0000000 --- a/crates/e2e/tests/headers.rs +++ /dev/null @@ -1,81 +0,0 @@ -//! Integration tests for `_headers` end-to-end via the WASM plugin. -//! -//! Each test deploys a fixture to a local replica, then fetches paths via the -//! HTTP gateway. The gateway validates the response's `IC-Certificate` before -//! handing it back, so a successful fetch is also proof of certification — -//! and proof that the headers we set are the same ones the canister certified. - -use e2e::{get_asset_properties, http_fetch, icp_cmd, setup_project, LocalNetwork}; -use reqwest::StatusCode; -use std::fs; - -fn header_value<'a>(headers: &'a reqwest::header::HeaderMap, name: &str) -> Option<&'a str> { - headers.get(name).and_then(|v| v.to_str().ok()) -} - -/// Deploy the `headers` fixture and check that exact, subtree, and global -/// header rules all reach the canister and survive certification. -#[test] -fn header_rules_honoured() { - let tmp = setup_project("tests/fixture/headers"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // ── exact + global rules layer on /index.html ─────────────────────────── - let r = http_fetch(project, "/index.html"); - assert_eq!(r.status(), StatusCode::OK); - let h = r.headers(); - assert_eq!(header_value(h, "x-frame-options"), Some("DENY")); - assert_eq!(header_value(h, "x-content-type-options"), Some("nosniff")); - assert_eq!(header_value(h, "x-robots-tag"), Some("noindex")); - - // ── subtree + global rules layer on /_astro/app.js ────────────────────── - let r = http_fetch(project, "/_astro/app.js"); - assert_eq!(r.status(), StatusCode::OK); - let h = r.headers(); - assert_eq!( - header_value(h, "cache-control"), - Some("public, max-age=31536000, immutable") - ); - assert_eq!(header_value(h, "x-robots-tag"), Some("noindex")); - // No exact-rule headers leaked from /index.html. - assert!(h.get("x-frame-options").is_none()); -} - -/// Edit `_headers` and redeploy. Expectation: new headers propagate without -/// re-uploading content (drift detected via `SetAssetProperties`). -#[test] -fn header_edit_propagates_via_set_asset_properties() { - let tmp = setup_project("tests/fixture/headers"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // Sanity-check the initial headers landed on the canister. - let before = get_asset_properties(project, "/index.html"); - let headers_before = before.headers.expect("/index.html should carry headers"); - assert!(headers_before - .iter() - .any(|(k, v)| k.eq_ignore_ascii_case("x-frame-options") && v == "DENY")); - - // Bump the global X-Robots-Tag without touching any asset bytes. - fs::write( - project.join("dist/_headers"), - b"/index.html\n X-Frame-Options: DENY\n X-Content-Type-Options: nosniff\n\n/_astro/*\n Cache-Control: public, max-age=31536000, immutable\n\n/*\n X-Robots-Tag: none\n", - ) - .expect("rewrite _headers"); - - icp_cmd(project).arg("deploy").assert().success(); - - // Drift propagated: /index.html now serves the new X-Robots-Tag value. - let r = http_fetch(project, "/index.html"); - assert_eq!(r.status(), StatusCode::OK); - assert_eq!( - header_value(r.headers(), "x-robots-tag"), - Some("none"), - "edited X-Robots-Tag should reach the canister via SetAssetProperties", - ); -} diff --git a/crates/e2e/tests/headers_content_type.rs b/crates/e2e/tests/headers_content_type.rs deleted file mode 100644 index 32e8346..0000000 --- a/crates/e2e/tests/headers_content_type.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Integration tests for `Content-Type` overrides in `_headers`, end-to-end -//! via the WASM plugin. -//! -//! Each test deploys a fixture to a local replica, then fetches assets via -//! the HTTP gateway. The gateway validates the response's `IC-Certificate` -//! before handing it back, so a successful fetch is also proof that the -//! content-type the plugin sent to the canister is the one that ended up -//! in the certified response. - -use e2e::{http_fetch, icp_cmd, setup_project, LocalNetwork}; -use reqwest::StatusCode; -use std::fs; - -fn content_type(headers: &reqwest::header::HeaderMap) -> &str { - headers - .get("content-type") - .and_then(|v| v.to_str().ok()) - .unwrap_or("") -} - -/// Deploy the `headers-content-type` fixture and verify every per-glob -/// `Content-Type` override in `_headers` survives certification and reaches -/// the HTTP gateway. -#[test] -fn content_type_overrides_land_on_canister() { - let tmp = setup_project("tests/fixture/headers-content-type"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // .did → text/plain; charset=utf-8 (mime_guess has no entry, so the - // pre-override default would have been application/octet-stream). - let r = http_fetch(project, "/ic.did"); - assert_eq!(r.status(), StatusCode::OK); - assert_eq!(content_type(r.headers()), "text/plain; charset=utf-8"); - - // No override → mime_guess default applies (index.html stays text/html). - let r = http_fetch(project, "/index.html"); - assert_eq!(r.status(), StatusCode::OK); - assert!( - content_type(r.headers()).starts_with("text/html"), - "expected text/html for index.html, got: {}", - content_type(r.headers()) - ); -} - -/// Edit `_headers` and redeploy. Expectation: the new content-type lands -/// on the canister (via delete-then-recreate triggered by content-type -/// drift), and the gateway serves the updated value. -#[test] -fn content_type_edit_propagates_on_redeploy() { - let tmp = setup_project("tests/fixture/headers-content-type"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // Sanity check the initial value. - let r = http_fetch(project, "/ic.did"); - assert_eq!(content_type(r.headers()), "text/plain; charset=utf-8"); - - // Flip the .did mapping to a different MIME without touching the file. - fs::write( - project.join("dist/_headers"), - b"/*.did\n Content-Type: application/json\n", - ) - .expect("rewrite _headers"); - - icp_cmd(project).arg("deploy").assert().success(); - - let r = http_fetch(project, "/ic.did"); - assert_eq!(r.status(), StatusCode::OK); - assert_eq!( - content_type(r.headers()), - "application/json", - "edited _headers should propagate the new Content-Type via delete-then-recreate", - ); -} diff --git a/crates/e2e/tests/redirects.rs b/crates/e2e/tests/redirects.rs deleted file mode 100644 index bd23b3e..0000000 --- a/crates/e2e/tests/redirects.rs +++ /dev/null @@ -1,287 +0,0 @@ -//! Integration tests for `_redirects` end-to-end via the WASM plugin. -//! -//! Each test deploys a fixture to a local replica, then fetches paths via the -//! HTTP gateway. The gateway validates the response's `IC-Certificate` before -//! handing it back, so a successful fetch is also proof of certification. - -use e2e::{http_fetch, http_fetch_subdomain, icp_cmd, setup_project, LocalNetwork}; -use reqwest::StatusCode; - -/// Deploy the `redirects` fixture and exercise every response kind: -/// 3xx redirect (internal + external), 4xx custom error page, 200 rewrite -/// (exact and subtree). -#[test] -fn redirect_rules_honoured() { - let tmp = setup_project("tests/fixture/redirects"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // ── 301 redirect to an internal path ──────────────────────────────────── - let r = http_fetch(project, "/old"); - assert_eq!(r.status(), StatusCode::MOVED_PERMANENTLY); - assert_eq!( - r.headers() - .get("location") - .and_then(|v| v.to_str().ok()) - .map(str::to_owned), - Some("/new.html".into()), - ); - - // ── 302 redirect to an external URL ───────────────────────────────────── - let r = http_fetch(project, "/legacy"); - assert_eq!(r.status(), StatusCode::FOUND); - assert_eq!( - r.headers() - .get("location") - .and_then(|v| v.to_str().ok()) - .map(str::to_owned), - Some("https://example.com/".into()), - ); - - // ── 404 custom error page ─────────────────────────────────────────────── - let r = http_fetch(project, "/missing-page"); - assert_eq!(r.status(), StatusCode::NOT_FOUND); - let body = r.text().expect("read body"); - assert!( - body.contains("custom not found"), - "expected /404.html body, got: {body}" - ); - - // ── 410 custom error page ─────────────────────────────────────────────── - let r = http_fetch(project, "/gone-page"); - assert_eq!(r.status(), StatusCode::GONE); - let body = r.text().expect("read body"); - assert!( - body.contains("tombstone"), - "expected /410.html body, got: {body}" - ); - - // ── 200 rewrite: exact ────────────────────────────────────────────────── - let r = http_fetch(project, "/about"); - assert_eq!(r.status(), StatusCode::OK); - let body = r.text().expect("read body"); - assert!( - body.contains("about us"), - "expected /about.html body, got: {body}" - ); - - // ── 200 rewrite: subtree ──────────────────────────────────────────────── - for sub in &["/blog/anything", "/blog/2024/post"] { - let r = http_fetch(project, sub); - assert_eq!(r.status(), StatusCode::OK, "fetching {sub}"); - let body = r.text().expect("read body"); - assert!( - body.contains("blog index"), - "fetching {sub}: expected blog index body, got: {body}" - ); - } -} - -/// Without a user-supplied `_redirects`, the plugin auto-synthesises -/// Cloudflare's `auto-trailing-slash` rule set for every HTML asset (see -/// `sync-core::html_handling`). This test deploys an HTML-only fixture -/// and walks the full CF table for each of the three asset shapes: -/// root index, directory index, and non-index HTML file. -/// -/// Two rows of CF's table are knowingly inert: the source-URL 307s from -/// `/foo.html → /foo` and `/blog/index.html → /blog/` (and the root -/// `/index.html → /`). The asset at those keys shadows the rule, so the -/// canister returns the asset's body with a 200 instead of redirecting. -/// The test asserts that observed behaviour rather than CF's strict 307. -#[test] -fn html_handling_auto_synthesis() { - let tmp = setup_project("tests/fixture/html-handling"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - icp_cmd(project).arg("deploy").assert().success(); - - // ── /foo.html (non-index): canonical /foo ─────────────────────────────── - expect_200(project, "/foo", "foo.html body"); - // /foo.html: inert — asset shadows the synthesised 307. - expect_200(project, "/foo.html", "foo.html body"); - expect_307(project, "/foo/", "/foo"); - expect_307(project, "/foo/index", "/foo"); - expect_307(project, "/foo/index.html", "/foo"); - - // ── /blog/index.html (directory index): canonical /blog/ ─────────────── - expect_200(project, "/blog/", "blog index body"); - expect_307(project, "/blog", "/blog/"); - // CF chains: /blog.html -> /blog -> /blog/. The 307 the canister emits - // points at the bare form; the client follows it to land on /blog/. - expect_307(project, "/blog.html", "/blog"); - expect_307(project, "/blog/index", "/blog"); - // /blog/index.html: inert — asset shadows the synthesised 307. - expect_200(project, "/blog/index.html", "blog index body"); - - // ── /index.html (root index): canonical / ─────────────────────────────── - expect_200(project, "/", "root index body"); - expect_307(project, "/index", "/"); - // /index.html: inert — asset shadows the synthesised 307. - expect_200(project, "/index.html", "root index body"); -} - -/// When a rule is removed from `_redirects` between deploys, the canister -/// must not leave an orphaned cert-tree path behind. The HTTP gateway's -/// verifier rejects wildcard `<*>` witnesses if a "potential exact -/// expression path" is visible at the requested URL, so a dangling Exact -/// entry from a deleted rule will turn the removed path into a 503 for -/// any subsequent request — including ones the user expected the SPA -/// catch-all to handle. -/// -/// This test deploys once with a marker rule, verifies it works, removes -/// the rule from `_redirects`, redeploys, and verifies the marker path -/// now falls through cleanly to the catch-all 404 rather than 503-ing. -#[test] -fn removed_redirect_rule_clears_cert_tree() { - let tmp = setup_project("tests/fixture/html-handling-with-catchall"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - let redirects_path = project.join("dist/_redirects"); - let original = std::fs::read_to_string(&redirects_path).expect("read original"); - // Prepend a marker that can't collide with auto-synth (no matching - // .html source for this path). - std::fs::write( - &redirects_path, - format!("/marker-path /index.html 307\n{original}"), - ) - .expect("write augmented _redirects"); - - icp_cmd(project).arg("deploy").assert().success(); - - let r = http_fetch_subdomain(project, "/marker-path"); - assert_eq!( - r.status(), - StatusCode::TEMPORARY_REDIRECT, - "before removal: /marker-path expected 307, got {}", - r.status() - ); - - // Remove the marker, leaving only the user's catch-all + synth. - std::fs::write(&redirects_path, &original).expect("restore _redirects"); - icp_cmd(project).arg("deploy").assert().success(); - - let r = http_fetch_subdomain(project, "/marker-path"); - // After the fix, the cert tree no longer has an Exact entry at this - // path, so the user's `/* /404.html 404` catch-all takes over cleanly. - // Before the fix, the orphaned subtree confused the verifier and the - // gateway returned 503 instead. - assert_eq!( - r.status(), - StatusCode::NOT_FOUND, - "after removal: /marker-path expected 404 from catch-all, got {} \ - (a 503 here means the cert-tree entry wasn't pruned)", - r.status() - ); - let body = r.text().expect("read body"); - assert!( - body.contains("custom 404"), - "expected /404.html body from catch-all, got: {body}" - ); -} - -fn expect_200(project: &std::path::Path, path: &str, body_marker: &str) { - let r = http_fetch(project, path); - assert_eq!( - r.status(), - StatusCode::OK, - "html-handling: GET {path} expected 200, got {}", - r.status() - ); - let body = r.text().expect("read body"); - assert!( - body.contains(body_marker), - "html-handling: GET {path} expected body containing {body_marker:?}, got: {body}" - ); -} - -/// Reproduces the in-the-wild bug report: a user `_redirects` with a SPA-style -/// `/* /404.html 404` catch-all combined with auto-synthesised html-handling -/// rules caused the gateway verifier to reject responses with 503 -/// "Response Verification Error" — the wildcard expression path the -/// canister returned for paths matched by `/*` conflicted with the Exact -/// expression paths the synthesised rules had certified in the tree. -/// -/// The fix prepends synth rules before the user's `_redirects`, so the -/// html-handling defaults claim their paths first and `/* … 404` only fires -/// for paths no HTML asset covers. The verifier then sees consistent -/// expression paths on every response. -/// -/// This test exercises both code paths the gateway differentiates between: -/// the subdomain-style URL the browser uses (which forces full v2 -/// verification) and the explicit `?canisterId=…` form. -#[test] -fn html_handling_with_catchall_redirect() { - let tmp = setup_project("tests/fixture/html-handling-with-catchall"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - icp_cmd(project).arg("deploy").assert().success(); - - // The headline regression: `/` via the browser-style subdomain URL must - // 200-serve the root index, not 503 with a verification error. - let r = http_fetch_subdomain(project, "/"); - assert_eq!( - r.status(), - StatusCode::OK, - "root via subdomain expected 200 (was 503 before the fix), got {}", - r.status() - ); - let body = r.text().expect("read body"); - assert!( - body.contains("root index body"), - "expected /index.html body, got: {body}" - ); - - // `/index` is the other failure mode: the synthesised 307 was certified - // at `["http_expr", "index", "<$>"]`, but the user's `/*` matched first - // at request time and returned a `<*>` wildcard witness. The verifier - // then refused the wildcard because the Exact entry existed in the tree. - let r = http_fetch_subdomain(project, "/index"); - assert_eq!( - r.status(), - StatusCode::TEMPORARY_REDIRECT, - "/index expected 307, got {}", - r.status() - ); - assert_eq!( - r.headers().get("location").and_then(|v| v.to_str().ok()), - Some("/"), - ); - - // The catch-all still does its job for paths nothing else covers. - let r = http_fetch_subdomain(project, "/this-path-does-not-exist"); - assert_eq!( - r.status(), - StatusCode::NOT_FOUND, - "catch-all expected 404, got {}", - r.status() - ); - let body = r.text().expect("read body"); - assert!( - body.contains("custom 404"), - "expected /404.html body, got: {body}" - ); -} - -fn expect_307(project: &std::path::Path, path: &str, location: &str) { - let r = http_fetch(project, path); - assert_eq!( - r.status(), - StatusCode::TEMPORARY_REDIRECT, - "html-handling: GET {path} expected 307, got {}", - r.status() - ); - let actual = r - .headers() - .get("location") - .and_then(|v| v.to_str().ok()) - .map(str::to_owned); - assert_eq!( - actual.as_deref(), - Some(location), - "html-handling: GET {path} expected Location {location}, got {actual:?}" - ); -} diff --git a/crates/e2e/tests/sync.rs b/crates/e2e/tests/sync.rs index 392d857..a7013ba 100644 --- a/crates/e2e/tests/sync.rs +++ b/crates/e2e/tests/sync.rs @@ -20,6 +20,30 @@ fn basic_deploy() { ); } +/// Deploy a fixture whose `dirs` entry is a *nested* path (`src/frontend/dist`). +/// The host preopens it under a multi-segment WASI guest name; the plugin's scan +/// step must not call `canonicalize`/`realpath` on it (WASI returns ENOENT for +/// any path under a multi-component preopen). Regression for forum post #97. +#[test] +fn nested_dir_deploy() { + let tmp = setup_project("tests/fixture/nested"); + let project = tmp.path(); + let _network = LocalNetwork::start(project); + + icp_cmd(project).arg("deploy").assert().success(); + + let assets = list_assets(project); + + assert!( + assets.iter().any(|a| a.key == "/index.html"), + "expected /index.html in canister asset list; got: {assets:#?}", + ); + assert!( + assets.iter().any(|a| a.key == "/assets/style.css"), + "expected /assets/style.css (nested subdir) in canister asset list; got: {assets:#?}", + ); +} + #[test] fn basic_deploy_with_proxy() { let tmp = setup_project("tests/fixture/basic"); @@ -184,29 +208,3 @@ fn asset_deletion() { "/style.css should be removed from the canister after local deletion", ); } - -/// The assets sync plugin owns the URL space of its canister and only -/// supports a single source directory. A manifest that lists multiple -/// `dirs:` entries must fail the sync step before any canister mutation. -#[test] -fn multi_directory_sync_rejected() { - let tmp = setup_project("tests/fixture/multi-dir"); - let project = tmp.path(); - let _network = LocalNetwork::start(project); - - let output = icp_cmd(project) - .arg("deploy") - .assert() - .failure() - .get_output() - .clone(); - let combined = format!( - "{}\n{}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr), - ); - assert!( - combined.contains("expected exactly one input directory"), - "expected multi-dir rejection message; got:\n{combined}", - ); -} diff --git a/crates/sync-core/Cargo.toml b/crates/sync-core/Cargo.toml index 865c6fc..4c20aeb 100644 --- a/crates/sync-core/Cargo.toml +++ b/crates/sync-core/Cargo.toml @@ -11,14 +11,18 @@ publish.workspace = true brotli.workspace = true candid.workspace = true flate2.workspace = true +globset = "0.4" hex.workspace = true http = "1" +json5 = "0.4" mime.workspace = true mime_guess.workspace = true serde.workspace = true serde_bytes.workspace = true +serde_json.workspace = true sha2.workspace = true url = "2" +walkdir = "2" [dev-dependencies] tempfile.workspace = true diff --git a/crates/sync-core/src/canister.rs b/crates/sync-core/src/canister.rs index 4e9cab9..73cc12e 100644 --- a/crates/sync-core/src/canister.rs +++ b/crates/sync-core/src/canister.rs @@ -27,6 +27,8 @@ pub struct CreateAssetArguments { pub content_type: String, pub max_age: Option, pub headers: Option>, + /// Whether the legacy canister serves `/route` as an alias of `/route.html`. + pub enable_aliasing: Option, pub allow_raw_access: Option, } @@ -59,25 +61,8 @@ pub struct SetAssetPropertiesArguments { pub max_age: Option>, pub headers: Option>>, pub allow_raw_access: Option>, -} - -#[derive(CandidType, Clone, Debug, Deserialize, PartialEq, Eq)] -pub enum RulePattern { - Exact(String), - Subtree(String), -} - -#[derive(CandidType, Clone, Debug, Deserialize, PartialEq, Eq)] -pub struct RedirectRule { - pub from: RulePattern, - pub to: String, - pub status: u16, - pub headers: Option>, -} - -#[derive(CandidType, Clone, Debug)] -pub struct SetRedirectRulesArguments { - pub rules: Vec, + /// Toggles `/route` ↔ `/route.html` aliasing on the legacy canister. + pub is_aliased: Option>, } #[derive(CandidType, Clone, Debug)] @@ -88,7 +73,6 @@ pub enum BatchOperationKind { UnsetAssetContent(UnsetAssetContentArguments), SetAssetContent(SetAssetContentArguments), SetAssetProperties(SetAssetPropertiesArguments), - SetRedirectRules(SetRedirectRulesArguments), } #[derive(CandidType, Debug)] @@ -102,6 +86,7 @@ pub struct AssetProperties { pub max_age: Option, pub headers: Option>, pub allow_raw_access: Option, + pub is_aliased: Option, } #[derive(CandidType, Clone, Debug, Deserialize)] @@ -235,10 +220,6 @@ pub fn get_asset_properties(c: &impl CanisterCall, key: &str) -> Result Result, String> { - c.call("get_redirect_rules", (), CallType::Query, true) -} - pub fn list_permitted( c: &impl CanisterCall, permission: Permission, diff --git a/crates/sync-core/src/config.rs b/crates/sync-core/src/config.rs new file mode 100644 index 0000000..43df839 --- /dev/null +++ b/crates/sync-core/src/config.rs @@ -0,0 +1,530 @@ +//! `.ic-assets.json5` (and `.ic-assets.json`) parsing, ported from `ic-asset`'s +//! `asset/config.rs`. +//! +//! Config files nest: a directory's `.ic-assets.json5` applies to that directory +//! and all descendants, and child configs merge on top of parent ones. Each rule +//! has a glob `match` (resolved relative to the config file's directory) plus +//! optional `cache`, `headers`, `ignore`, `enable_aliasing`, `allow_raw_access`, +//! `encodings`, and `security_policy` fields. +//! +//! Differences from the upstream `ic-asset` port: +//! - errors are plain `String`s (this crate's convention), +//! - the `derivative` proc-macro is replaced with hand-written impls, +//! - the json5 pretty-printer / serialize plumbing is dropped; unused-rule +//! warnings print the glob pattern only. + +use crate::content::Encoder; +use crate::security_policy::SecurityPolicy; +use globset::{Glob, GlobMatcher}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; + +pub const ASSETS_CONFIG_FILENAME_JSON: &str = ".ic-assets.json"; +pub const ASSETS_CONFIG_FILENAME_JSON5: &str = ".ic-assets.json5"; + +pub type HeadersConfig = BTreeMap; + +/// The resolved configuration assigned to a single asset. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AssetConfig { + pub cache: Option, + pub headers: Option, + pub ignore: Option, + pub enable_aliasing: Option, + pub allow_raw_access: Option, + pub encodings: Option>, + pub security_policy: Option, + pub disable_security_policy_warning: Option, +} + +impl Default for AssetConfig { + fn default() -> Self { + Self { + cache: None, + headers: None, + ignore: None, + enable_aliasing: None, + // Matches ic-asset: raw access is allowed unless a rule turns it off. + allow_raw_access: Some(true), + encodings: None, + security_policy: None, + disable_security_policy_warning: None, + } + } +} + +impl AssetConfig { + /// Merges the custom `headers` with the headers implied by `security_policy`. + /// Custom headers win on case-insensitive name collisions. + pub fn combined_headers(&self) -> Option { + match (self.headers.as_ref(), self.security_policy) { + (None, None) => None, + (None, Some(policy)) => Some(policy.to_headers()), + (Some(custom_headers), None) => Some(custom_headers.clone()), + (Some(custom_headers), Some(policy)) => { + let mut headers = custom_headers.clone(); + let custom_header_names: HashSet = + HashSet::from_iter(custom_headers.keys().map(|a| a.to_lowercase())); + for (policy_header_name, policy_header_value) in policy.to_headers() { + if !custom_header_names.contains(&policy_header_name.to_lowercase()) { + headers.insert(policy_header_name, policy_header_value); + } + } + Some(headers) + } + } + } + + pub fn warn_about_standard_security_policy(&self) -> bool { + let warning_disabled = self.disable_security_policy_warning == Some(true); + let standard_policy = self.security_policy == Some(SecurityPolicy::Standard); + standard_policy && !warning_disabled + } + + pub fn warn_about_no_security_policy(&self) -> bool { + let warning_disabled = self.disable_security_policy_warning == Some(true); + let no_policy = self.security_policy.is_none(); + no_policy && !warning_disabled + } + + /// `"hardened"` expects custom headers to be present; this cannot be silenced. + pub fn warn_about_missing_hardening_headers(&self) -> bool { + let is_hardened = self.security_policy == Some(SecurityPolicy::Hardened); + let has_headers = self + .headers + .as_ref() + .map(|headers| !headers.is_empty()) + .unwrap_or_default(); + is_hardened && !has_headers + } + + fn merge(mut self, other: &AssetConfigRule) -> Self { + if let Some(c) = &other.cache { + self.cache = Some(c.to_owned()); + } + match (self.headers.as_mut(), &other.headers) { + (Some(sh), Maybe::Value(oh)) => sh.extend(oh.to_owned()), + (None, Maybe::Value(oh)) => self.headers = Some(oh.to_owned()), + (_, Maybe::Null) => self.headers = None, + (_, Maybe::Absent) => (), + } + if other.ignore.is_some() { + self.ignore = other.ignore; + } + if other.enable_aliasing.is_some() { + self.enable_aliasing = other.enable_aliasing; + } + if other.allow_raw_access.is_some() { + self.allow_raw_access = other.allow_raw_access; + } + if other.encodings.is_some() { + self.encodings.clone_from(&other.encodings); + } + if other.security_policy.is_some() { + self.security_policy = other.security_policy; + } + if other.disable_security_policy_warning.is_some() { + self.disable_security_policy_warning = other.disable_security_policy_warning; + } + self + } +} + +#[derive(Deserialize, Debug, Default, Clone, PartialEq, Eq)] +pub struct CacheConfig { + pub max_age: Option, +} + +/// Tri-state for the `headers` field: absent (don't touch), `null` (clear), or a +/// map of header values. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +enum Maybe { + Null, + #[default] + Absent, + Value(T), +} + +/// A single rule from a `.ic-assets.json5` file, with its glob resolved against +/// the config file's directory. +#[derive(Clone)] +struct AssetConfigRule { + r#match: GlobMatcher, + cache: Option, + headers: Maybe, + ignore: Option, + enable_aliasing: Option, + used: bool, + allow_raw_access: Option, + encodings: Option>, + security_policy: Option, + disable_security_policy_warning: Option, +} + +impl AssetConfigRule { + fn applies(&self, canonical_path: &Path) -> bool { + self.r#match.is_match(canonical_path) + } + + fn from_interim(interim: InterimAssetConfigRule, dir: &Path) -> Result { + let InterimAssetConfigRule { + r#match, + cache, + headers, + ignore, + enable_aliasing, + allow_raw_access, + encodings, + security_policy, + disable_security_policy_warning, + } = interim; + let pattern = r#match; + let glob = dir.join(&pattern); + let glob = glob + .to_str() + .ok_or_else(|| format!("non-UTF-8 glob pattern '{pattern}' in {}", dir.display()))?; + let matcher = Glob::new(glob) + .map_err(|e| format!("invalid glob pattern '{pattern}': {e}"))? + .compile_matcher(); + Ok(Self { + r#match: matcher, + cache, + headers, + ignore, + enable_aliasing, + used: false, + allow_raw_access, + encodings, + security_policy, + disable_security_policy_warning, + }) + } +} + +#[derive(Deserialize)] +#[serde(deny_unknown_fields)] +struct InterimAssetConfigRule { + r#match: String, + cache: Option, + #[serde(default, deserialize_with = "headers_deserialize")] + headers: Maybe, + ignore: Option, + enable_aliasing: Option, + allow_raw_access: Option, + encodings: Option>, + security_policy: Option, + disable_security_policy_warning: Option, +} + +fn headers_deserialize<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + use serde::de::Error as _; + match Value::deserialize(deserializer)? { + Value::Object(v) => Ok(Maybe::Value( + v.into_iter() + .map(|(k, v)| { + Ok(( + k, + match v { + Value::Bool(b) => b.to_string(), + Value::Number(n) => n.to_string(), + Value::String(s) => s, + Value::Null => String::new(), + v => { + return Err(D::Error::custom(format!( + "headers must be strings, numbers, or bools (was {v:?})" + ))) + } + }, + )) + }) + .collect::, D::Error>>()?, + )), + Value::Null => Ok(Maybe::Null), + _ => Err(D::Error::custom( + "wrong data format for field `headers` (only map or null are allowed)", + )), + } +} + +type ConfigNode = Arc>; +type ConfigMap = HashMap; + +/// Aggregates `.ic-assets.json5` files nested in a directory tree. +#[derive(Debug)] +pub struct AssetSourceDirectoryConfiguration { + config_map: ConfigMap, +} + +#[derive(Default)] +struct AssetConfigTreeNode { + parent: Option, + rules: Vec, + origin: PathBuf, +} + +impl std::fmt::Debug for AssetConfigTreeNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AssetConfigTreeNode") + .field("origin", &self.origin) + .field("rules", &self.rules.len()) + .finish() + } +} + +impl AssetSourceDirectoryConfiguration { + /// Builds the config tree for an (absolute) assets directory. + pub fn load(root_dir: &Path) -> Result { + if !root_dir.has_root() { + return Err(format!( + "asset config root dir must be absolute: {}", + root_dir.display() + )); + } + let mut config_map = HashMap::new(); + AssetConfigTreeNode::load(None, root_dir, &mut config_map)?; + Ok(Self { config_map }) + } + + /// Resolves the configuration for an asset at `canonical_path`. + pub fn get_asset_config(&mut self, canonical_path: &Path) -> Result { + let parent_dir = canonical_path + .parent() + .ok_or_else(|| format!("no parent dir for {}", canonical_path.display()))?; + Ok(self + .config_map + .get(parent_dir) + .ok_or_else(|| format!("no asset config found for {}", parent_dir.display()))? + .lock() + .unwrap() + .get_config(canonical_path)) + } + + /// Returns the rules that never matched any asset, grouped by config-file + /// directory. Used to warn about typo'd globs. + pub fn get_unused_configs(&self) -> HashMap> { + let mut hm: HashMap> = HashMap::new(); + for node in self.config_map.values() { + let node = node.lock().unwrap(); + for rule in &node.rules { + if !rule.used { + hm.entry(node.origin.clone()) + .or_default() + .push(rule.r#match.glob().to_string()); + } + } + } + for (_, globs) in hm.iter_mut() { + globs.sort(); + globs.dedup(); + } + hm + } +} + +impl AssetConfigTreeNode { + fn load(parent: Option, dir: &Path, configs: &mut ConfigMap) -> Result<(), String> { + let json = dir.join(ASSETS_CONFIG_FILENAME_JSON); + let json5 = dir.join(ASSETS_CONFIG_FILENAME_JSON5); + let config_path = match (json.exists(), json5.exists()) { + (true, true) => { + return Err(format!( + "both {ASSETS_CONFIG_FILENAME_JSON} and {ASSETS_CONFIG_FILENAME_JSON5} present in {}", + dir.display() + )) + } + (true, false) => Some(json), + (false, true) => Some(json5), + (false, false) => None, + }; + + let mut rules = vec![]; + if let Some(config_path) = &config_path { + let content = std::fs::read_to_string(config_path) + .map_err(|e| format!("read {}: {e}", config_path.display()))?; + let interim_rules: Vec = json5::from_str(&content) + .map_err(|e| format!("malformed {}: {e}", config_path.display()))?; + for interim_rule in interim_rules { + rules.push(AssetConfigRule::from_interim(interim_rule, dir)?); + } + } + + // An empty node just forwards to its parent (matches ic-asset). + let node_ref = match parent { + Some(p) if rules.is_empty() => p, + _ => Arc::new(Mutex::new(Self { + parent, + rules, + origin: dir.to_path_buf(), + })), + }; + + configs.insert(dir.to_path_buf(), node_ref.clone()); + for entry in std::fs::read_dir(dir) + .map_err(|e| format!("read_dir {}: {e}", dir.display()))? + .filter_map(|x| x.ok()) + .filter(|x| x.file_type().is_ok_and(|ft| ft.is_dir())) + { + Self::load(Some(node_ref.clone()), &entry.path(), configs)?; + } + Ok(()) + } + + /// Resolves config for `canonical_path`, marking matched rules as used. + fn get_config(&mut self, canonical_path: &Path) -> AssetConfig { + let base_config = match &self.parent { + Some(parent) => parent.clone().lock().unwrap().get_config(canonical_path), + None => AssetConfig::default(), + }; + self.rules + .iter_mut() + .filter(|rule| rule.applies(canonical_path)) + .fold(base_config, |acc, x| { + x.used = true; + acc.merge(x) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + fn write(dir: &Path, rel: &str, content: &str) { + let path = dir.join(rel); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + fs::write(path, content).unwrap(); + } + + fn config_for(root: &TempDir, rel_file: &str) -> AssetConfig { + let root_abs = root.path().canonicalize().unwrap(); + let mut cfg = AssetSourceDirectoryConfiguration::load(&root_abs).unwrap(); + cfg.get_asset_config(&root_abs.join(rel_file)).unwrap() + } + + #[test] + fn default_allows_raw_access() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), "index.html", "x"); + let cfg = config_for(&dir, "index.html"); + assert_eq!(cfg.allow_raw_access, Some(true)); + assert!(cfg.headers.is_none()); + } + + #[test] + fn headers_and_cache_apply_by_glob() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[{ "match": "*.html", "cache": { "max_age": 42 }, "headers": { "X-Foo": "bar" } }]"#, + ); + write(dir.path(), "index.html", "x"); + write(dir.path(), "app.js", "x"); + let html = config_for(&dir, "index.html"); + assert_eq!(html.cache, Some(CacheConfig { max_age: Some(42) })); + assert_eq!( + html.headers.unwrap().get("X-Foo").map(String::as_str), + Some("bar") + ); + let js = config_for(&dir, "app.js"); + assert!(js.cache.is_none()); + assert!(js.headers.is_none()); + } + + #[test] + fn nested_config_merges_over_parent() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[{ "match": "**/*", "headers": { "A": "1" } }]"#, + ); + write( + dir.path(), + "sub/.ic-assets.json5", + r#"[{ "match": "*", "headers": { "B": "2" } }]"#, + ); + write(dir.path(), "sub/page.html", "x"); + let cfg = config_for(&dir, "sub/page.html"); + let headers = cfg.headers.unwrap(); + assert_eq!(headers.get("A").map(String::as_str), Some("1")); + assert_eq!(headers.get("B").map(String::as_str), Some("2")); + } + + #[test] + fn null_headers_clears() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[ + { "match": "*", "headers": { "A": "1" } }, + { "match": "page.html", "headers": null } + ]"#, + ); + write(dir.path(), "page.html", "x"); + let cfg = config_for(&dir, "page.html"); + assert!(cfg.headers.is_none()); + } + + #[test] + fn security_policy_parsed_and_combined() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[{ "match": "**/*", "security_policy": "standard" }]"#, + ); + write(dir.path(), "index.html", "x"); + let cfg = config_for(&dir, "index.html"); + assert_eq!(cfg.security_policy, Some(SecurityPolicy::Standard)); + let combined = cfg.combined_headers().unwrap(); + assert!(combined.contains_key("Content-Security-Policy")); + } + + #[test] + fn encodings_override_parsed() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[{ "match": "*.wasm", "encodings": ["identity", "gzip"] }]"#, + ); + write(dir.path(), "mod.wasm", "x"); + let cfg = config_for(&dir, "mod.wasm"); + assert_eq!(cfg.encodings, Some(vec![Encoder::Identity, Encoder::Gzip])); + } + + #[test] + fn unused_rule_is_reported() { + let dir = tempfile::tempdir().unwrap(); + write( + dir.path(), + ".ic-assets.json5", + r#"[{ "match": "nonexistent.css", "headers": { "A": "1" } }]"#, + ); + write(dir.path(), "index.html", "x"); + let root_abs = dir.path().canonicalize().unwrap(); + let mut cfg = AssetSourceDirectoryConfiguration::load(&root_abs).unwrap(); + let _ = cfg.get_asset_config(&root_abs.join("index.html")).unwrap(); + let unused = cfg.get_unused_configs(); + assert!(!unused.is_empty()); + } + + #[test] + fn both_config_files_present_is_error() { + let dir = tempfile::tempdir().unwrap(); + write(dir.path(), ".ic-assets.json", "[]"); + write(dir.path(), ".ic-assets.json5", "[]"); + let root_abs = dir.path().canonicalize().unwrap(); + assert!(AssetSourceDirectoryConfiguration::load(&root_abs).is_err()); + } +} diff --git a/crates/sync-core/src/glob.rs b/crates/sync-core/src/glob.rs deleted file mode 100644 index 1c282d1..0000000 --- a/crates/sync-core/src/glob.rs +++ /dev/null @@ -1,134 +0,0 @@ -//! Glob pattern matched against asset keys, used by `_headers` to attach -//! both response headers and Content-Type overrides to assets. -//! -//! Syntax: leading `/`, then literal characters and a single greedy `*`. -//! `*` matches any sequence of characters including `/` and empty; every -//! other character matches literally. No `**`, no `?`, no `:placeholder` — -//! per Cloudflare Pages / Netlify `_headers` precedent. - -/// Compiled glob. `parts.len() == 1` means no `*` (exact match); otherwise -/// consecutive entries are joined by an implicit `*`. Matcher is -/// `O(parts * key)` with no per-call allocation. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct KeyPattern { - /// Original pattern source, kept for Debug output and error reporting. - source: String, - /// Literal chunks of the pattern. Between consecutive entries the - /// implicit `*` matches any sequence including `/` and empty. - parts: Vec, -} - -impl KeyPattern { - pub fn source(&self) -> &str { - &self.source - } - - pub fn matches(&self, key: &str) -> bool { - if self.parts.len() == 1 { - return key == self.parts[0]; - } - let Some(mut tail) = key.strip_prefix(self.parts[0].as_str()) else { - return false; - }; - let middles = &self.parts[1..self.parts.len() - 1]; - let last = &self.parts[self.parts.len() - 1]; - for middle in middles { - match tail.find(middle.as_str()) { - Some(idx) => tail = &tail[idx + middle.len()..], - None => return false, - } - } - tail.len() >= last.len() && tail.ends_with(last.as_str()) - } -} - -pub fn parse(token: &str) -> Result { - if !token.starts_with('/') { - return Err(format!( - "'{token}' must be an absolute path (start with '/')" - )); - } - if token.contains(':') { - return Err(format!( - "':' placeholders in pattern ('{token}') are not supported" - )); - } - if token.contains("**") { - return Err(format!( - "'**' in pattern ('{token}') is not supported — a single '*' already matches any character sequence including '/'" - )); - } - let parts: Vec = token.split('*').map(String::from).collect(); - Ok(KeyPattern { - source: token.to_string(), - parts, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn pat(token: &str) -> KeyPattern { - parse(token).unwrap() - } - - #[test] - fn exact_match_when_no_star() { - let p = pat("/about"); - assert!(p.matches("/about")); - assert!(!p.matches("/about/")); - assert!(!p.matches("/aboutus")); - assert!(!p.matches("/about/team")); - } - - #[test] - fn trailing_star_matches_subtree() { - let p = pat("/blog/*"); - assert!(p.matches("/blog/post")); - assert!(p.matches("/blog/nested/post")); - assert!(!p.matches("/blogger")); - } - - #[test] - fn root_star_matches_everything() { - let p = pat("/*"); - assert!(p.matches("/anywhere")); - assert!(p.matches("/deep/nested/path")); - } - - #[test] - fn extension_glob_matches_by_suffix() { - let p = pat("/*.md"); - assert!(p.matches("/llms.md")); - assert!(p.matches("/docs/intro.md")); - assert!(p.matches("/a/b/c.md")); - assert!(!p.matches("/llms.txt")); - // Anchored suffix — `.md` mid-path is not a match. - assert!(!p.matches("/a.md.html")); - } - - #[test] - fn mid_path_wildcard_matches() { - let p = pat("/api/*/v1"); - assert!(p.matches("/api/users/v1")); - assert!(p.matches("/api/nested/path/v1")); - assert!(!p.matches("/api/v1")); - assert!(!p.matches("/api/users/v2")); - } - - #[test] - fn rejects_relative_pattern() { - assert!(parse("about").unwrap_err().contains("absolute path")); - } - - #[test] - fn rejects_double_star() { - assert!(parse("/foo/**/bar").unwrap_err().contains("'**'")); - } - - #[test] - fn rejects_placeholder() { - assert!(parse("/blog/:slug").unwrap_err().contains("placeholders")); - } -} diff --git a/crates/sync-core/src/headers.rs b/crates/sync-core/src/headers.rs deleted file mode 100644 index ce5bb91..0000000 --- a/crates/sync-core/src/headers.rs +++ /dev/null @@ -1,844 +0,0 @@ -//! Strict block parser for Netlify-style `_headers` files. -//! -//! Each block is one non-indented `` line followed by one or more -//! indented `Header-Name: value` lines. Blank lines and `#` comments close -//! blocks. Errors carry a 1-based line number so the plugin can point users -//! at the offending entry without a canister round-trip. -//! -//! `Content-Type` is parsed structurally onto [`HeaderRule::content_type`] -//! instead of accumulating in `headers`: the canister stores it as asset -//! metadata that drives encoder selection and certification, not as an -//! appended response header. See `docs/headers.md` for the full reject list. - -use crate::glob::KeyPattern; -use crate::strip_comment; -use http::{HeaderName, HeaderValue}; -use mime::Mime; -use std::str::FromStr; - -pub const HEADERS_FILENAME: &str = "_headers"; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct HeaderRule { - pub pattern: KeyPattern, - /// Response headers in declaration order. Multiple entries for the same - /// name are allowed (e.g. `Set-Cookie`); resolver semantics in `sync.rs`. - /// `Content-Type` is never stored here — it routes to [`Self::content_type`]. - pub headers: Vec<(String, String)>, - /// `Content-Type` value if the block declared one. The plugin feeds this - /// into `CreateAssetArguments.content_type`, overriding `mime_guess` - /// before encoder selection runs. At most one per block; a duplicate - /// `Content-Type:` line within the same block is a parse error. - pub content_type: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ParseError { - pub line: usize, - pub source: String, - pub message: String, -} - -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "_headers: line {}: {} (source: `{}`)", - self.line, self.message, self.source - ) - } -} - -impl std::error::Error for ParseError {} - -/// Open block under construction during `parse`: -/// line_no of the path line, the raw path line (for error reporting), -/// pattern, headers accumulated so far, and the block's `Content-Type` -/// value (if a `Content-Type:` line has been seen yet — used to detect -/// duplicates). -struct OpenBlock { - line_no: usize, - source: String, - pattern: KeyPattern, - headers: Vec<(String, String)>, - content_type: Option, -} - -/// Resolves the per-asset header map for `key` by walking `rules` in -/// declaration order. All matching rules contribute; same-name values across -/// rules are concatenated with `, ` per RFC 7230 §3.2.2, with `Set-Cookie` -/// carved out per RFC 6265 §3 (kept as separate entries). The returned Vec is -/// stable-sorted by lowercased header name so multi-valued headers preserve -/// their declaration order — see the determinism guarantee in `docs/headers.md`. -/// -/// `Content-Type` is never present in the output — it routes through -/// [`content_type_for`] into the asset's stored `content_type` metadata. -pub fn resolve(key: &str, rules: &[HeaderRule]) -> Vec<(String, String)> { - use std::collections::HashMap; - - let mut merged: Vec<(String, String)> = Vec::new(); - // index in `merged` for the first occurrence of each non-Set-Cookie name, - // keyed by lowercased name. - let mut idx_by_lower: HashMap = HashMap::new(); - - for rule in rules { - if !rule.pattern.matches(key) { - continue; - } - for (name, value) in &rule.headers { - if name.eq_ignore_ascii_case("set-cookie") { - merged.push((name.clone(), value.clone())); - continue; - } - let lower = name.to_ascii_lowercase(); - if let Some(&i) = idx_by_lower.get(&lower) { - let existing = &mut merged[i].1; - existing.push_str(", "); - existing.push_str(value); - } else { - idx_by_lower.insert(lower, merged.len()); - merged.push((name.clone(), value.clone())); - } - } - } - - // Stable-sort by lowercased name only — Set-Cookie groups stay together - // but preserve declaration order within the group. - merged.sort_by_key(|(a, _)| a.to_ascii_lowercase()); - merged -} - -/// Returns the `Content-Type` override for `key`, walking `rules` in -/// declaration order — first-match-wins, because `Content-Type` is -/// single-valued and accumulation semantics make no sense for it. Returns -/// `None` if no matching rule declared a `Content-Type:`, in which case the -/// caller falls back to `mime_guess`. -pub fn content_type_for(key: &str, rules: &[HeaderRule]) -> Option { - rules - .iter() - .find(|r| r.pattern.matches(key) && r.content_type.is_some()) - .and_then(|r| r.content_type.clone()) -} - -/// Parses an entire `_headers` file into a list of [`HeaderRule`]s. Rules are -/// returned in declaration order — the resolver walks them in order, so order -/// is semantic. The first malformed line aborts parsing; we want the user to -/// fix issues one at a time rather than wade through cascading errors. -pub fn parse(content: &str) -> Result, ParseError> { - let mut rules = Vec::new(); - let mut current: Option = None; - - for (i, raw) in content.lines().enumerate() { - let line_no = i + 1; - - // Truly blank lines (only whitespace, no characters) close blocks. - // Comment-only lines are skipped without closing. - if raw.trim().is_empty() { - if let Some(block) = current.take() { - rules.push(finalize_block(block)?); - } - continue; - } - let stripped = strip_comment(raw); - if stripped.trim().is_empty() { - // Pure comment line — skip without closing the current block. - continue; - } - - let is_indented = stripped - .chars() - .next() - .is_some_and(|c| c == ' ' || c == '\t'); - - let source = raw.trim_end().to_string(); - if !is_indented { - // Path line. If a block is open, close it (must have headers). - if let Some(block) = current.take() { - rules.push(finalize_block(block)?); - } - let token = stripped.trim(); - let pattern = crate::glob::parse(token).map_err(|message| ParseError { - line: line_no, - source: source.clone(), - message, - })?; - current = Some(OpenBlock { - line_no, - source, - pattern, - headers: Vec::new(), - content_type: None, - }); - continue; - } - - // Indented line: must be a `Header-Name: value` inside an open block. - let Some(block) = current.as_mut() else { - return Err(ParseError { - line: line_no, - source, - message: "indented header line outside a path block".to_string(), - }); - }; - let parsed = parse_header(stripped).map_err(|message| ParseError { - line: line_no, - source: source.clone(), - message, - })?; - match parsed { - ParsedHeader::ContentType(mime) => { - if block.content_type.is_some() { - return Err(ParseError { - line: line_no, - source, - message: "duplicate `Content-Type` in the same block".to_string(), - }); - } - block.content_type = Some(mime); - } - ParsedHeader::Other(name, value) => { - block.headers.push((name, value)); - } - } - } - - if let Some(block) = current.take() { - rules.push(finalize_block(block)?); - } - Ok(rules) -} - -fn finalize_block(block: OpenBlock) -> Result { - if block.headers.is_empty() && block.content_type.is_none() { - return Err(ParseError { - line: block.line_no, - source: block.source, - message: "path block has no header lines under it".to_string(), - }); - } - Ok(HeaderRule { - pattern: block.pattern, - headers: block.headers, - content_type: block.content_type, - }) -} - -enum ParsedHeader { - ContentType(Mime), - Other(String, String), -} - -fn parse_header(stripped: &str) -> Result { - let trimmed = stripped.trim(); - let Some(colon_idx) = trimmed.find(':') else { - return Err(format!( - "header line '{trimmed}' is missing a ':' separator" - )); - }; - let name = trimmed[..colon_idx].trim(); - let value = trimmed[colon_idx + 1..].trim(); - if name.is_empty() { - return Err("header name is empty".to_string()); - } - if name.eq_ignore_ascii_case("content-type") { - if value.is_empty() { - return Err("'Content-Type' value is empty".to_string()); - } - let mime = Mime::from_str(value) - .map_err(|e| format!("invalid `Content-Type` value '{value}': {e}"))?; - return Ok(ParsedHeader::ContentType(mime)); - } - if value.contains(":splat") || value.contains(":placeholder") { - return Err(format!( - "':splat' / ':placeholder' substitution in header value ('{value}') \ - is a known-deferred feature" - )); - } - // `http` rejects CR/LF and other invalid chars — guarantees no header injection. - HeaderName::from_bytes(name.as_bytes()) - .map_err(|e| format!("invalid header name '{name}': {e}"))?; - HeaderValue::from_str(value).map_err(|e| format!("invalid header value '{value}': {e}"))?; - Ok(ParsedHeader::Other(name.to_string(), value.to_string())) -} - -#[cfg(test)] -mod tests { - use super::*; - - fn err(content: &str) -> ParseError { - parse(content).unwrap_err() - } - - fn pat(token: &str) -> KeyPattern { - crate::glob::parse(token).unwrap() - } - - // ── happy paths ─────────────────────────────────────────────────────────── - - #[test] - fn empty_input_yields_no_rules() { - assert!(parse("").unwrap().is_empty()); - assert!(parse("\n\n \n").unwrap().is_empty()); - } - - #[test] - fn single_rule_with_single_header() { - let rules = parse("/about\n X-Frame-Options: DENY\n").unwrap(); - assert_eq!(rules.len(), 1); - assert_eq!(rules[0].pattern.source(), "/about"); - assert_eq!( - rules[0].headers, - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn tab_indented_headers_are_accepted() { - let rules = parse("/about\n\tX-Frame-Options: DENY\n").unwrap(); - assert_eq!( - rules[0].headers, - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn multiple_headers_in_one_block_preserve_order() { - let input = "\ -/api - Cache-Control: no-store - X-Frame-Options: DENY - Set-Cookie: a=1 - Set-Cookie: b=2 -"; - let rules = parse(input).unwrap(); - assert_eq!(rules.len(), 1); - assert_eq!(rules[0].pattern.source(), "/api"); - assert_eq!( - rules[0].headers, - vec![ - ("Cache-Control".into(), "no-store".into()), - ("X-Frame-Options".into(), "DENY".into()), - ("Set-Cookie".into(), "a=1".into()), - ("Set-Cookie".into(), "b=2".into()), - ] - ); - } - - #[test] - fn multiple_blocks_preserve_order() { - let input = "\ -/_astro/* - Cache-Control: immutable - -/* - X-Frame-Options: DENY - -/api - Cache-Control: no-store -"; - let rules = parse(input).unwrap(); - assert_eq!(rules.len(), 3); - assert_eq!(rules[0].pattern.source(), "/_astro/*"); - assert_eq!(rules[1].pattern.source(), "/*"); - assert_eq!(rules[2].pattern.source(), "/api"); - } - - #[test] - fn blocks_without_blank_separator_still_parse() { - // A new non-indented path line closes the previous block. - let input = "\ -/a - X-A: 1 -/b - X-B: 2 -"; - let rules = parse(input).unwrap(); - assert_eq!(rules.len(), 2); - assert_eq!(rules[0].pattern.source(), "/a"); - assert_eq!(rules[0].headers, vec![("X-A".into(), "1".into())]); - assert_eq!(rules[1].pattern.source(), "/b"); - assert_eq!(rules[1].headers, vec![("X-B".into(), "2".into())]); - } - - #[test] - fn comments_and_blanks_are_ignored() { - let input = "\ -# top-level comment -/about - # comment inside block - X-Frame-Options: DENY -# trailing comment -"; - let rules = parse(input).unwrap(); - assert_eq!(rules.len(), 1); - assert_eq!( - rules[0].headers, - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn inline_comment_after_header_is_stripped() { - let rules = parse("/about\n X-Frame-Options: DENY # inline\n").unwrap(); - assert_eq!( - rules[0].headers, - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn hash_inside_header_value_token_is_preserved() { - // `#` only begins a comment after whitespace — a fragment embedded in - // a header-value token stays attached. - let rules = parse( - "/api\n Content-Security-Policy: default-src 'self'; report-uri /csp#endpoint\n", - ) - .unwrap(); - assert_eq!( - rules[0].headers, - vec![( - "Content-Security-Policy".into(), - "default-src 'self'; report-uri /csp#endpoint".into() - )] - ); - } - - #[test] - fn header_value_with_internal_colon_is_preserved() { - // Only the first colon separates name from value. - let rules = - parse("/api\n Content-Security-Policy: default-src 'self'; img-src https:\n").unwrap(); - assert_eq!( - rules[0].headers, - vec![( - "Content-Security-Policy".into(), - "default-src 'self'; img-src https:".into() - )] - ); - } - - // ── reject cases ────────────────────────────────────────────────────────── - - #[test] - fn rejects_indented_line_at_top_of_file() { - let e = err(" X-Frame-Options: DENY\n"); - assert_eq!(e.line, 1); - assert!(e.message.contains("outside a path block"), "{}", e.message); - } - - #[test] - fn rejects_indented_line_after_blank_boundary() { - let input = "\ -/about - X-A: 1 - - X-B: 2 -"; - let e = err(input); - assert_eq!(e.line, 4); - assert!(e.message.contains("outside a path block"), "{}", e.message); - } - - #[test] - fn rejects_path_block_with_no_headers() { - let input = "\ -/lonely - -/api - Cache-Control: no-store -"; - let e = err(input); - assert_eq!(e.line, 1); - assert!(e.message.contains("no header lines"), "{}", e.message); - } - - #[test] - fn rejects_relative_pattern() { - let e = err("about\n X-Frame-Options: DENY\n"); - assert!(e.message.contains("absolute path"), "{}", e.message); - } - - #[test] - fn rejects_double_star() { - let e = err("/foo/**/bar\n X-Frame-Options: DENY\n"); - assert!(e.message.contains("'**'"), "{}", e.message); - } - - #[test] - fn rejects_placeholder_in_pattern() { - let e = err("/blog/:slug\n X-Frame-Options: DENY\n"); - assert!(e.message.contains("placeholders"), "{}", e.message); - } - - #[test] - fn content_type_routes_to_dedicated_field_not_headers() { - // `Content-Type` is asset metadata, not a response header — it must - // not appear in `headers` (otherwise the canister would append it - // alongside its own derived value, producing duplicates on the wire). - let rules = parse("/llms.txt\n Content-Type: text/markdown; charset=utf-8\n").unwrap(); - assert_eq!(rules.len(), 1); - assert!( - rules[0].headers.is_empty(), - "Content-Type leaked into headers: {:?}", - rules[0].headers - ); - assert_eq!( - rules[0].content_type.as_ref().unwrap().to_string(), - "text/markdown; charset=utf-8" - ); - } - - #[test] - fn content_type_is_case_insensitive() { - let rules = parse("/llms.txt\n content-type: text/plain\n").unwrap(); - assert_eq!( - rules[0].content_type.as_ref().unwrap().to_string(), - "text/plain" - ); - } - - #[test] - fn content_type_coexists_with_other_headers() { - let input = "\ -/llms.txt - Content-Type: text/markdown; charset=utf-8 - Cache-Control: max-age=3600 - X-Robots-Tag: noindex -"; - let rules = parse(input).unwrap(); - assert_eq!( - rules[0].content_type.as_ref().unwrap().to_string(), - "text/markdown; charset=utf-8" - ); - assert_eq!( - rules[0].headers, - vec![ - ("Cache-Control".into(), "max-age=3600".into()), - ("X-Robots-Tag".into(), "noindex".into()), - ] - ); - } - - #[test] - fn block_with_only_content_type_is_valid() { - // `Content-Type` alone counts as a non-empty block — finalize must - // not reject it as "no header lines under it". - let rules = parse("/llms.txt\n Content-Type: text/markdown\n").unwrap(); - assert_eq!(rules.len(), 1); - assert!(rules[0].headers.is_empty()); - assert_eq!( - rules[0].content_type.as_ref().unwrap().to_string(), - "text/markdown" - ); - } - - #[test] - fn rejects_duplicate_content_type_in_same_block() { - let input = "\ -/llms.txt - Content-Type: text/markdown - Content-Type: text/plain -"; - let e = err(input); - assert!(e.message.contains("duplicate"), "{}", e.message); - assert_eq!(e.line, 3); - } - - #[test] - fn rejects_invalid_content_type_value() { - let e = err("/llms.txt\n Content-Type: not a mime\n"); - assert!(e.message.contains("Content-Type"), "{}", e.message); - } - - #[test] - fn rejects_empty_content_type_value() { - let e = err("/llms.txt\n Content-Type:\n"); - assert!(e.message.contains("empty"), "{}", e.message); - } - - #[test] - fn rejects_missing_colon() { - let e = err("/about\n X-Frame-Options DENY\n"); - assert!(e.message.contains("missing a ':'"), "{}", e.message); - } - - #[test] - fn rejects_blank_header_name() { - let e = err("/about\n : DENY\n"); - assert!(e.message.contains("empty"), "{}", e.message); - } - - #[test] - fn rejects_invalid_header_name_chars() { - // Spaces in header name are invalid per RFC 7230. - let e = err("/about\n X Frame Options: DENY\n"); - assert!(e.message.contains("invalid header name"), "{}", e.message); - } - - #[test] - fn rejects_splat_in_value() { - let e = err("/about\n X-Custom: :splat\n"); - assert!(e.message.contains(":splat"), "{}", e.message); - } - - #[test] - fn rejects_placeholder_in_value() { - let e = err("/about\n X-Custom: :placeholder\n"); - assert!(e.message.contains(":placeholder"), "{}", e.message); - } - - #[test] - fn error_reports_line_number() { - let input = "\ -# comment -/good - X-Good: 1 - -/bad - : empty name -"; - let e = err(input); - assert_eq!(e.line, 6); - } - - #[test] - fn unterminated_block_at_eof_is_finalized() { - // The last block needs no trailing blank line. - let rules = parse("/about\n X-Frame-Options: DENY").unwrap(); - assert_eq!(rules.len(), 1); - assert_eq!( - rules[0].headers, - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn rejects_unterminated_path_block_with_no_headers() { - let e = err("/about\n"); - assert!(e.message.contains("no header lines"), "{}", e.message); - } - - // ── resolver ─────────────────────────────────────────────────────────────── - - fn rule(pattern_src: &str, headers: &[(&str, &str)]) -> HeaderRule { - HeaderRule { - pattern: pat(pattern_src), - headers: headers - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), - content_type: None, - } - } - - fn rule_with_content_type(pattern_src: &str, mime: &str) -> HeaderRule { - HeaderRule { - pattern: pat(pattern_src), - headers: Vec::new(), - content_type: Some(mime.parse().unwrap()), - } - } - - #[test] - fn resolve_empty_when_no_rules_match() { - let rules = vec![rule("/other", &[("X-Foo", "bar")])]; - assert!(resolve("/about", &rules).is_empty()); - } - - #[test] - fn resolve_exact_match() { - let rules = vec![rule("/about", &[("X-Frame-Options", "DENY")])]; - assert_eq!( - resolve("/about", &rules), - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn resolve_subtree_match() { - let rules = vec![rule("/_astro/*", &[("Cache-Control", "immutable")])]; - assert_eq!( - resolve("/_astro/app.js", &rules), - vec![("Cache-Control".into(), "immutable".into())] - ); - } - - #[test] - fn resolve_root_star_matches_everything() { - let rules = vec![rule("/*", &[("X-Frame-Options", "DENY")])]; - assert_eq!( - resolve("/anywhere", &rules), - vec![("X-Frame-Options".into(), "DENY".into())] - ); - } - - #[test] - fn resolve_extension_glob() { - // Tier-3 glob: `/*.md` matches any `.md` file at any depth. - let rules = vec![rule("/*.md", &[("Cache-Control", "public, max-age=300")])]; - assert_eq!( - resolve("/docs/intro.md", &rules), - vec![("Cache-Control".into(), "public, max-age=300".into())] - ); - assert!(resolve("/docs/intro.html", &rules).is_empty()); - } - - #[test] - fn resolve_concatenates_same_name_across_rules() { - // Per RFC 7230 §3.2.2 — `/* X-Robots-Tag: noindex` + `/admin/* X-Robots-Tag: nofollow` - // on `/admin/page` yields `X-Robots-Tag: noindex, nofollow`. - let rules = vec![ - rule("/*", &[("X-Robots-Tag", "noindex")]), - rule("/admin/*", &[("X-Robots-Tag", "nofollow")]), - ]; - assert_eq!( - resolve("/admin/page", &rules), - vec![("X-Robots-Tag".into(), "noindex, nofollow".into())] - ); - } - - #[test] - fn resolve_concatenation_is_case_insensitive_on_name() { - let rules = vec![rule("/*", &[("X-Foo", "a")]), rule("/*", &[("x-foo", "b")])]; - let out = resolve("/anywhere", &rules); - assert_eq!(out.len(), 1); - // First occurrence's casing is preserved. - assert_eq!(out[0].0, "X-Foo"); - assert_eq!(out[0].1, "a, b"); - } - - #[test] - fn resolve_set_cookie_stays_separate() { - // RFC 6265 §3: Set-Cookie must not be comma-folded. - let rules = vec![ - rule("/*", &[("Set-Cookie", "session=abc")]), - rule("/admin/*", &[("Set-Cookie", "admin=1")]), - ]; - let out = resolve("/admin/page", &rules); - let cookies: Vec<&(String, String)> = out - .iter() - .filter(|(n, _)| n.eq_ignore_ascii_case("set-cookie")) - .collect(); - assert_eq!(cookies.len(), 2); - assert_eq!(cookies[0].1, "session=abc"); - assert_eq!(cookies[1].1, "admin=1"); - } - - #[test] - fn resolve_set_cookie_within_one_rule_stays_separate() { - let rules = vec![rule( - "/api", - &[("Set-Cookie", "a=1"), ("Set-Cookie", "b=2")], - )]; - let out = resolve("/api", &rules); - let cookies: Vec<&(String, String)> = out - .iter() - .filter(|(n, _)| n.eq_ignore_ascii_case("set-cookie")) - .collect(); - assert_eq!(cookies.len(), 2); - assert_eq!(cookies[0].1, "a=1"); - assert_eq!(cookies[1].1, "b=2"); - } - - #[test] - fn resolve_output_is_stable_sorted_by_lowercased_name() { - let rules = vec![rule( - "/*", - &[ - ("Z-Header", "z"), - ("A-Header", "a"), - ("Set-Cookie", "first"), - ("Set-Cookie", "second"), - ("M-Header", "m"), - ], - )]; - let out = resolve("/anywhere", &rules); - let names: Vec<&str> = out.iter().map(|(n, _)| n.as_str()).collect(); - // Set-Cookie group preserves declaration order; everything else - // sorted by lowercased name. - assert_eq!( - names, - vec![ - "A-Header", - "M-Header", - "Set-Cookie", - "Set-Cookie", - "Z-Header" - ] - ); - // Set-Cookie entries kept declaration order: first, second. - let cookies: Vec<&str> = out - .iter() - .filter_map(|(n, v)| n.eq_ignore_ascii_case("set-cookie").then_some(v.as_str())) - .collect(); - assert_eq!(cookies, vec!["first", "second"]); - } - - #[test] - fn resolve_walks_rules_in_declaration_order() { - // First match contributes first; concatenation order is rule order. - let rules = vec![ - rule("/admin/*", &[("X-Foo", "B")]), - rule("/*", &[("X-Foo", "A")]), - ]; - let out = resolve("/admin/page", &rules); - // First matching rule's value comes first in the concatenation. - assert_eq!(out, vec![("X-Foo".into(), "B, A".into())]); - } - - #[test] - fn resolve_no_matching_rules_returns_empty() { - let rules = vec![rule("/specific", &[("X-Foo", "bar")])]; - assert!(resolve("/different", &rules).is_empty()); - assert!(resolve("/specific/subpath", &rules).is_empty()); - } - - // ── content_type_for ────────────────────────────────────────────────────── - - #[test] - fn content_type_for_returns_none_when_no_rule_matches() { - let rules = vec![rule_with_content_type("/*.md", "text/markdown")]; - assert!(content_type_for("/index.html", &rules).is_none()); - } - - #[test] - fn content_type_for_returns_none_when_matching_rule_has_no_content_type() { - // A pure response-headers block must not produce a Content-Type override. - let rules = vec![rule("/*", &[("X-Robots-Tag", "noindex")])]; - assert!(content_type_for("/anywhere", &rules).is_none()); - } - - #[test] - fn content_type_for_first_matching_rule_wins() { - // First-match-wins (Content-Type is single-valued — accumulation - // semantics make no sense). - let rules = vec![ - rule_with_content_type("/legacy/oldstyle.md", "text/plain"), - rule_with_content_type("/*.md", "text/markdown"), - ]; - assert_eq!( - content_type_for("/legacy/oldstyle.md", &rules) - .unwrap() - .to_string(), - "text/plain" - ); - assert_eq!( - content_type_for("/other.md", &rules).unwrap().to_string(), - "text/markdown" - ); - } - - #[test] - fn content_type_for_skips_matching_rules_without_content_type() { - // A broader header-only rule before a narrower Content-Type rule - // must not shadow the override. - let rules = vec![ - rule("/*", &[("X-Robots-Tag", "noindex")]), - rule_with_content_type("/*.md", "text/markdown"), - ]; - assert_eq!( - content_type_for("/intro.md", &rules).unwrap().to_string(), - "text/markdown" - ); - } -} diff --git a/crates/sync-core/src/html_handling.rs b/crates/sync-core/src/html_handling.rs deleted file mode 100644 index adcd76b..0000000 --- a/crates/sync-core/src/html_handling.rs +++ /dev/null @@ -1,290 +0,0 @@ -//! Auto-synthesised redirect rules implementing Cloudflare's -//! `auto-trailing-slash` HTML handling default. -//! -//! Given the set of `.html` asset keys in the project, this module emits the -//! redirect rules that turn each HTML file's filesystem path into a canonical -//! URL plus 307 aliases for every other "obvious" form. The exact table comes -//! from the Cloudflare docs: -//! -//! - -//! -//! For asset `/foo.html` (canonical `/foo`): -//! - `/foo -> 200 /foo.html` -//! - `/foo.html -> 307 /foo` (inert: see below) -//! - `/foo/ -> 307 /foo` -//! - `/foo/index -> 307 /foo` -//! - `/foo/index.html -> 307 /foo` -//! -//! For asset `/bar/index.html` (canonical `/bar/`): -//! - `/bar/ -> 200 /bar/index.html` -//! - `/bar -> 307 /bar/` -//! - `/bar.html -> 307 /bar` (chains via /bar) -//! - `/bar/index -> 307 /bar` (chains via /bar) -//! - `/bar/index.html -> 307 /bar` (inert; chains via /bar) -//! -//! For asset `/index.html` (canonical `/`): -//! - `/ -> 200 /index.html` -//! - `/index -> 307 /` -//! - `/index.html -> 307 /` (inert) -//! -//! The two "inert" entries collide with the asset at the same key. Today the -//! canister's `build_http_response` matches assets before rules, so a request -//! for `/foo.html` still serves the asset directly with a 200 rather than the -//! 307 Cloudflare would emit. We synthesise the rules anyway so the ruleset -//! reflects the full table and self-activates if that precedence ever changes; -//! `docs/redirects.md` documents the gap for users who care about strict URL -//! canonicalisation. -//! -//! Synthesised rules are prepended **before** the user's `_redirects` (see -//! `sync.rs`), so the html-handling defaults win at the exact paths they cover -//! and user rules catch what's left. A user-declared rule with the same `from` -//! as a synthesised rule is therefore shadowed by the synth rule. - -use crate::canister::{RedirectRule, RulePattern}; - -const HTML_EXT: &str = ".html"; -const INDEX_HTML: &str = "/index.html"; - -/// Builds the Cloudflare `auto-trailing-slash` rule set for every `.html` -/// asset key in `asset_keys`. Keys are processed in sorted order so the -/// resulting rule list is deterministic across runs (and across two HTML -/// files that would claim the same canonical URL — first one alphabetically -/// wins via declaration order). -pub fn synthesize(asset_keys: &[String]) -> Vec { - let mut html_keys: Vec<&str> = asset_keys - .iter() - .filter(|k| k.ends_with(HTML_EXT)) - .map(String::as_str) - .collect(); - html_keys.sort_unstable(); - - let mut rules = Vec::new(); - for key in html_keys { - rules.extend(rules_for_html_asset(key)); - } - rules -} - -fn rules_for_html_asset(asset_key: &str) -> Vec { - if asset_key == INDEX_HTML { - return root_index_rules(); - } - if let Some(stem) = asset_key.strip_suffix(INDEX_HTML) { - // `/foo/index.html` → stem is `/foo` (never empty here — empty is the - // root case handled above). - return directory_index_rules(stem, asset_key); - } - // `/foo.html` (and any nested non-index HTML). - let canonical = asset_key - .strip_suffix(HTML_EXT) - .expect("filtered to .html upstream"); - non_index_rules(canonical, asset_key) -} - -fn root_index_rules() -> Vec { - vec![ - rewrite("/", INDEX_HTML), - redirect_307("/index", "/"), - // Inert under current precedence — the asset at /index.html shadows. - redirect_307(INDEX_HTML, "/"), - ] -} - -fn directory_index_rules(stem: &str, asset_key: &str) -> Vec { - // stem = "/bar"; canonical = "/bar/". - let canonical = format!("{stem}/"); - vec![ - rewrite(&canonical, asset_key), - redirect_307(stem, &canonical), - redirect_307(&format!("{stem}.html"), stem), - redirect_307(&format!("{canonical}index"), stem), - // Inert under current precedence — the asset shadows. - redirect_307(asset_key, stem), - ] -} - -fn non_index_rules(canonical: &str, asset_key: &str) -> Vec { - vec![ - rewrite(canonical, asset_key), - // Inert under current precedence — the asset shadows. - redirect_307(asset_key, canonical), - redirect_307(&format!("{canonical}/"), canonical), - redirect_307(&format!("{canonical}/index"), canonical), - redirect_307(&format!("{canonical}/index.html"), canonical), - ] -} - -fn rewrite(from: &str, target: &str) -> RedirectRule { - RedirectRule { - from: RulePattern::Exact(from.to_string()), - to: target.to_string(), - status: 200, - headers: None, - } -} - -fn redirect_307(from: &str, target: &str) -> RedirectRule { - RedirectRule { - from: RulePattern::Exact(from.to_string()), - to: target.to_string(), - status: 307, - headers: None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn exact(s: &str) -> RulePattern { - RulePattern::Exact(s.to_string()) - } - - /// Collapses a rule to `(from, to, status)` for compact assertions. - fn triple(r: &RedirectRule) -> (RulePattern, String, u16) { - (r.from.clone(), r.to.clone(), r.status) - } - - #[test] - fn ignores_non_html_keys() { - let keys = vec![ - "/app.js".to_string(), - "/styles.css".to_string(), - "/image.png".to_string(), - ]; - assert!(synthesize(&keys).is_empty()); - } - - #[test] - fn empty_input_yields_no_rules() { - assert!(synthesize(&[]).is_empty()); - } - - #[test] - fn non_index_html_produces_cf_table() { - let rules = synthesize(&["/foo.html".to_string()]); - let actual: Vec<_> = rules.iter().map(triple).collect(); - assert_eq!( - actual, - vec![ - (exact("/foo"), "/foo.html".into(), 200), - (exact("/foo.html"), "/foo".into(), 307), - (exact("/foo/"), "/foo".into(), 307), - (exact("/foo/index"), "/foo".into(), 307), - (exact("/foo/index.html"), "/foo".into(), 307), - ] - ); - } - - #[test] - fn directory_index_produces_cf_table() { - let rules = synthesize(&["/bar/index.html".to_string()]); - let actual: Vec<_> = rules.iter().map(triple).collect(); - assert_eq!( - actual, - vec![ - (exact("/bar/"), "/bar/index.html".into(), 200), - (exact("/bar"), "/bar/".into(), 307), - (exact("/bar.html"), "/bar".into(), 307), - (exact("/bar/index"), "/bar".into(), 307), - (exact("/bar/index.html"), "/bar".into(), 307), - ] - ); - } - - #[test] - fn root_index_produces_minimal_cf_table() { - // Root index has no "bare" form — /index, /index.html only. - let rules = synthesize(&["/index.html".to_string()]); - let actual: Vec<_> = rules.iter().map(triple).collect(); - assert_eq!( - actual, - vec![ - (exact("/"), "/index.html".into(), 200), - (exact("/index"), "/".into(), 307), - (exact("/index.html"), "/".into(), 307), - ] - ); - } - - #[test] - fn nested_directory_index() { - // `/a/b/index.html` should canonicalise to `/a/b/`. - let rules = synthesize(&["/a/b/index.html".to_string()]); - let actual: Vec<_> = rules.iter().map(triple).collect(); - assert_eq!( - actual, - vec![ - (exact("/a/b/"), "/a/b/index.html".into(), 200), - (exact("/a/b"), "/a/b/".into(), 307), - (exact("/a/b.html"), "/a/b".into(), 307), - (exact("/a/b/index"), "/a/b".into(), 307), - (exact("/a/b/index.html"), "/a/b".into(), 307), - ] - ); - } - - #[test] - fn nested_non_index_html() { - let rules = synthesize(&["/docs/guide.html".to_string()]); - let actual: Vec<_> = rules.iter().map(triple).collect(); - assert_eq!( - actual, - vec![ - (exact("/docs/guide"), "/docs/guide.html".into(), 200), - (exact("/docs/guide.html"), "/docs/guide".into(), 307), - (exact("/docs/guide/"), "/docs/guide".into(), 307), - (exact("/docs/guide/index"), "/docs/guide".into(), 307), - (exact("/docs/guide/index.html"), "/docs/guide".into(), 307), - ] - ); - } - - #[test] - fn ordering_is_sorted_by_asset_key() { - // /foo.html sorts before /foo/index.html (`.` < `/` in ASCII), so - // /foo.html's rules emit first. When both contribute a rule with the - // same `from`, the first one wins via declaration order at the - // canister. - let keys = vec!["/foo/index.html".to_string(), "/foo.html".to_string()]; - let rules = synthesize(&keys); - let first_foo_rule = rules - .iter() - .find(|r| r.from == exact("/foo")) - .expect("a rule at /foo"); - // The 200 rewrite from /foo.html beats the 307 from /foo/index.html. - assert_eq!(first_foo_rule.status, 200); - assert_eq!(first_foo_rule.to, "/foo.html"); - } - - #[test] - fn mixed_html_and_assets_only_synthesises_for_html() { - let keys = vec![ - "/app.js".to_string(), - "/foo.html".to_string(), - "/styles.css".to_string(), - ]; - let rules = synthesize(&keys); - // 5 rules for /foo.html, none for the others. - assert_eq!(rules.len(), 5); - assert!(rules - .iter() - .all(|r| matches!(&r.from, RulePattern::Exact(p) if p.starts_with("/foo")))); - } - - #[test] - fn no_headers_on_synthesised_rules() { - let rules = synthesize(&[ - "/index.html".to_string(), - "/foo.html".to_string(), - "/bar/index.html".to_string(), - ]); - for r in rules { - assert!( - r.headers.is_none(), - "synthesised rule should not carry headers; the sync layer \ - resolves _headers against the rule's `from` for 3xx rules" - ); - } - } -} diff --git a/crates/sync-core/src/lib.rs b/crates/sync-core/src/lib.rs index a1b1195..b766f74 100644 --- a/crates/sync-core/src/lib.rs +++ b/crates/sync-core/src/lib.rs @@ -1,62 +1,6 @@ pub mod canister; +pub mod config; pub mod content; -pub mod glob; -pub mod headers; -pub mod html_handling; -pub mod redirects; pub mod scan; +pub mod security_policy; pub mod sync; - -/// Strips a Netlify-style trailing comment from a single line of a `_redirects` -/// or `_headers` file. A `#` only starts a comment when it sits at the start of -/// the line or is preceded by whitespace; a `#` inside a token (e.g. the URL -/// fragment in `/to/#topic`, or a CSP `report-uri /csp#endpoint`) is preserved. -pub(crate) fn strip_comment(line: &str) -> &str { - let bytes = line.as_bytes(); - let mut prev_is_ws = true; - for (i, &b) in bytes.iter().enumerate() { - if b == b'#' && prev_is_ws { - return &line[..i]; - } - prev_is_ws = b.is_ascii_whitespace(); - } - line -} - -#[cfg(test)] -mod strip_comment_tests { - use super::strip_comment; - - #[test] - fn leading_hash_strips_whole_line() { - assert_eq!(strip_comment("# full-line comment"), ""); - } - - #[test] - fn hash_after_space_starts_comment() { - assert_eq!(strip_comment("/old /new 301 # tail"), "/old /new 301 "); - } - - #[test] - fn hash_after_tab_starts_comment() { - assert_eq!(strip_comment("/old /new 301\t# tail"), "/old /new 301\t"); - } - - #[test] - fn hash_inside_token_is_preserved() { - assert_eq!( - strip_comment("/from /to/#topic 301"), - "/from /to/#topic 301" - ); - } - - #[test] - fn no_hash_returns_input_unchanged() { - assert_eq!(strip_comment("/from /to 301"), "/from /to 301"); - } - - #[test] - fn empty_input() { - assert_eq!(strip_comment(""), ""); - } -} diff --git a/crates/sync-core/src/redirects.rs b/crates/sync-core/src/redirects.rs deleted file mode 100644 index 748821a..0000000 --- a/crates/sync-core/src/redirects.rs +++ /dev/null @@ -1,405 +0,0 @@ -//! Strict line parser for Netlify-style `_redirects` files. -//! -//! Each non-empty, non-comment line is ` `. Whitespace is -//! permissive; semantics are strict — see the "File format" section of the -//! design plan for the full reject list. Errors carry a line number so the -//! plugin can point users at the offending entry without a canister round-trip. - -use crate::canister::{RedirectRule, RulePattern}; -use crate::strip_comment; -use http::StatusCode; -use url::Url; - -pub const REDIRECTS_FILENAME: &str = "_redirects"; - -const SUPPORTED_STATUSES: &[u16] = &[200, 301, 302, 307, 308, 404, 410]; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ParseError { - pub line: usize, - pub source: String, - pub message: String, -} - -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "_redirects: line {}: {} (source: `{}`)", - self.line, self.message, self.source - ) - } -} - -impl std::error::Error for ParseError {} - -/// Parses an entire `_redirects` file into the candid `RedirectRule` shape. -/// The first malformed line aborts parsing — we want the user to fix issues -/// one at a time rather than wade through cascading errors. -pub fn parse(content: &str) -> Result, ParseError> { - let mut rules = Vec::new(); - for (i, raw) in content.lines().enumerate() { - let line_no = i + 1; - let body = strip_comment(raw).trim(); - if body.is_empty() { - continue; - } - let rule = parse_line(body).map_err(|message| ParseError { - line: line_no, - source: raw.trim_end().to_string(), - message, - })?; - rules.push(rule); - } - Ok(rules) -} - -fn parse_line(body: &str) -> Result { - let tokens: Vec<&str> = body.split_whitespace().collect(); - match tokens.len() { - 3 => {} - n if n < 3 => { - return Err(format!( - "expected ' ' (3 fields), got {n}" - )) - } - n => { - return Err(format!( - "expected ' ' (3 fields), got {n}; \ - extra fields are not supported (no headers, conditions, or query-string match)" - )) - } - } - let from_tok = tokens[0]; - let to_tok = tokens[1]; - let status_tok = tokens[2]; - - let status = parse_status(status_tok)?; - let from = parse_from(from_tok)?; - parse_to(to_tok, status)?; - - Ok(RedirectRule { - from, - to: to_tok.to_string(), - status: status.as_u16(), - headers: None, - }) -} - -fn parse_status(token: &str) -> Result { - if token.ends_with('!') { - return Err(format!( - "Netlify '!' force suffix on status ('{token}') is not supported; \ - files win over rules at the same path — remove the conflicting asset instead" - )); - } - let code: u16 = token - .parse() - .map_err(|_| format!("status '{token}' is not an integer"))?; - if !SUPPORTED_STATUSES.contains(&code) { - return Err(format!( - "status {code} is not one of {{200, 301, 302, 307, 308, 404, 410}}" - )); - } - StatusCode::from_u16(code).map_err(|e| format!("invalid status {code}: {e}")) -} - -fn parse_from(token: &str) -> Result { - if !token.starts_with('/') { - return Err(format!( - "'from' ('{token}') must be an absolute path (start with '/')" - )); - } - if token.contains(':') { - return Err(format!( - "':' placeholders in 'from' ('{token}') are not supported" - )); - } - if let Some(prefix) = token.strip_suffix("/*") { - // Subtree. `/*` alone matches the entire site (subtree at `/`). - if prefix.contains('*') { - return Err(format!( - "wildcards in 'from' ('{token}') are only supported as a trailing '/*'" - )); - } - let subtree = if prefix.is_empty() { - "/".to_string() - } else { - format!("{prefix}/") - }; - return Ok(RulePattern::Subtree(subtree)); - } - if token.contains('*') { - return Err(format!( - "wildcards in 'from' ('{token}') are only supported as a trailing '/*'" - )); - } - Ok(RulePattern::Exact(token.to_string())) -} - -fn parse_to(token: &str, status: StatusCode) -> Result<(), String> { - if token.contains(":splat") || token.contains(":placeholder") { - return Err(format!( - "':splat' / ':placeholder' substitution in 'to' ('{token}') \ - is a known-deferred feature; see the plan's tier-3 follow-up" - )); - } - if status.is_redirection() { - // 3xx: absolute path or fully-qualified URL. - if token.starts_with('/') { - return Ok(()); - } - Url::parse(token).map(|_| ()).map_err(|_| { - format!( - "'to' ('{token}') for a {} rule must be an absolute path or fully-qualified URL", - status.as_u16() - ) - }) - } else { - // 200 / 4xx: absolute asset path. - if !token.starts_with('/') { - return Err(format!( - "'to' ('{token}') for a status-{} rule must be an absolute asset path", - status.as_u16() - )); - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn parse_one(line: &str) -> Result { - let mut rules = parse(line)?; - assert_eq!(rules.len(), 1, "expected exactly one rule from '{line}'"); - Ok(rules.pop().unwrap()) - } - - fn err(line: &str) -> ParseError { - parse(line).unwrap_err() - } - - // ── happy paths ─────────────────────────────────────────────────────────── - - #[test] - fn empty_input_yields_no_rules() { - assert!(parse("").unwrap().is_empty()); - assert!(parse("\n\n \n").unwrap().is_empty()); - } - - #[test] - fn comments_and_blanks_are_ignored() { - let input = "\n# leading comment\n\n # indented comment\n/from /to 301\n\n# trailing\n"; - let rules = parse(input).unwrap(); - assert_eq!(rules.len(), 1); - assert_eq!(rules[0].status, 301); - } - - #[test] - fn comment_after_rule_is_stripped() { - let r = parse_one("/old /new 301 # inline comment").unwrap(); - assert_eq!(r.from, RulePattern::Exact("/old".into())); - assert_eq!(r.to, "/new"); - assert_eq!(r.status, 301); - } - - #[test] - fn hash_inside_token_is_preserved_as_fragment() { - // `#` only begins a comment after whitespace — a fragment in the `to` - // token stays attached to the URL. - let r = parse_one("/from-topic /to/#topic 301").unwrap(); - assert_eq!(r.from, RulePattern::Exact("/from-topic".into())); - assert_eq!(r.to, "/to/#topic"); - assert_eq!(r.status, 301); - } - - #[test] - fn hash_after_tab_starts_a_comment() { - // Tab counts as whitespace for the purposes of comment detection, so - // `\t#trailing` is a comment regardless of which whitespace char - // preceded it. - let r = parse_one("/old /new 301\t#trailing").unwrap(); - assert_eq!(r.to, "/new"); - let r = parse_one("/old\t/new\t301\t#trailing").unwrap(); - assert_eq!(r.to, "/new"); - } - - #[test] - fn whitespace_is_permissive() { - // Mixed tabs/spaces between tokens. - let r = parse_one("/old\t \t/new \t 302").unwrap(); - assert_eq!(r.from, RulePattern::Exact("/old".into())); - assert_eq!(r.to, "/new"); - assert_eq!(r.status, 302); - } - - #[test] - fn each_supported_status_parses() { - for status in SUPPORTED_STATUSES { - let line = format!("/from /to {status}"); - let r = parse_one(&line).unwrap_or_else(|e| panic!("status {status}: {e}")); - assert_eq!(r.status, *status); - } - } - - #[test] - fn exact_pattern_is_default() { - let r = parse_one("/about /about.html 200").unwrap(); - assert_eq!(r.from, RulePattern::Exact("/about".into())); - } - - #[test] - fn trailing_star_lowers_to_subtree() { - let r = parse_one("/blog/* /blog/index.html 200").unwrap(); - assert_eq!(r.from, RulePattern::Subtree("/blog/".into())); - } - - #[test] - fn root_star_lowers_to_root_subtree() { - // `/*` matches the entire site; the lowered prefix is `/` so every - // request matches via `starts_with("/")`. - let r = parse_one("/* /index.html 200").unwrap(); - assert_eq!(r.from, RulePattern::Subtree("/".into())); - } - - #[test] - fn three_xx_accepts_fully_qualified_url() { - let r = parse_one("/legacy https://example.com/new 301").unwrap(); - assert_eq!(r.to, "https://example.com/new"); - assert_eq!(r.status, 301); - } - - #[test] - fn three_xx_accepts_absolute_path() { - let r = parse_one("/legacy /new 308").unwrap(); - assert_eq!(r.to, "/new"); - } - - #[test] - fn multiple_rules_preserve_order() { - let input = "\ -/a /a.html 200 -/old /new 301 -/gone /tombstone.html 410 -"; - let rules = parse(input).unwrap(); - let statuses: Vec = rules.iter().map(|r| r.status).collect(); - assert_eq!(statuses, vec![200, 301, 410]); - } - - // ── reject cases ────────────────────────────────────────────────────────── - - #[test] - fn rejects_too_few_fields() { - let e = err("/only-from"); - assert!(e.message.contains("3 fields"), "{}", e.message); - assert_eq!(e.line, 1); - } - - #[test] - fn rejects_too_many_fields() { - // 4 fields = extra token after status (would be inline headers in Netlify). - let e = err("/from /to 301 extra"); - assert!(e.message.contains("3 fields"), "{}", e.message); - assert!(e.message.contains("not supported"), "{}", e.message); - } - - #[test] - fn rejects_unsupported_status() { - let e = err("/from /to 418"); - assert!(e.message.contains("418"), "{}", e.message); - assert!(e.message.contains("200"), "{}", e.message); - } - - #[test] - fn rejects_non_integer_status() { - let e = err("/from /to redirect"); - assert!(e.message.contains("integer"), "{}", e.message); - } - - #[test] - fn rejects_netlify_force_suffix() { - let e = err("/from /to 301!"); - assert!(e.message.contains("'!'"), "{}", e.message); - } - - #[test] - fn rejects_splat_in_to() { - let e = err("/blog/* /archive/:splat 301"); - assert!(e.message.contains(":splat"), "{}", e.message); - } - - #[test] - fn rejects_placeholder_in_to() { - let e = err("/users/:id /people/:id 301"); - // The `:id` in `from` trips the ':' placeholder check first. - assert!(e.message.contains("placeholders"), "{}", e.message); - } - - #[test] - fn rejects_placeholder_in_to_with_safe_from() { - let e = err("/blog/* /archive/:placeholder 301"); - assert!(e.message.contains(":placeholder"), "{}", e.message); - } - - #[test] - fn rejects_relative_from() { - let e = err("relative /target 301"); - assert!(e.message.contains("absolute path"), "{}", e.message); - } - - #[test] - fn rejects_wildcard_in_from_not_at_end() { - let e = err("/blog/*/post /target 301"); - assert!(e.message.contains("trailing '/*'"), "{}", e.message); - } - - #[test] - fn rejects_relative_to_on_200() { - let e = err("/from to 200"); - assert!(e.message.contains("absolute asset path"), "{}", e.message); - } - - #[test] - fn rejects_relative_to_on_4xx() { - let e = err("/from target.html 404"); - assert!(e.message.contains("absolute asset path"), "{}", e.message); - } - - #[test] - fn rejects_unparseable_to_on_3xx() { - // Not absolute, not a URL — `Url::parse` rejects relative URLs. - let e = err("/from not-a-url 301"); - assert!(e.message.contains("absolute path"), "{}", e.message); - } - - #[test] - fn error_reports_line_number() { - let input = "\ -# comment -/good /good 301 -/bad /bad 999 -/another /another 302 -"; - let e = err(input); - assert_eq!(e.line, 3); - } - - #[test] - fn error_carries_source_line_for_display() { - // The plugin echoes parse errors verbatim to the user; the source line - // must be embedded so a line-number alone isn't the only hint. - let input = "\ -/good /good 301 -/incomplete /target -"; - let e = err(input); - assert_eq!(e.line, 2); - assert_eq!(e.source, "/incomplete /target"); - let rendered = format!("{e}"); - assert!(rendered.contains("/incomplete /target"), "{rendered}"); - assert!(rendered.contains("line 2"), "{rendered}"); - } -} diff --git a/crates/sync-core/src/scan.rs b/crates/sync-core/src/scan.rs index 32b4265..7ad1df8 100644 --- a/crates/sync-core/src/scan.rs +++ b/crates/sync-core/src/scan.rs @@ -1,89 +1,121 @@ -//! Scan a project's input directory for asset files. +//! Scan a project's input directory for asset files, applying the per-directory +//! `.ic-assets.json5` configuration. //! -//! Matches `ic-asset`'s traversal behaviour: only plain files are included; -//! dotfiles and all symlinks (to files or directories) are skipped. -//! Exception: `.well-known/` is traversed even though it starts with `.`, -//! mirroring `ic-asset`'s `KNOWN_DIRECTORIES` list. +//! Ported from `ic-asset`'s `gather_asset_descriptors` / `include_entry`: +//! - dotfiles and dotdirs are skipped unless a config rule re-includes them +//! (`"ignore": false`), with the exception of `KNOWN_DIRECTORIES` (`.well-known`), +//! - a rule's `"ignore": true` drops a path (and prunes a directory subtree), +//! - the `.ic-assets.json` / `.ic-assets.json5` files themselves are never uploaded, +//! - each surviving file carries its resolved [`AssetConfig`]. //! -//! Files whose names appear in `CONFIG_FILENAMES` (`_redirects`, etc.) are -//! consumed by the sync layer and excluded from the upload set. +//! Symlinks are not followed (walkdir default), matching `ic-asset`. -use crate::headers::HEADERS_FILENAME; -use crate::redirects::REDIRECTS_FILENAME; +use crate::config::{ + AssetConfig, AssetSourceDirectoryConfiguration, ASSETS_CONFIG_FILENAME_JSON, + ASSETS_CONFIG_FILENAME_JSON5, +}; use std::path::{Path, PathBuf}; +use walkdir::{DirEntry, WalkDir}; const KNOWN_DIRECTORIES: &[&str] = &[".well-known"]; -/// Filenames whose presence is configuration, not asset content. Loaded for -/// their side effects (redirect rules, header rules, etc.) and excluded from -/// the upload set. -const CONFIG_FILENAMES: &[&str] = &[REDIRECTS_FILENAME, HEADERS_FILENAME]; - #[derive(Debug)] pub struct AssetSource { pub path: PathBuf, pub key: String, + pub config: AssetConfig, } -/// Scans `dirs` for asset files. -pub fn scan(dirs: &[String]) -> Result, String> { - let mut out = Vec::new(); - let mut seen_keys = std::collections::HashSet::new(); - for dir in dirs { - let root = Path::new(dir); - let root_abs = root - .canonicalize() - .map_err(|e| format!("canonicalize {}: {e}", root.display()))?; - walk(&root_abs, &root_abs, &mut out, &mut seen_keys)?; +/// Builds an absolute root for `dir` (a manifest-relative directory the host +/// preopened) by prepending `/` and dropping `.` / redundant components. +/// +/// We deliberately avoid [`Path::canonicalize`] here. Under WASI it calls +/// `realpath`, which returns `ENOENT` ("No such file or directory") for *any* +/// path beneath a preopen whose guest name has more than one component (e.g. +/// `src/frontend/dist`) — even though ordinary access (`read_dir`, `metadata`, +/// `read`) through that preopen works fine. Single-component dirs like `dist` +/// happen to canonicalize to `/dist`; this helper produces the same shape +/// (`/src/frontend/dist`) for nested dirs without touching `realpath`, and gives +/// [`AssetSourceDirectoryConfiguration::load`] the absolute root it requires. +/// +/// The host guarantees `dir` is relative and free of `..` components, so keeping +/// only `Normal` components cannot escape the preopen. +fn absolute_root(dir: &str) -> PathBuf { + let mut root = PathBuf::from("/"); + for component in Path::new(dir).components() { + if let std::path::Component::Normal(c) = component { + root.push(c); + } } - Ok(out) + root } -fn walk( - root: &Path, - current: &Path, - out: &mut Vec, - seen_keys: &mut std::collections::HashSet, -) -> Result<(), String> { - let entries = - std::fs::read_dir(current).map_err(|e| format!("read_dir {}: {e}", current.display()))?; - for entry in entries { - let entry = entry.map_err(|e| format!("dir entry in {}: {e}", current.display()))?; - let name = entry.file_name(); - let name_str = name.to_string_lossy(); - let path = entry.path(); - let ft = entry - .file_type() - .map_err(|e| format!("file_type {}: {e}", path.display()))?; - - // Skip dotfiles / dotdirs (except known dirs like .well-known). - if name_str.starts_with('.') && !(ft.is_dir() && KNOWN_DIRECTORIES.contains(&&*name_str)) { - continue; - } - - // Skip config files (`_redirects` etc.) regardless of where they sit - // in the tree — they're consumed by the sync layer, not uploaded. - if ft.is_file() && CONFIG_FILENAMES.contains(&&*name_str) { - continue; - } +/// Scans `dirs` for asset files, resolving each file's `.ic-assets.json5` config. +pub fn scan(dirs: &[String]) -> Result, String> { + let mut out: Vec = Vec::new(); + let mut seen_keys = std::collections::HashSet::new(); - if ft.is_dir() { - walk(root, &path, out, seen_keys)?; - } else if ft.is_file() { - let relative = path - .strip_prefix(root) - .map_err(|e| format!("strip_prefix {}: {e}", path.display()))?; + for dir in dirs { + let root = absolute_root(dir); + let mut configuration = AssetSourceDirectoryConfiguration::load(&root)?; + + let entries: Vec = WalkDir::new(&root) + .into_iter() + .filter_entry(|entry| { + // The root itself is always traversed; pruning it (e.g. when the + // root dir name starts with `.`) would drop the whole tree. + if entry.depth() == 0 { + return true; + } + // `entry.path()` is already rooted at `root`, so it matches the + // config-map keys directly — no canonicalization needed. + let config = configuration + .get_asset_config(entry.path()) + .unwrap_or_default(); + include_entry(entry, &config) + }) + .filter_map(|r| r.ok()) + .filter(|entry| { + entry.file_type().is_file() + && entry.file_name() != ASSETS_CONFIG_FILENAME_JSON + && entry.file_name() != ASSETS_CONFIG_FILENAME_JSON5 + }) + .collect(); + + for entry in entries { + let source = entry.path().to_path_buf(); + let relative = source + .strip_prefix(&root) + .map_err(|e| format!("strip_prefix {}: {e}", source.display()))?; let key = format!("/{}", relative.to_string_lossy()); + let config = configuration.get_asset_config(&source)?; if !seen_keys.insert(key.clone()) { return Err(format!("duplicate asset key {key}")); } - out.push(AssetSource { path, key }); + out.push(AssetSource { + path: source, + key, + config, + }); } - // Symlinks (to files or directories) are skipped, matching ic-asset::sync. } - Ok(()) + Ok(out) +} + +/// Decides whether a walkdir entry is included, mirroring `ic-asset::include_entry`. +/// An explicit `ignore` rule wins; otherwise dotfiles/dotdirs are excluded unless +/// they are a known directory (e.g. `.well-known`). +fn include_entry(entry: &DirEntry, config: &AssetConfig) -> bool { + if let Some(ignored) = config.ignore { + !ignored + } else if let Some(entry_name) = entry.file_name().to_str() { + let is_known = entry.file_type().is_dir() && KNOWN_DIRECTORIES.contains(&entry_name); + is_known || !entry_name.starts_with('.') + } else { + true + } } #[cfg(test)] @@ -127,48 +159,67 @@ mod tests { fn dotfile_skipped() { let dir = tmp(); fs::write(dir.path().join(".hidden"), b"secret").unwrap(); - fs::write(dir.path().join(".gitignore"), b"*.tmp").unwrap(); fs::write(dir.path().join("visible.txt"), b"ok").unwrap(); let keys = sorted_keys(scan(&[dir_str(&dir)]).unwrap()); assert_eq!(keys, vec!["/visible.txt"]); } #[test] - fn redirects_file_skipped() { + fn config_file_skipped() { let dir = tmp(); - fs::write(dir.path().join(REDIRECTS_FILENAME), b"").unwrap(); + fs::write(dir.path().join(".ic-assets.json5"), b"[]").unwrap(); fs::write(dir.path().join("index.html"), b"hi").unwrap(); let keys = sorted_keys(scan(&[dir_str(&dir)]).unwrap()); assert_eq!(keys, vec!["/index.html"]); } #[test] - fn empty_directory() { + fn ignore_rule_excludes_file() { let dir = tmp(); - assert!(scan(&[dir_str(&dir)]).unwrap().is_empty()); + fs::write( + dir.path().join(".ic-assets.json5"), + br#"[{ "match": "secret.txt", "ignore": true }]"#, + ) + .unwrap(); + fs::write(dir.path().join("secret.txt"), b"x").unwrap(); + fs::write(dir.path().join("index.html"), b"y").unwrap(); + let keys = sorted_keys(scan(&[dir_str(&dir)]).unwrap()); + assert_eq!(keys, vec!["/index.html"]); } #[test] - fn duplicate_key_across_two_source_dirs() { - let dir1 = tmp(); - let dir2 = tmp(); - fs::write(dir1.path().join("index.html"), b"v1").unwrap(); - fs::write(dir2.path().join("index.html"), b"v2").unwrap(); - let err = scan(&[dir_str(&dir1), dir_str(&dir2)]).unwrap_err(); - assert!( - err.contains("/index.html"), - "error should name the key: {err}" - ); + fn reinclude_dotfile_via_config() { + let dir = tmp(); + fs::write( + dir.path().join(".ic-assets.json5"), + br#"[{ "match": ".env", "ignore": false }]"#, + ) + .unwrap(); + fs::write(dir.path().join(".env"), b"x").unwrap(); + let keys = sorted_keys(scan(&[dir_str(&dir)]).unwrap()); + assert_eq!(keys, vec!["/.env"]); } #[test] - fn multiple_source_dirs() { - let dir1 = tmp(); - let dir2 = tmp(); - fs::write(dir1.path().join("a.txt"), b"a").unwrap(); - fs::write(dir2.path().join("b.txt"), b"b").unwrap(); - let keys = sorted_keys(scan(&[dir_str(&dir1), dir_str(&dir2)]).unwrap()); - assert_eq!(keys, vec!["/a.txt", "/b.txt"]); + fn config_attaches_to_source() { + let dir = tmp(); + fs::write( + dir.path().join(".ic-assets.json5"), + br#"[{ "match": "*.html", "headers": { "X-Foo": "bar" } }]"#, + ) + .unwrap(); + fs::write(dir.path().join("index.html"), b"x").unwrap(); + let sources = scan(&[dir_str(&dir)]).unwrap(); + let src = sources.iter().find(|s| s.key == "/index.html").unwrap(); + assert_eq!( + src.config + .headers + .as_ref() + .unwrap() + .get("X-Foo") + .map(String::as_str), + Some("bar") + ); } #[test] @@ -181,16 +232,22 @@ mod tests { assert_eq!(keys, vec!["/.well-known/ic-domains", "/index.html"]); } - // Symlinks are skipped regardless of target type, matching ic-asset::sync. - #[cfg(unix)] #[test] - fn symlink_skipped() { + fn empty_directory() { let dir = tmp(); - let target = dir.path().join("real.txt"); - fs::write(&target, b"content").unwrap(); - let link = dir.path().join("link.txt"); - std::os::unix::fs::symlink(&target, &link).unwrap(); - let keys = sorted_keys(scan(&[dir_str(&dir)]).unwrap()); - assert_eq!(keys, vec!["/real.txt"]); + assert!(scan(&[dir_str(&dir)]).unwrap().is_empty()); + } + + #[test] + fn duplicate_key_across_two_source_dirs() { + let dir1 = tmp(); + let dir2 = tmp(); + fs::write(dir1.path().join("index.html"), b"v1").unwrap(); + fs::write(dir2.path().join("index.html"), b"v2").unwrap(); + let err = scan(&[dir_str(&dir1), dir_str(&dir2)]).unwrap_err(); + assert!( + err.contains("/index.html"), + "error should name the key: {err}" + ); } } diff --git a/crates/sync-core/src/security_policy.rs b/crates/sync-core/src/security_policy.rs new file mode 100644 index 0000000..9e12e29 --- /dev/null +++ b/crates/sync-core/src/security_policy.rs @@ -0,0 +1,82 @@ +//! Content-Security-Policy presets, ported from `ic-asset`'s `security_policy.rs`. +//! +//! `.ic-assets.json5` may set `"security_policy": "standard" | "hardened" | +//! "disabled"` as shorthand for a curated set of response headers. We keep the +//! header generation (`to_headers`) and the `Display` impl; the dfx-only json5 +//! pretty-printer is dropped. + +use crate::config::HeadersConfig; +use serde::{Deserialize, Serialize}; +use std::fmt::Display; + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)] +#[serde(rename_all = "lowercase")] +/// Asset synchronization will warn if insufficient security headers are set. +/// To help with security headers these options are provided, which can be set +/// in `.ic-assets.json5` with the `"security_policy"` field. +pub enum SecurityPolicy { + /// No security policy provided by asset sync. + Disabled, + /// The default security policy that will work for most dapps but could be + /// more secure. Asset sync will still warn that it could be hardened. + Standard, + /// Use the default security policy with custom improvements. Same as + /// `Standard`, but disables the "could be hardened" warning. + Hardened, +} + +struct ConcreteSecurityPolicy { + /// (header_name, header_content) + headers: Vec<(&'static str, &'static str)>, +} + +impl ConcreteSecurityPolicy { + fn to_headers(&self) -> HeadersConfig { + self.headers + .iter() + .map(|(name, content)| (name.to_string(), content.to_string())) + .collect() + } +} + +impl SecurityPolicy { + fn to_policy(self) -> ConcreteSecurityPolicy { + match self { + SecurityPolicy::Disabled => ConcreteSecurityPolicy { headers: vec![] }, + SecurityPolicy::Standard | SecurityPolicy::Hardened => ConcreteSecurityPolicy { + headers: vec![ + ( + "Content-Security-Policy", + "default-src 'self';script-src 'self';connect-src 'self' http://localhost:* https://icp0.io https://*.icp0.io https://icp-api.io;img-src 'self' data:;style-src * 'unsafe-inline';style-src-elem * 'unsafe-inline';font-src *;object-src 'none';base-uri 'self';frame-ancestors 'none';form-action 'self';upgrade-insecure-requests;", + ), + ( + "Permissions-Policy", + "accelerometer=(), ambient-light-sensor=(), autoplay=(), battery=(), camera=(), cross-origin-isolated=(), display-capture=(), document-domain=(), encrypted-media=(), execution-while-not-rendered=(), execution-while-out-of-viewport=(), fullscreen=(), geolocation=(), gyroscope=(), keyboard-map=(), magnetometer=(), microphone=(), midi=(), navigation-override=(), payment=(), picture-in-picture=(), publickey-credentials-get=(), screen-wake-lock=(), sync-xhr=(), usb=(), web-share=(), xr-spatial-tracking=(), clipboard-read=(), clipboard-write=(), gamepad=(), speaker-selection=(), conversion-measurement=(), focus-without-user-activation=(), hid=(), idle-detection=(), interest-cohort=(), serial=(), sync-script=(), trust-token-redemption=(), window-placement=(), vertical-scroll=()", + ), + ("X-Frame-Options", "DENY"), + ("Referrer-Policy", "same-origin"), + ( + "Strict-Transport-Security", + "max-age=31536000; includeSubDomains", + ), + ("X-Content-Type-Options", "nosniff"), + ("X-XSS-Protection", "1; mode=block"), + ], + }, + } + } + + pub(crate) fn to_headers(self) -> HeadersConfig { + self.to_policy().to_headers() + } +} + +impl Display for SecurityPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SecurityPolicy::Disabled => write!(f, "disabled"), + SecurityPolicy::Standard => write!(f, "standard"), + SecurityPolicy::Hardened => write!(f, "hardened"), + } + } +} diff --git a/crates/sync-core/src/sync.rs b/crates/sync-core/src/sync.rs index dfbe92c..83202d2 100644 --- a/crates/sync-core/src/sync.rs +++ b/crates/sync-core/src/sync.rs @@ -1,8 +1,12 @@ -//! Orchestrates: load assets, diff against canister, upload chunks, commit batch. +//! Orchestrates: load assets + `.ic-assets.json5` config, diff against the +//! legacy asset canister, upload chunks, commit batch. //! -//! V2-only port of `ic-asset`'s `sync` flow, simplified: -//! - synchronous (drives the host's sync `canister-call` import) -//! - no proposal mode +//! Port of `ic-asset`'s sync flow targeting the dfx 0.32.0 `assetstorage` +//! canister (`api_version == 2`), simplified: +//! - synchronous (drives the host's sync `canister-call` import), +//! - no proposal mode, +//! - no redirect rules / `_headers` / `_redirects` (the legacy canister has no +//! `SetRedirectRules`); per-asset metadata comes from `.ic-assets.json5`. use candid::{Nat, Principal}; use mime::Mime; @@ -10,17 +14,14 @@ use std::collections::HashMap; use crate::canister::{ api_version, commit_batch, create_batch, create_chunks, get_asset_properties, - get_redirect_rules, grant_permission_via_proxy, list_assets, list_permitted, AssetDetails, - AssetProperties, BatchOperationKind, CanisterCall, CommitBatchArguments, CreateAssetArguments, - DeleteAssetArguments, Permission, RedirectRule, SetAssetContentArguments, - SetAssetPropertiesArguments, SetRedirectRulesArguments, UnsetAssetContentArguments, + grant_permission_via_proxy, list_assets, list_permitted, AssetDetails, AssetProperties, + BatchOperationKind, CanisterCall, CommitBatchArguments, CreateAssetArguments, + DeleteAssetArguments, Permission, SetAssetContentArguments, SetAssetPropertiesArguments, + UnsetAssetContentArguments, }; +use crate::config::{AssetConfig, HeadersConfig}; use crate::content::{encoders_for, Content, Encoder}; -use crate::headers::{self, HeaderRule, HEADERS_FILENAME}; -use crate::html_handling; -use crate::redirects::{self, REDIRECTS_FILENAME}; use crate::scan::AssetSource; -use std::path::Path; // Stay safely under the canister's ingress message limit (~2 MB). const MAX_CHUNK_SIZE: usize = 1_900_000; @@ -39,6 +40,12 @@ struct ProjectAsset { encodings: HashMap, } +impl ProjectAsset { + fn config(&self) -> &AssetConfig { + &self.source.config + } +} + /// Ensures the signing identity has `Commit` permission on the assets canister. /// /// Called only in proxy mode. Queries the current `Commit` permission list and, @@ -70,29 +77,18 @@ pub fn sync( identity_principal: &str, proxy_canister_id: Option<&str>, ) -> Result { - // The assets plugin owns the URL space of its canister: every key starts at - // `/`, `_redirects` lives at the project root, and the canister has no - // notion of "merge two trees together". Multiple input directories would - // produce ambiguous redirect-file precedence and quietly hide key - // collisions, so the contract is exactly one directory. - let dir = match dirs { - [d] => d, - _ => { - return Err(format!( - "assets sync plugin: expected exactly one input directory, got {}", - dirs.len() - )) - } - }; + if dirs.is_empty() { + return Err("assets sync plugin: expected at least one input directory, got 0".to_string()); + } - if let Some(_proxy) = proxy_canister_id { + if proxy_canister_id.is_some() { ensure_commit_permission(canister, identity_principal)?; } let version = api_version(canister)?; if version < 2 { return Err(format!( - "assets canister api_version is {version}; this plugin requires V2" + "assets canister api_version is {version}; this plugin requires V2 (dfx 0.32.0 assetstorage)" )); } println!("api_version: {version}"); @@ -100,65 +96,16 @@ pub fn sync( let sources = crate::scan::scan(dirs)?; println!("found {} file(s) from {:?}", sources.len(), dirs); - // Synthesised CF `auto-trailing-slash` rules first, then the user's - // `_redirects`. The canister matches rules in declaration order, so this - // makes the html-handling defaults win at the exact paths they cover and - // lets user rules catch what's left (e.g. a SPA-style `/* /404.html 404` - // catch-all only fires for paths the html_handling defaults don't claim). - // - // The reason synth must come first is also a certification correctness - // requirement: if a user subtree rule like `/*` is declared before the - // synthesised Exact rules, the user rule wins at request time and the - // canister returns a wildcard expression path (`["http_expr", "<*>"]`), - // while the synthesised Exact entries (e.g. `["http_expr", "index", "<$>"]`) - // still sit in the certified tree. The HTTP gateway's verifier then - // rejects the response with "wildcard expression path provided, but a - // potential exact expression path exists in the tree" and returns 503. - // Putting synth first keeps responses on the Exact path whenever an Exact - // entry exists. - // - // Synthesis is keyed off the scanned asset keys; nothing in the project - // has uploaded yet, so this is the authoritative HTML set. - let user_rules = load_redirect_rules(dir)?; - println!( - "parsed {} redirect rule(s) from _redirects", - user_rules.len() - ); - - let asset_keys: Vec = sources.iter().map(|s| s.key.clone()).collect(); - let synthesised = html_handling::synthesize(&asset_keys); - if !synthesised.is_empty() { - println!( - "synthesised {} html-handling rule(s) for {} html asset(s)", - synthesised.len(), - asset_keys.iter().filter(|k| k.ends_with(".html")).count(), - ); - } - let mut project_rules = synthesised; - project_rules.extend(user_rules); - - let project_header_rules = load_header_rules(dir)?; - println!( - "parsed {} header rule(s) from _headers", - project_header_rules.len() - ); - let canister_assets: HashMap = list_assets(canister)? .into_iter() .map(|d| (d.key.clone(), d)) .collect(); println!("canister currently has {} asset(s)", canister_assets.len()); - let canister_rules = get_redirect_rules(canister)?; - println!( - "canister currently has {} redirect rule(s)", - canister_rules.len() - ); - // Phase 1: compute metadata only — no batch created yet. let mut project_assets: HashMap = HashMap::new(); for source in sources { - let asset = prepare_asset(source, &project_header_rules, &canister_assets)?; + let asset = prepare_asset(source, &canister_assets)?; project_assets.insert(asset.source.key.clone(), asset); } @@ -181,9 +128,6 @@ pub fn sync( &project_assets, &canister_assets, &canister_asset_properties, - &project_rules, - &canister_rules, - &project_header_rules, ) .is_empty() { @@ -204,9 +148,6 @@ pub fn sync( &project_assets, &canister_assets, &canister_asset_properties, - &project_rules, - &canister_rules, - &project_header_rules, ); println!("committing {} operation(s)", operations.len()); @@ -220,27 +161,30 @@ pub fn sync( fn prepare_asset( source: AssetSource, - header_rules: &[HeaderRule], canister_assets: &HashMap, ) -> Result { - let mut content = Content::load(&source.path)?; - // Apply per-glob `Content-Type` override from `_headers` before deciding - // encoders or computing the asset's stored media type. Routing through - // `content.media_type` is what makes a `.did` file declared as - // `text/plain` pick up gzip compression and surface the correct - // `Content-Type` from the canister's certified response. - if let Some(override_mime) = headers::content_type_for(&source.key, header_rules) { - content.media_type = override_mime; - } - // gzip for text/* and js/html, identity for everything else. - let encoders: Vec = encoders_for(&content.media_type); + let content = Content::load(&source.path)?; + + // Per-asset `encodings` override from `.ic-assets.json5`, else the default + // gzip-for-text policy. When identity is not in the list, `force_encoding` + // keeps the alternate encoding even if it doesn't shrink the bytes (mirrors + // ic-asset's plumbing). + let encoders: Vec = source + .config + .encodings + .clone() + .unwrap_or_else(|| encoders_for(&content.media_type)); + let force_encoding = !encoders.contains(&Encoder::Identity); let mut encodings: HashMap = HashMap::new(); for encoder in encoders { let encoded = content.encode(encoder)?; // Identity is always uploaded. Alternate encodings only get uploaded if - // they save bytes vs. identity. - if encoder != Encoder::Identity && encoded.data.len() >= content.data.len() { + // they save bytes vs. identity, unless forced (no identity to compare). + if encoder != Encoder::Identity + && !force_encoding + && encoded.data.len() >= content.data.len() + { continue; } let name = encoder.name().to_string(); @@ -314,14 +258,36 @@ fn encoding_suffix(encoding: &str) -> String { } } +/// Sorted `Vec` view of a `HeadersConfig` for the canister wire type. A +/// `BTreeMap` already iterates in key order, so this is sorted by header name. +fn headers_to_vec(h: HeadersConfig) -> Vec<(String, String)> { + h.into_iter().collect() +} + +/// The legacy canister injects a `Set-Cookie: ic_env=...` header into every +/// HTML asset's stored headers on each asset change (see ic-certified-assets' +/// `add_ic_env_cookie`, driven by icp-cli's env-var step). The canister +/// re-adds it automatically, so the plugin must not treat it as drift or try to +/// own it. Normalises a canister/project headers value for comparison by +/// dropping that cookie, sorting, and collapsing an empty map to `None`. +fn normalize_headers(headers: Option>) -> Option> { + let mut v: Vec<(String, String)> = headers + .unwrap_or_default() + .into_iter() + .filter(|(k, val)| !(k.eq_ignore_ascii_case("Set-Cookie") && val.starts_with("ic_env="))) + .collect(); + v.sort(); + if v.is_empty() { + None + } else { + Some(v) + } +} + /// Pack-and-upload pass: collect every chunk from every not-yet-uploaded /// encoding across all assets, then ship them in `create_chunks` calls of up /// to `MAX_CHUNK_SIZE` total bytes each. /// -/// This is where the wall-clock win lives versus the old "one chunk per call" -/// pattern: a project of 100 small files used to make 100 round-trips; now -/// they ride in a single call (≈1.9 MB budget). -/// /// Routing is by `(asset_key, encoding, chunk_index)`: each `PendingChunk` /// remembers where its eventual canister id should land in /// `enc.chunk_ids[chunk_index]`. @@ -369,8 +335,7 @@ fn pack_and_upload_chunks( } // First-fit-decreasing: sort descending, then in each pass take every - // chunk that still fits under MAX_CHUNK_SIZE. Anything that doesn't fit - // stays in `pending` for the next pass. + // chunk that still fits under MAX_CHUNK_SIZE. pending.sort_by_key(|b| std::cmp::Reverse(b.data.len())); let total_chunks = pending.len(); @@ -421,24 +386,12 @@ fn pack_and_upload_chunks( /// Commits `operations` to the canister, splitting them across multiple /// `commit_batch` ingress calls when a single payload would exceed the IC's -/// 2 MiB per-message ingress limit on application subnets -/// (`MAX_INGRESS_BYTES_PER_MESSAGE_APP_SUBNET` in `dfinity/ic`). The local -/// replica's HTTP boundary accepts up to 4 MiB, but mainnet app subnets cap -/// the inner ingress message at 2 MiB — so we target the tighter limit. +/// 2 MiB per-message ingress limit on application subnets. /// /// Intermediate calls use `batch_id = 0` as a placeholder; the canister's -/// `commit_batch` does not validate that `batch_id` refers to a live batch -/// — it just consumes `chunk_ids` referenced by `SetAssetContent` ops and -/// removes `batch_id` from its batch table at the very end. The chunks -/// uploaded under the real `batch_id` survive between calls because the -/// canister only GCs orphaned chunks at `create_batch` time, never inside -/// `commit_batch`. The trailing call uses the real `batch_id` with empty -/// operations purely to release that batch entry. -/// -/// Trade-off: splitting forfeits cross-batch atomicity. A failure -/// mid-deploy leaves the canister with the operations from previously -/// successful calls applied; the next sync run diffs against the canister -/// and resumes from there. +/// `commit_batch` consumes the `chunk_ids` referenced by `SetAssetContent` ops +/// and only removes the real `batch_id` entry at the end. The trailing call +/// uses the real `batch_id` with empty operations purely to release that entry. fn commit_in_stages( canister: &C, batch_id: Nat, @@ -446,9 +399,6 @@ fn commit_in_stages( ) -> Result<(), String> { let groups = create_commit_batches(operations); if groups.len() <= 1 { - // Everything fits in one ingress message: skip the placeholder - // dance and commit directly under the real `batch_id`, which also - // releases the batch entry in the same call. let ops = groups.into_iter().next().unwrap_or_default(); return commit_batch( canister, @@ -474,8 +424,7 @@ fn commit_in_stages( }, )?; } - // Empty-ops commit on the real batch_id: the canister's - // "all operations processed" branch removes the batch entry. + // Empty-ops commit on the real batch_id: the canister removes the batch entry. commit_batch( canister, CommitBatchArguments { @@ -486,25 +435,8 @@ fn commit_in_stages( } /// Splits `operations` into groups, each small enough that a single -/// `commit_batch` ingress call stays under the IC's 2 MiB per-message -/// limit on application subnets. Greedy in declaration order: walks -/// operations once, starting a new group whenever the running totals -/// would exceed either budget. -/// -/// Budgets per group: -/// - **500 operations** — bounds the certified-tree work each -/// `commit_batch` does and limits the blast radius of a mid-deploy -/// failure. -/// - **1.5 MiB of inlined header bytes** — leaves ~500 KiB of headroom -/// under the 2 MiB ingress cap for fixed per-op overhead (keys, -/// chunk_ids, sha256s, variant tags, request envelope). Header bytes -/// are the only variable-sized per-op field and are where real-world -/// overruns come from — a multi-kilobyte `Content-Security-Policy` -/// from `_headers` gets attached to every asset's `CreateAsset` and -/// to every 3xx rule inside `SetRedirectRules`. -/// -/// An operation whose own header size exceeds the budget gets a group -/// to itself — better to ship it alone than to drop it on the floor. +/// `commit_batch` ingress call stays under the IC's 2 MiB per-message limit. +/// Budgets per group: 500 operations and 1.5 MiB of inlined header bytes. fn create_commit_batches(operations: Vec) -> Vec> { const MAX_OPERATIONS_PER_GROUP: usize = 500; const MAX_HEADER_BYTES_PER_GROUP: usize = 1_500_000; @@ -532,13 +464,6 @@ fn create_commit_batches(operations: Vec) -> Vec usize { fn sum(headers: &[(String, String)]) -> usize { headers.iter().map(|(k, v)| k.len() + v.len()).sum() @@ -548,11 +473,6 @@ fn header_bytes_of(op: &BatchOperationKind) -> usize { BatchOperationKind::SetAssetProperties(a) => { a.headers.as_ref().and_then(|h| h.as_deref()).map_or(0, sum) } - BatchOperationKind::SetRedirectRules(a) => a - .rules - .iter() - .map(|r| r.headers.as_deref().map_or(0, sum)) - .sum(), BatchOperationKind::Clear(_) | BatchOperationKind::DeleteAsset(_) | BatchOperationKind::UnsetAssetContent(_) @@ -564,9 +484,6 @@ fn build_operations( project_assets: &HashMap, canister_assets: &HashMap, canister_asset_properties: &HashMap, - project_rules: &[RedirectRule], - canister_rules: &[RedirectRule], - project_header_rules: &[HeaderRule], ) -> Vec { let mut ops = Vec::new(); let mut canister_assets = canister_assets.clone(); @@ -590,18 +507,19 @@ fn build_operations( canister_assets.remove(&k); } - // 2. Create new assets (those not present after deletions). Per-asset - // headers come from resolving the project's `_headers` rules against - // each new key; max_age and allow_raw_access fall back to defaults. + // 2. Create new assets. Per-asset metadata comes from `.ic-assets.json5`. for (key, pa) in project_assets { if !canister_assets.contains_key(key) { - let resolved = headers::resolve(key, project_header_rules); + let config = pa.config(); + let max_age = config.cache.as_ref().and_then(|c| c.max_age); + let headers = config.combined_headers().map(headers_to_vec); ops.push(BatchOperationKind::CreateAsset(CreateAssetArguments { key: key.clone(), content_type: pa.media_type.to_string(), - max_age: None, - headers: (!resolved.is_empty()).then_some(resolved), - allow_raw_access: Some(true), + max_age, + headers, + enable_aliasing: config.enable_aliasing, + allow_raw_access: config.allow_raw_access, })); } } @@ -647,150 +565,84 @@ fn build_operations( project_assets, &canister_assets, canister_asset_properties, - project_header_rules, ); - // 6. Replace-all the canister's redirect rules when they differ from the - // parsed `_redirects`. Comparison is order-sensitive — rules are - // matched in declaration order at request time, so reordering is a - // semantic change. - // - // 3xx rules synthesize their response (no target asset), so the - // canister has no headers to inherit from. Populate `RedirectRule.headers` - // by resolving `_headers` against the rule's `from` pattern. 200/4xx - // rules borrow headers from their target asset, so no plumbing here. - let project_rules_with_headers: Vec = project_rules - .iter() - .map(|rule| { - let mut rule = rule.clone(); - if is_3xx(rule.status) { - let key = redirect_pattern_to_key(&rule.from); - let resolved = headers::resolve(&key, project_header_rules); - if !resolved.is_empty() { - rule.headers = Some(resolved); - } - } - rule - }) - .collect(); - if project_rules_with_headers != canister_rules { - ops.push(BatchOperationKind::SetRedirectRules( - SetRedirectRulesArguments { - rules: project_rules_with_headers, - }, - )); - } - ops } -fn is_3xx(status: u16) -> bool { - (300..400).contains(&status) -} - -/// Returns a path-like key suitable for running the header resolver against a -/// redirect rule's `from`. Exact patterns yield the path itself; subtree -/// patterns yield the prefix, so only header rules that subsume the subtree -/// (the same or a broader subtree) match — narrower or unrelated patterns are -/// rejected by the resolver's `starts_with` check. -fn redirect_pattern_to_key(pattern: &crate::canister::RulePattern) -> String { - match pattern { - crate::canister::RulePattern::Exact(p) => p.clone(), - crate::canister::RulePattern::Subtree(prefix) => prefix.clone(), - } -} - -/// Reads `_redirects` from the project's input directory, if present. A -/// missing file is treated as "no rules"; parse errors carry the file's -/// path and 1-based line number so users can fix issues without a canister -/// round-trip. -fn load_redirect_rules(dir: &str) -> Result, String> { - let path = Path::new(dir).join(REDIRECTS_FILENAME); - if !path.exists() { - return Ok(Vec::new()); - } - let content = - std::fs::read_to_string(&path).map_err(|e| format!("read {}: {e}", path.display()))?; - redirects::parse(&content).map_err(|e| format!("{}: {e}", path.display())) -} - -// For each asset that already exists on the canister, reset any per-asset -// properties (`max_age`, `headers`, `allow_raw_access`) that drifted from the -// project config. Newly-created assets get the same values via -// `CreateAssetArguments`, so we don't emit `SetAssetProperties` for them. -// -// Headers are resolved from `_headers` per-key; everything else falls back to -// plugin defaults (None / Some(true)). -// -// `canister_assets` is the post-deletion view: keys removed in step 1 (missing -// from the project, or content_type drift forcing delete-then-create) are -// absent. Skipping those keys here avoids emitting a redundant -// `SetAssetProperties` op for an asset whose properties are already being set -// by `CreateAssetArguments` in this same batch. +/// For each asset that already exists on the canister, reset any per-asset +/// properties (`max_age`, `headers`, `allow_raw_access`, `is_aliased`) that +/// drifted from the `.ic-assets.json5` config. Newly-created assets get the +/// same values via `CreateAssetArguments`, so we don't emit `SetAssetProperties` +/// for them. Mirrors `ic-asset`'s `update_properties`. +/// +/// `canister_assets` is the post-deletion view: keys removed in step 1 are +/// absent, so we skip emitting a redundant op for a key being recreated. fn update_properties( ops: &mut Vec, project_assets: &HashMap, canister_assets: &HashMap, canister_asset_properties: &HashMap, - project_header_rules: &[HeaderRule], ) { - for key in project_assets.keys() { + for (key, pa) in project_assets { if !canister_assets.contains_key(key) { continue; } let Some(canister_props) = canister_asset_properties.get(key) else { continue; }; + let config = pa.config(); - let max_age = canister_props.max_age.is_some().then_some(None); + let max_age = { + let project = config.cache.as_ref().and_then(|c| c.max_age); + (project != canister_props.max_age).then_some(project) + }; - let resolved = headers::resolve(key, project_header_rules); - let expected_headers = (!resolved.is_empty()).then_some(resolved); - let headers = if canister_props.headers != expected_headers { - Some(expected_headers) - } else { - None + let headers = { + let project = normalize_headers(config.combined_headers().map(headers_to_vec)); + let canister = normalize_headers(canister_props.headers.clone()); + (project != canister).then_some(project) }; - let allow_raw_access = - (canister_props.allow_raw_access != Some(true)).then_some(Some(true)); + let is_aliased = { + let project = config.enable_aliasing; + (project != canister_props.is_aliased).then_some(project) + }; - if max_age.is_some() || headers.is_some() || allow_raw_access.is_some() { + let allow_raw_access = { + let project = config.allow_raw_access; + (project != canister_props.allow_raw_access).then_some(project) + }; + + if max_age.is_some() + || headers.is_some() + || is_aliased.is_some() + || allow_raw_access.is_some() + { ops.push(BatchOperationKind::SetAssetProperties( SetAssetPropertiesArguments { key: key.clone(), max_age, headers, allow_raw_access, + is_aliased, }, )); } } } -/// Reads `_headers` from the project's input directory, if present. A missing -/// file is treated as "no rules"; parse errors carry the file's path and -/// 1-based line number so users can fix issues without a canister round-trip. -fn load_header_rules(dir: &str) -> Result, String> { - let path = Path::new(dir).join(HEADERS_FILENAME); - if !path.exists() { - return Ok(Vec::new()); - } - let content = - std::fs::read_to_string(&path).map_err(|e| format!("read {}: {e}", path.display()))?; - headers::parse(&content).map_err(|e| format!("{}: {e}", path.display())) -} - #[cfg(test)] mod tests { use super::*; use crate::canister::{ AssetDetails, AssetEncodingDetails, BatchOperationKind, CallType, CanisterCall, }; + use crate::config::{AssetConfig, CacheConfig}; use candid::{CandidType, Nat, Principal}; use serde::de::DeserializeOwned; use std::cell::{Cell, RefCell}; - use std::collections::{HashMap, VecDeque}; + use std::collections::{BTreeMap, HashMap, VecDeque}; use std::path::PathBuf; // Mirrors the private CreateChunksResponse — same field name produces the same Candid encoding. @@ -800,9 +652,7 @@ mod tests { } // Counts each `create_chunks` call, returns one fresh id per chunk in the - // request, and records the batch sizes the packer produced. Used to verify - // that `pack_and_upload_chunks` collapses many small chunks into single - // calls and assigns canister ids to the right encoding slots. + // request, and records the batch sizes the packer produced. struct ChunkBatchRecorder { next_id: Cell, batches: RefCell>, // chunks-per-batch @@ -845,6 +695,14 @@ mod tests { } } + fn mk_source(key: &str) -> AssetSource { + AssetSource { + path: PathBuf::from(key.trim_start_matches('/')), + key: key.to_string(), + config: AssetConfig::default(), + } + } + fn mk_pending_asset(key: &str, encoding: &str, data: Vec) -> (String, ProjectAsset) { let mut enc_map = HashMap::new(); enc_map.insert( @@ -859,10 +717,7 @@ mod tests { ( key.to_string(), ProjectAsset { - source: AssetSource { - path: PathBuf::from(key.trim_start_matches('/')), - key: key.to_string(), - }, + source: mk_source(key), media_type: "application/octet-stream".parse().unwrap(), encodings: enc_map, }, @@ -871,7 +726,6 @@ mod tests { #[test] fn pack_uploads_one_full_chunk_per_call() { - // A single MAX-sized encoding ships in one call carrying one chunk. let mut assets = HashMap::from([mk_pending_asset( "/f", "identity", @@ -885,9 +739,6 @@ mod tests { #[test] fn pack_splits_oversized_encoding_into_max_chunks() { - // MAX*3 + 1 bytes → 4 chunks: three at MAX, one at 1 byte. Each MAX - // chunk fills its own call; the trailing 1-byte chunk gets its own - // call too because nothing else is left to share with it. let mut assets = HashMap::from([mk_pending_asset( "/big", "identity", @@ -901,8 +752,6 @@ mod tests { #[test] fn pack_collapses_many_small_chunks_into_one_call() { - // 100 × 1KB chunks fit comfortably under MAX_CHUNK_SIZE (~1.9 MB) → - // one call carrying all 100 chunks. This is the optimisation. let mut assets: HashMap = (0..100) .map(|i| mk_pending_asset(&format!("/f{i}"), "identity", vec![0u8; 1024])) .collect(); @@ -911,38 +760,8 @@ mod tests { assert_eq!(*mock.batches.borrow(), vec![100]); } - #[test] - fn pack_packs_full_chunk_alone_then_packs_remaining_smalls() { - // One MAX-sized asset + many tiny assets. FFD puts the MAX chunk in - // its own call (nothing else fits), then packs the small chunks - // together. - let mut assets: HashMap = HashMap::new(); - assets.extend([mk_pending_asset( - "/big", - "identity", - vec![0u8; MAX_CHUNK_SIZE], - )]); - for i in 0..10 { - assets.extend([mk_pending_asset( - &format!("/tiny{i}"), - "identity", - vec![0u8; 1024], - )]); - } - let mock = ChunkBatchRecorder::new(); - pack_and_upload_chunks(&mock, &Nat::from(1u32), &mut assets).unwrap(); - // Two calls total: one for the big chunk, one for the ten tinies. - let batches = mock.batches.borrow().clone(); - assert_eq!(batches.len(), 2); - assert!(batches.contains(&1)); // big chunk on its own - assert!(batches.contains(&10)); // 10 tinies packed together - } - #[test] fn pack_routes_chunk_ids_to_correct_encoding_slot() { - // Two assets, multi-chunk each. After upload, every encoding's - // chunk_ids vec must be filled (no default zeros remaining) and - // ids must be distinct. let mut assets = HashMap::from([ mk_pending_asset("/a", "identity", vec![0u8; MAX_CHUNK_SIZE + 100]), // 2 chunks mk_pending_asset("/b", "identity", vec![0u8; 500]), // 1 chunk @@ -953,19 +772,18 @@ mod tests { let b_ids = &assets["/b"].encodings["identity"].chunk_ids; assert_eq!(a_ids.len(), 2); assert_eq!(b_ids.len(), 1); - let mut all: Vec<&Nat> = a_ids.iter().chain(b_ids.iter()).collect(); - all.sort_by(|x, y| { - // Nat doesn't impl Ord; compare textually. - x.to_string().cmp(&y.to_string()) - }); + let mut all: Vec = a_ids + .iter() + .chain(b_ids.iter()) + .map(|n| n.to_string()) + .collect(); + all.sort(); all.dedup(); assert_eq!(all.len(), 3, "ids must be distinct"); } #[test] fn pack_empty_encoding_still_gets_one_chunk_id() { - // A zero-byte encoding still needs a chunk_id so SetAssetContent - // has something to reference. let mut assets = HashMap::from([mk_pending_asset("/empty", "identity", vec![])]); let mock = ChunkBatchRecorder::new(); pack_and_upload_chunks(&mock, &Nat::from(1u32), &mut assets).unwrap(); @@ -974,7 +792,6 @@ mod tests { #[test] fn pack_skips_already_in_place_encodings() { - // Nothing to upload → no calls made. let (k, mut pa) = mk_pending_asset("/skip", "identity", vec![0u8; 100]); pa.encodings.get_mut("identity").unwrap().already_in_place = true; pa.encodings.get_mut("identity").unwrap().data = Vec::new(); @@ -987,7 +804,6 @@ mod tests { // ── commit_batch splitting ────────────────────────────────────────────── fn create_asset_with_headers(key: &str, hdr_bytes: usize) -> BatchOperationKind { - // One header whose name+value bytes sum to `hdr_bytes`. let name = "X-Pad".to_string(); let value = "a".repeat(hdr_bytes.saturating_sub(name.len())); BatchOperationKind::CreateAsset(CreateAssetArguments { @@ -995,6 +811,7 @@ mod tests { content_type: "text/plain".to_string(), max_age: None, headers: Some(vec![(name, value)]), + enable_aliasing: None, allow_raw_access: Some(true), }) } @@ -1015,6 +832,18 @@ mod tests { assert_eq!(header_bytes_of(&op), 1000); } + #[test] + fn header_bytes_of_counts_set_asset_properties_headers() { + let op = BatchOperationKind::SetAssetProperties(SetAssetPropertiesArguments { + key: "/k".to_string(), + max_age: None, + headers: Some(Some(vec![("X-A".into(), "1".into())])), // 4 bytes + allow_raw_access: None, + is_aliased: None, + }); + assert_eq!(header_bytes_of(&op), 4); + } + #[test] fn header_bytes_of_returns_zero_for_headerless_kinds() { assert_eq!(header_bytes_of(&set_content_op("/k")), 0); @@ -1024,64 +853,10 @@ mod tests { })), 0 ); - assert_eq!( - header_bytes_of(&BatchOperationKind::UnsetAssetContent( - UnsetAssetContentArguments { - key: "/k".to_string(), - content_encoding: "identity".to_string(), - } - )), - 0 - ); - } - - #[test] - fn header_bytes_of_sums_redirect_rule_headers() { - // SetRedirectRules: only 3xx rules carry inlined headers; sum across rules. - let rules = vec![ - RedirectRule { - from: crate::canister::RulePattern::Exact("/a".into()), - to: "/b".into(), - status: 301, - headers: Some(vec![("X-A".into(), "1".into())]), // 4 bytes - }, - RedirectRule { - from: crate::canister::RulePattern::Exact("/c".into()), - to: "/d".into(), - status: 200, - headers: None, - }, - RedirectRule { - from: crate::canister::RulePattern::Exact("/e".into()), - to: "/f".into(), - status: 307, - headers: Some(vec![("X-B".into(), "22".into())]), // 5 bytes - }, - ]; - let op = BatchOperationKind::SetRedirectRules(SetRedirectRulesArguments { rules }); - assert_eq!(header_bytes_of(&op), 9); - } - - #[test] - fn create_commit_batches_empty_input_returns_empty() { - assert!(create_commit_batches(vec![]).is_empty()); - } - - #[test] - fn create_commit_batches_small_input_stays_single_group() { - // 100 small ops with tiny headers → fits both budgets in one group. - let ops: Vec = (0..100) - .map(|i| create_asset_with_headers(&format!("/f{i}"), 10)) - .collect(); - let groups = create_commit_batches(ops); - assert_eq!(groups.len(), 1); - assert_eq!(groups[0].len(), 100); } #[test] fn create_commit_batches_splits_at_500_ops() { - // 1200 headerless ops should split into 500/500/200 — three groups - // driven purely by the operation-count cap. let ops: Vec = (0..1200) .map(|i| set_content_op(&format!("/f{i}"))) .collect(); @@ -1094,41 +869,16 @@ mod tests { #[test] fn create_commit_batches_splits_at_header_budget() { - // 4 ops × 500 KB headers = 2 MB > 1.5 MB cap. Split happens before - // the 500-op cap could kick in. let ops: Vec = (0..4) .map(|i| create_asset_with_headers(&format!("/f{i}"), 500_000)) .collect(); let groups = create_commit_batches(ops); - // 3 ops × 500 KB = 1.5 MB fits exactly; the 4th would push over → split. assert_eq!(groups.len(), 2); assert_eq!(groups[0].len(), 3); assert_eq!(groups[1].len(), 1); } - #[test] - fn create_commit_batches_oversized_op_gets_own_group() { - // One op alone exceeds the header budget. The greedy loop must still - // emit it (in its own group) rather than skip it. - let ops = vec![ - create_asset_with_headers("/small", 10), - create_asset_with_headers("/huge", 2_000_000), - create_asset_with_headers("/small2", 10), - ]; - let groups = create_commit_batches(ops); - // First group flushes when /huge would overflow it → [/small], then - // /huge alone (oversized but emitted), then /small2 alone (since /huge - // already pushed header_bytes way over budget). - assert_eq!(groups.len(), 3); - assert_eq!(groups[0].len(), 1); - assert_eq!(groups[1].len(), 1); - assert_eq!(groups[2].len(), 1); - } - - // Mock that records every `commit_batch` call's `(batch_id, op_count)`. - // Used to verify that `commit_in_stages` issues the right shape of - // call sequence (placeholder batch_id for splits, real batch_id for - // cleanup). + // Records every commit_batch call's (batch_id, op_count). struct CommitRecorder { calls: RefCell>, } @@ -1166,8 +916,6 @@ mod tests { #[test] fn commit_in_stages_single_group_uses_real_batch_id() { - // Small enough to fit in one group → one commit_batch call carrying - // the real batch_id, no placeholder dance. let ops: Vec = (0..10).map(|i| set_content_op(&format!("/f{i}"))).collect(); let mock = CommitRecorder::new(); @@ -1179,9 +927,6 @@ mod tests { #[test] fn commit_in_stages_multi_group_uses_placeholder_then_real_cleanup() { - // 1200 ops → three 500/500/200 splits using placeholder batch_id 0, - // then a final empty-ops call on the real batch_id to release the - // canister-side batch entry. let ops: Vec = (0..1200) .map(|i| set_content_op(&format!("/f{i}"))) .collect(); @@ -1199,10 +944,21 @@ mod tests { ); } + // ── build_operations ──────────────────────────────────────────────────── + fn mk_project_asset( key: &str, media_type: &str, encodings: &[(&str, Vec, bool)], + ) -> (String, ProjectAsset) { + mk_project_asset_cfg(key, media_type, encodings, AssetConfig::default()) + } + + fn mk_project_asset_cfg( + key: &str, + media_type: &str, + encodings: &[(&str, Vec, bool)], + config: AssetConfig, ) -> (String, ProjectAsset) { let mime: mime::Mime = media_type.parse().expect("valid MIME"); let mut enc_map = HashMap::new(); @@ -1222,13 +978,12 @@ mod tests { }, ); } + let mut source = mk_source(key); + source.config = config; ( key.to_string(), ProjectAsset { - source: AssetSource { - path: PathBuf::from(key.trim_start_matches('/')), - key: key.to_string(), - }, + source, media_type: mime, encodings: enc_map, }, @@ -1274,6 +1029,26 @@ mod tests { .count() } + fn create_op(ops: &[BatchOperationKind]) -> &CreateAssetArguments { + ops.iter() + .find_map(|op| match op { + BatchOperationKind::CreateAsset(a) => Some(a), + _ => None, + }) + .expect("CreateAsset op") + } + + fn set_props_ops( + ops: &[BatchOperationKind], + ) -> std::collections::BTreeMap<&str, &SetAssetPropertiesArguments> { + ops.iter() + .filter_map(|op| match op { + BatchOperationKind::SetAssetProperties(a) => Some((a.key.as_str(), a)), + _ => None, + }) + .collect() + } + #[test] fn new_asset_emits_create_and_set() { let project = HashMap::from([mk_project_asset( @@ -1281,7 +1056,7 @@ mod tests { "text/html", &[("identity", vec![1, 2, 3], false)], )]); - let ops = build_operations(&project, &HashMap::new(), &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&project, &HashMap::new(), &HashMap::new()); assert_eq!(count_op(&ops, "CreateAsset"), 1); assert_eq!(count_op(&ops, "SetAssetContent"), 1); assert_eq!(ops.len(), 2); @@ -1300,7 +1075,7 @@ mod tests { "text/html", &[("identity", Some(sha))], )]); - assert!(build_operations(&project, &canister, &HashMap::new(), &[], &[], &[]).is_empty()); + assert!(build_operations(&project, &canister, &HashMap::new()).is_empty()); } #[test] @@ -1315,7 +1090,7 @@ mod tests { "text/html", &[("identity", Some(vec![1, 2, 3]))], )]); - let ops = build_operations(&project, &canister, &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&project, &canister, &HashMap::new()); assert_eq!(count_op(&ops, "SetAssetContent"), 1); assert_eq!(count_op(&ops, "CreateAsset"), 0); assert_eq!(count_op(&ops, "DeleteAsset"), 0); @@ -1329,7 +1104,7 @@ mod tests { "text/html", &[("identity", Some(vec![1, 2, 3]))], )]); - let ops = build_operations(&HashMap::new(), &canister, &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&HashMap::new(), &canister, &HashMap::new()); assert_eq!(count_op(&ops, "DeleteAsset"), 1); assert_eq!(ops.len(), 1); } @@ -1346,7 +1121,7 @@ mod tests { "application/octet-stream", &[("identity", Some(vec![1, 2, 3]))], )]); - let ops = build_operations(&project, &canister, &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&project, &canister, &HashMap::new()); assert_eq!(count_op(&ops, "DeleteAsset"), 1); assert_eq!(count_op(&ops, "CreateAsset"), 1); assert_eq!(count_op(&ops, "SetAssetContent"), 1); @@ -1356,7 +1131,6 @@ mod tests { #[test] fn stale_encoding_emits_unset() { let sha = vec![1u8, 2, 3]; - // Project has only identity (already in place); gzip is stale on canister. let project = HashMap::from([mk_project_asset( "/index.html", "text/html", @@ -1367,36 +1141,12 @@ mod tests { "text/html", &[("identity", Some(sha)), ("gzip", Some(vec![9, 8, 7]))], )]); - let ops = build_operations(&project, &canister, &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&project, &canister, &HashMap::new()); assert_eq!(count_op(&ops, "UnsetAssetContent"), 1); assert_eq!(count_op(&ops, "SetAssetContent"), 0); assert_eq!(ops.len(), 1); } - #[test] - fn new_encoding_emits_set_content() { - let identity_sha = vec![1u8, 2, 3]; - // Project gains a gzip encoding; identity is already in place. - let project = HashMap::from([mk_project_asset( - "/index.html", - "text/html", - &[ - ("identity", identity_sha.clone(), true), - ("gzip", vec![9, 8, 7], false), - ], - )]); - let canister = HashMap::from([mk_canister_asset( - "/index.html", - "text/html", - &[("identity", Some(identity_sha))], - )]); - let ops = build_operations(&project, &canister, &HashMap::new(), &[], &[], &[]); - assert_eq!(count_op(&ops, "SetAssetContent"), 1); - assert_eq!(count_op(&ops, "CreateAsset"), 0); - assert_eq!(count_op(&ops, "UnsetAssetContent"), 0); - assert_eq!(ops.len(), 1); - } - #[test] fn empty_project_deletes_all_canister_assets() { let canister = HashMap::from([ @@ -1407,311 +1157,88 @@ mod tests { &[("identity", Some(vec![2]))], ), ]); - let ops = build_operations(&HashMap::new(), &canister, &HashMap::new(), &[], &[], &[]); + let ops = build_operations(&HashMap::new(), &canister, &HashMap::new()); assert_eq!(count_op(&ops, "DeleteAsset"), 2); assert_eq!(ops.len(), 2); } - // ── redirect-rule diff ────────────────────────────────────────────────── - - fn mk_rule(from: crate::canister::RulePattern, to: &str, status: u16) -> RedirectRule { - RedirectRule { - from, - to: to.to_string(), - status, - headers: None, + // ── config-driven create / properties ─────────────────────────────────── + + fn config_with( + max_age: Option, + headers: Option<&[(&str, &str)]>, + enable_aliasing: Option, + allow_raw_access: Option, + ) -> AssetConfig { + AssetConfig { + cache: max_age.map(|m| CacheConfig { max_age: Some(m) }), + headers: headers.map(|hs| { + hs.iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect::>() + }), + enable_aliasing, + allow_raw_access, + ..AssetConfig::default() } } - fn set_rules_op(ops: &[BatchOperationKind]) -> Option<&[RedirectRule]> { - ops.iter().find_map(|op| match op { - BatchOperationKind::SetRedirectRules(args) => Some(args.rules.as_slice()), - _ => None, - }) - } - #[test] - fn rule_only_edit_emits_set_redirect_rules() { - // No asset changes, but the project has a new rule the canister - // doesn't — sync must emit the rules op. - let sha = vec![1u8, 2, 3]; + fn create_asset_args_use_defaults() { let project = HashMap::from([mk_project_asset( "/index.html", "text/html", - &[("identity", sha.clone(), true)], - )]); - let canister = HashMap::from([mk_canister_asset( - "/index.html", - "text/html", - &[("identity", Some(sha))], + &[("identity", vec![1, 2, 3], false)], )]); - let project_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )]; - let ops = build_operations( - &project, - &canister, - &HashMap::new(), - &project_rules, - &[], - &[], - ); - let rules = set_rules_op(&ops).expect("SetRedirectRules op missing"); - assert_eq!(rules, project_rules.as_slice()); - // No asset-side ops should have been emitted. - assert_eq!(count_op(&ops, "CreateAsset"), 0); - assert_eq!(count_op(&ops, "SetAssetContent"), 0); - assert_eq!(count_op(&ops, "DeleteAsset"), 0); - assert_eq!(ops.len(), 1); - } - - #[test] - fn redirects_file_removed_emits_empty_vec_op() { - // Canister has rules, project no longer does — sync emits an - // explicit empty-vec op so the canister clears its ruleset. - let canister_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &[], - &canister_rules, - &[], - ); - let rules = set_rules_op(&ops).expect("SetRedirectRules op missing"); - assert!(rules.is_empty(), "expected empty-vec replace-all op"); - } - - #[test] - fn unchanged_rules_emit_no_op() { - // Same rules on both sides — no SetRedirectRules op emitted. - let rules = vec![mk_rule( - crate::canister::RulePattern::Subtree("/blog/".into()), - "/blog/index.html", - 200, - )]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &rules, - &rules, - &[], - ); - assert!( - set_rules_op(&ops).is_none(), - "no SetRedirectRules op expected when rules match" - ); - assert!(ops.is_empty()); - } - - #[test] - fn reordered_rules_emit_op() { - // Order matters semantically — first matching rule wins at request - // time. A swap is a real change even with identical entries. - let a = mk_rule(crate::canister::RulePattern::Exact("/a".into()), "/x", 301); - let b = mk_rule(crate::canister::RulePattern::Exact("/b".into()), "/y", 301); - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &[a.clone(), b.clone()], - &[b, a], - &[], - ); - assert!( - set_rules_op(&ops).is_some(), - "rule reorder must emit a replace-all op" - ); - } - - #[test] - fn sync_short_circuits_when_redirects_file_only_matches_canister() { - // Drive sync() end-to-end with a _redirects file that matches what - // the canister already has. The "nothing to commit" short-circuit - // must trigger, with no create_batch / commit_batch calls. - let dir = tempfile::tempdir().unwrap(); - std::fs::write(dir.path().join("_redirects"), b"/old /new 301\n").unwrap(); - - let mock = SyncMock::new(); - mock.push_ok("api_version", 2u16); - mock.push_ok("list", Vec::::new()); - mock.push_ok( - "get_redirect_rules", - vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )], - ); - - let result = sync( - &mock, - &[dir.path().to_str().unwrap().to_string()], - &Principal::anonymous().to_text(), - None, - ); - // No create_batch / commit_batch programmed — if sync reached them - // SyncMock would panic with "no programmed response". - assert!(result.is_ok(), "expected success, got: {result:?}"); - } - - // Mirrors the private `CreateBatchResponse` in canister.rs — same field - // name gives the same Candid encoding, so the test mock can decode it. - #[derive(CandidType)] - struct CreateBatchOk { - batch_id: Nat, - } - - #[test] - fn sync_emits_rules_op_when_redirects_file_only_changed() { - // _redirects has a rule; canister has none. The asset list is also - // empty so no asset ops are produced — the only operation must be - // SetRedirectRules, exercising the early "nothing to commit" check. - let dir = tempfile::tempdir().unwrap(); - std::fs::write(dir.path().join("_redirects"), b"/old /new 301\n").unwrap(); - - let mock = SyncMock::new(); - mock.push_ok("api_version", 2u16); - mock.push_ok("list", Vec::::new()); - mock.push_ok("get_redirect_rules", Vec::::new()); - mock.push_ok( - "create_batch", - CreateBatchOk { - batch_id: Nat::from(1u32), - }, - ); - mock.push_ok("commit_batch", ()); - - let result = sync( - &mock, - &[dir.path().to_str().unwrap().to_string()], - &Principal::anonymous().to_text(), - None, - ); - assert!(result.is_ok(), "expected success, got: {result:?}"); + let ops = build_operations(&project, &HashMap::new(), &HashMap::new()); + let create = create_op(&ops); + assert_eq!(create.max_age, None); + assert!(create.headers.is_none()); + assert_eq!(create.enable_aliasing, None); + assert_eq!(create.allow_raw_access, Some(true)); } - // prepare_asset itself skips gzip when the compressed output is not smaller - // than the identity bytes. All 256 distinct byte values are maximally - // incompressible: gzip's ~18-byte header alone exceeds the savings. #[test] - fn prepare_asset_skips_gzip_when_not_smaller() { - use std::io::Write; - let mut f = tempfile::Builder::new().suffix(".txt").tempfile().unwrap(); - f.write_all(&(0u8..=255u8).collect::>()).unwrap(); - let source = AssetSource { - path: f.path().to_path_buf(), - key: "/test.txt".to_string(), - }; - let asset = prepare_asset(source, &[], &HashMap::new()).unwrap(); - assert!( - asset.encodings.contains_key("identity"), - "identity must be present" - ); - assert!( - !asset.encodings.contains_key("gzip"), - "gzip must be absent when not smaller" + fn create_asset_args_carry_config() { + let config = config_with( + Some(99), + Some(&[("X-Frame-Options", "DENY")]), + Some(true), + Some(false), ); - } - - // `_headers` Content-Type override drives both the stored media type and - // the encoder selection. Without the override, a `.did` file is - // `application/octet-stream` (mime_guess has no entry) and gets only the - // identity encoding; with the override to `text/plain`, encoders_for - // selects gzip too. - #[test] - fn header_content_type_override_applies_to_prepare_asset() { - use std::io::Write; - - // Highly compressible content so gzip is genuinely smaller and gets - // kept by prepare_asset's "skip if not smaller" check. - let mut f = tempfile::Builder::new().suffix(".did").tempfile().unwrap(); - f.write_all( - b"service : { greet : (text) -> (text); }\n" - .repeat(100) - .as_ref(), - ) - .unwrap(); - let mk_source = || AssetSource { - path: f.path().to_path_buf(), - key: "/ic.did".to_string(), - }; - - // No override: mime_guess returns octet-stream, gzip is not selected. - let without = prepare_asset(mk_source(), &[], &HashMap::new()).unwrap(); - assert_eq!(without.media_type.to_string(), "application/octet-stream"); - assert!(!without.encodings.contains_key("gzip")); - - // With override to text/plain via `_headers`, both the media type - // and the encoder pick change. - let rules = - crate::headers::parse("/*.did\n Content-Type: text/plain; charset=utf-8\n").unwrap(); - let with = prepare_asset(mk_source(), &rules, &HashMap::new()).unwrap(); - assert_eq!(with.media_type.to_string(), "text/plain; charset=utf-8"); - assert!( - with.encodings.contains_key("gzip"), - "gzip should be selected for text/* override" - ); - } - - // When gzip output is not smaller than identity, prepare_asset skips it, so - // build_operations sees only the identity encoding and emits no gzip op. - #[test] - fn gzip_absent_from_project_emits_no_gzip_op() { - let project = HashMap::from([mk_project_asset( - "/tiny.txt", - "text/plain", + let project = HashMap::from([mk_project_asset_cfg( + "/index.html", + "text/html", &[("identity", vec![1, 2, 3], false)], + config, )]); - let ops = build_operations(&project, &HashMap::new(), &HashMap::new(), &[], &[], &[]); - assert_eq!(count_op(&ops, "CreateAsset"), 1); - assert_eq!(count_op(&ops, "SetAssetContent"), 1); - assert!(!ops.iter().any(|op| matches!( - op, - BatchOperationKind::SetAssetContent(a) if a.content_encoding == "gzip" - ))); + let ops = build_operations(&project, &HashMap::new(), &HashMap::new()); + let create = create_op(&ops); + assert_eq!(create.max_age, Some(99)); + assert_eq!( + create.headers, + Some(vec![("X-Frame-Options".into(), "DENY".into())]) + ); + assert_eq!(create.enable_aliasing, Some(true)); + assert_eq!(create.allow_raw_access, Some(false)); } #[test] - fn create_asset_args_use_defaults() { - let project = HashMap::from([mk_project_asset( + fn create_asset_args_inject_security_policy_headers() { + let config = AssetConfig { + security_policy: Some(crate::security_policy::SecurityPolicy::Standard), + ..AssetConfig::default() + }; + let project = HashMap::from([mk_project_asset_cfg( "/index.html", "text/html", &[("identity", vec![1, 2, 3], false)], + config, )]); - let ops = build_operations(&project, &HashMap::new(), &HashMap::new(), &[], &[], &[]); - let create_op = ops - .iter() - .find_map(|op| { - if let BatchOperationKind::CreateAsset(a) = op { - Some(a) - } else { - None - } - }) - .expect("CreateAsset op"); - - assert_eq!(create_op.max_age, None); - assert!(create_op.headers.is_none()); - assert_eq!(create_op.allow_raw_access, Some(true)); - } - - fn set_props_ops( - ops: &[BatchOperationKind], - ) -> std::collections::BTreeMap<&str, &SetAssetPropertiesArguments> { - ops.iter() - .filter_map(|op| match op { - BatchOperationKind::SetAssetProperties(a) => Some((a.key.as_str(), a)), - _ => None, - }) - .collect() + let ops = build_operations(&project, &HashMap::new(), &HashMap::new()); + let create = create_op(&ops); + let headers = create.headers.as_ref().expect("policy headers"); + assert!(headers.iter().any(|(k, _)| k == "Content-Security-Policy")); } #[test] @@ -1732,21 +1259,26 @@ mod tests { max_age: None, headers: None, allow_raw_access: Some(true), + is_aliased: None, }, )]); - let ops = build_operations(&project, &canister, &canister_props, &[], &[], &[]); - assert!( - set_props_ops(&ops).is_empty(), - "no SetAssetProperties op when canister already matches defaults" - ); + let ops = build_operations(&project, &canister, &canister_props); + assert!(set_props_ops(&ops).is_empty()); } #[test] - fn update_properties_clears_max_age_when_canister_has_it() { - let project = HashMap::from([mk_project_asset( + fn update_properties_sets_aliasing_and_headers() { + let config = config_with( + None, + Some(&[("X-Frame-Options", "DENY")]), + Some(true), + Some(true), + ); + let project = HashMap::from([mk_project_asset_cfg( "/index.html", "text/html", &[("identity", vec![1, 2, 3], true)], + config, )]); let canister = HashMap::from([mk_canister_asset( "/index.html", @@ -1756,21 +1288,28 @@ mod tests { let canister_props = HashMap::from([( "/index.html".to_string(), AssetProperties { - max_age: Some(60), + max_age: None, headers: None, allow_raw_access: Some(true), + is_aliased: Some(false), }, )]); - let ops = build_operations(&project, &canister, &canister_props, &[], &[], &[]); + let ops = build_operations(&project, &canister, &canister_props); let by_key = set_props_ops(&ops); assert_eq!(by_key.len(), 1); - // canister has Some(60), defaults are None — the op must explicitly - // request clearing (the inner None means "set to null on the canister"). - assert_eq!(by_key["/index.html"].max_age, Some(None)); + let op = by_key["/index.html"]; + assert_eq!( + op.headers, + Some(Some(vec![("X-Frame-Options".into(), "DENY".into())])) + ); + assert_eq!(op.is_aliased, Some(Some(true))); + // allow_raw_access matches (both Some(true)) → not set; max_age matches → not set. + assert_eq!(op.allow_raw_access, None); + assert_eq!(op.max_age, None); } #[test] - fn update_properties_clears_canister_headers() { + fn update_properties_clears_canister_headers_and_max_age() { let project = HashMap::from([mk_project_asset( "/index.html", "text/html", @@ -1781,142 +1320,66 @@ mod tests { "text/html", &[("identity", Some(vec![1, 2, 3]))], )]); - let canister_headers = vec![("X-Frame-Options".to_string(), "DENY".to_string())]; let canister_props = HashMap::from([( "/index.html".to_string(), AssetProperties { - max_age: None, - headers: Some(canister_headers), + max_age: Some(60), + headers: Some(vec![("X-Frame-Options".into(), "DENY".into())]), allow_raw_access: Some(true), + is_aliased: None, }, )]); - let ops = build_operations(&project, &canister, &canister_props, &[], &[], &[]); + let ops = build_operations(&project, &canister, &canister_props); let by_key = set_props_ops(&ops); assert_eq!(by_key.len(), 1); - // The inner None clears the headers map on the canister. + assert_eq!(by_key["/index.html"].max_age, Some(None)); assert_eq!(by_key["/index.html"].headers, Some(None)); } #[test] - fn update_properties_skips_assets_being_recreated_due_to_content_type_drift() { - // Asset on canister has a different content_type → step 1 deletes it - // and step 2 recreates it with default properties. update_properties - // must not emit a redundant SetAssetProperties op for that key, even - // if canister_asset_properties still contains pre-deletion data. + fn update_properties_ignores_canister_injected_env_cookie() { + // The legacy canister injects `Set-Cookie: ic_env=...` into HTML assets' + // headers. A project with no header config must not see this as drift, + // otherwise every sync would emit a (futile) clear op. let project = HashMap::from([mk_project_asset( - "/file", + "/index.html", "text/html", - &[("identity", vec![1, 2, 3], false)], + &[("identity", vec![1, 2, 3], true)], )]); let canister = HashMap::from([mk_canister_asset( - "/file", - "application/octet-stream", + "/index.html", + "text/html", &[("identity", Some(vec![1, 2, 3]))], )]); let canister_props = HashMap::from([( - "/file".to_string(), + "/index.html".to_string(), AssetProperties { - max_age: Some(60), - headers: None, + max_age: None, + headers: Some(vec![( + "Set-Cookie".into(), + "ic_env=deadbeef; SameSite=Lax".into(), + )]), allow_raw_access: Some(true), + is_aliased: None, }, )]); - let ops = build_operations(&project, &canister, &canister_props, &[], &[], &[]); - assert_eq!(count_op(&ops, "DeleteAsset"), 1); - assert_eq!(count_op(&ops, "CreateAsset"), 1); + let ops = build_operations(&project, &canister, &canister_props); assert!( set_props_ops(&ops).is_empty(), - "no SetAssetProperties op when the asset is being recreated in the same batch" + "the canister-injected ic_env cookie must not count as header drift" ); } #[test] - fn update_properties_skips_assets_not_on_canister() { - // Asset is new to the canister — properties get set via CreateAsset, - // not SetAssetProperties. - let project = HashMap::from([mk_project_asset( - "/new.html", - "text/html", - &[("identity", vec![1, 2, 3], false)], - )]); - let ops = build_operations(&project, &HashMap::new(), &HashMap::new(), &[], &[], &[]); - assert!(set_props_ops(&ops).is_empty()); - } - - // ── _headers integration ─────────────────────────────────────────────── - - fn mk_header_rule(pattern_src: &str, headers: &[(&str, &str)]) -> HeaderRule { - HeaderRule { - pattern: crate::glob::parse(pattern_src).unwrap(), - headers: headers - .iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), - content_type: None, - } - } - - #[test] - fn create_asset_args_carry_resolved_headers() { - let project = HashMap::from([mk_project_asset( - "/index.html", - "text/html", - &[("identity", vec![1, 2, 3], false)], - )]); - let header_rules = vec![mk_header_rule("/*", &[("X-Frame-Options", "DENY")])]; - let ops = build_operations( - &project, - &HashMap::new(), - &HashMap::new(), - &[], - &[], - &header_rules, - ); - let create_op = ops - .iter() - .find_map(|op| match op { - BatchOperationKind::CreateAsset(a) => Some(a), - _ => None, - }) - .expect("CreateAsset op"); - assert_eq!( - create_op.headers, - Some(vec![("X-Frame-Options".into(), "DENY".into())]) - ); - } - - #[test] - fn create_asset_args_omit_headers_when_no_rules_match() { - let project = HashMap::from([mk_project_asset( - "/public.html", - "text/html", - &[("identity", vec![1, 2, 3], false)], - )]); - let header_rules = vec![mk_header_rule("/private", &[("X-Frame-Options", "DENY")])]; - let ops = build_operations( - &project, - &HashMap::new(), - &HashMap::new(), - &[], - &[], - &header_rules, - ); - let create_op = ops - .iter() - .find_map(|op| match op { - BatchOperationKind::CreateAsset(a) => Some(a), - _ => None, - }) - .expect("CreateAsset op"); - assert!(create_op.headers.is_none()); - } - - #[test] - fn update_properties_sets_headers_when_canister_missing_them() { - let project = HashMap::from([mk_project_asset( + fn update_properties_keeps_config_headers_alongside_env_cookie() { + // Project defines X-Custom; canister stores X-Custom plus the injected + // ic_env cookie. After stripping the cookie the two match → no op. + let config = config_with(None, Some(&[("X-Custom", "yes")]), None, Some(true)); + let project = HashMap::from([mk_project_asset_cfg( "/index.html", "text/html", &[("identity", vec![1, 2, 3], true)], + config, )]); let canister = HashMap::from([mk_canister_asset( "/index.html", @@ -1927,200 +1390,78 @@ mod tests { "/index.html".to_string(), AssetProperties { max_age: None, - headers: None, + headers: Some(vec![ + ("Set-Cookie".into(), "ic_env=abc; SameSite=Lax".into()), + ("X-Custom".into(), "yes".into()), + ]), allow_raw_access: Some(true), + is_aliased: None, }, )]); - let header_rules = vec![mk_header_rule("/*", &[("X-Frame-Options", "DENY")])]; - let ops = build_operations( - &project, - &canister, - &canister_props, - &[], - &[], - &header_rules, - ); - let by_key = set_props_ops(&ops); - assert_eq!(by_key.len(), 1); - assert_eq!( - by_key["/index.html"].headers, - Some(Some(vec![("X-Frame-Options".into(), "DENY".into())])) - ); + let ops = build_operations(&project, &canister, &canister_props); + assert!(set_props_ops(&ops).is_empty()); } #[test] - fn update_properties_clears_headers_when_no_rules_match() { + fn update_properties_skips_assets_being_recreated_due_to_content_type_drift() { let project = HashMap::from([mk_project_asset( - "/index.html", + "/file", "text/html", - &[("identity", vec![1, 2, 3], true)], + &[("identity", vec![1, 2, 3], false)], )]); let canister = HashMap::from([mk_canister_asset( - "/index.html", - "text/html", + "/file", + "application/octet-stream", &[("identity", Some(vec![1, 2, 3]))], )]); let canister_props = HashMap::from([( - "/index.html".to_string(), + "/file".to_string(), AssetProperties { - max_age: None, - headers: Some(vec![("X-Frame-Options".into(), "DENY".into())]), + max_age: Some(60), + headers: None, allow_raw_access: Some(true), + is_aliased: None, }, )]); - // No header rules — canister-stored headers should be cleared. - let ops = build_operations(&project, &canister, &canister_props, &[], &[], &[]); - let by_key = set_props_ops(&ops); - assert_eq!(by_key.len(), 1); - assert_eq!(by_key["/index.html"].headers, Some(None)); - } - - #[test] - fn three_xx_redirect_rule_carries_resolved_headers() { - // 3xx rules synthesize their response; populate `headers` from any - // `_headers` rule whose pattern matches the redirect's `from`. - let header_rules = vec![mk_header_rule("/*", &[("X-Robots-Tag", "noindex")])]; - let project_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &project_rules, - &[], - &header_rules, - ); - let rules = set_rules_op(&ops).expect("SetRedirectRules op missing"); - assert_eq!(rules.len(), 1); - assert_eq!( - rules[0].headers, - Some(vec![("X-Robots-Tag".into(), "noindex".into())]) - ); - } - - #[test] - fn non_3xx_redirect_rule_does_not_carry_resolved_headers() { - // 200 / 4xx rules inherit headers from their target asset, so the - // plugin must leave `RedirectRule.headers` as `None` even when a - // matching `_headers` rule exists. - let header_rules = vec![mk_header_rule("/*", &[("X-Robots-Tag", "noindex")])]; - for status in [200u16, 404, 410] { - let project_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/target.html", - status, - )]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &project_rules, - &[], - &header_rules, - ); - let rules = set_rules_op(&ops).expect("SetRedirectRules op missing"); - assert_eq!(rules.len(), 1); - assert!( - rules[0].headers.is_none(), - "status {status}: expected no headers on non-3xx rule" - ); - } + let ops = build_operations(&project, &canister, &canister_props); + assert_eq!(count_op(&ops, "DeleteAsset"), 1); + assert_eq!(count_op(&ops, "CreateAsset"), 1); + assert!(set_props_ops(&ops).is_empty()); } - #[test] - fn three_xx_redirect_rule_omits_headers_when_no_match() { - let header_rules = vec![mk_header_rule("/other", &[("X-Foo", "bar")])]; - let project_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &project_rules, - &[], - &header_rules, - ); - let rules = set_rules_op(&ops).expect("SetRedirectRules op missing"); - assert!(rules[0].headers.is_none()); - } + // ── prepare_asset ──────────────────────────────────────────────────────── #[test] - fn redirect_rules_match_when_headers_populated_matches_canister() { - // Canister stores the same rule (with the resolved 3xx headers) — no - // SetRedirectRules op should be emitted. - let header_rules = vec![mk_header_rule("/*", &[("X-Robots-Tag", "noindex")])]; - let project_rules = vec![mk_rule( - crate::canister::RulePattern::Exact("/old".into()), - "/new", - 301, - )]; - let canister_rules = vec![RedirectRule { - from: crate::canister::RulePattern::Exact("/old".into()), - to: "/new".to_string(), - status: 301, - headers: Some(vec![("X-Robots-Tag".into(), "noindex".into())]), - }]; - let ops = build_operations( - &HashMap::new(), - &HashMap::new(), - &HashMap::new(), - &project_rules, - &canister_rules, - &header_rules, - ); - assert!( - set_rules_op(&ops).is_none(), - "no SetRedirectRules op when rules with headers match canister-stored" - ); + fn prepare_asset_skips_gzip_when_not_smaller() { + use std::io::Write; + let mut f = tempfile::Builder::new().suffix(".txt").tempfile().unwrap(); + f.write_all(&(0u8..=255u8).collect::>()).unwrap(); + let mut source = mk_source("/test.txt"); + source.path = f.path().to_path_buf(); + let asset = prepare_asset(source, &HashMap::new()).unwrap(); + assert!(asset.encodings.contains_key("identity")); + assert!(!asset.encodings.contains_key("gzip")); } #[test] - fn update_properties_no_op_when_canister_headers_match_resolved() { - let project = HashMap::from([mk_project_asset( - "/index.html", - "text/html", - &[("identity", vec![1, 2, 3], true)], - )]); - let canister = HashMap::from([mk_canister_asset( - "/index.html", - "text/html", - &[("identity", Some(vec![1, 2, 3]))], - )]); - let canister_props = HashMap::from([( - "/index.html".to_string(), - AssetProperties { - max_age: None, - headers: Some(vec![("X-Frame-Options".into(), "DENY".into())]), - allow_raw_access: Some(true), - }, - )]); - let header_rules = vec![mk_header_rule("/*", &[("X-Frame-Options", "DENY")])]; - let ops = build_operations( - &project, - &canister, - &canister_props, - &[], - &[], - &header_rules, - ); - assert!( - set_props_ops(&ops).is_empty(), - "no SetAssetProperties op when resolved headers byte-match canister-stored" - ); + fn prepare_asset_honors_config_encodings() { + use std::io::Write; + // A .wasm file defaults to identity-only, but config forces gzip too. + let mut f = tempfile::Builder::new().suffix(".wasm").tempfile().unwrap(); + f.write_all(b"hello hello hello hello hello hello".repeat(10).as_ref()) + .unwrap(); + let mut source = mk_source("/mod.wasm"); + source.path = f.path().to_path_buf(); + source.config.encodings = Some(vec![Encoder::Identity, Encoder::Gzip]); + let asset = prepare_asset(source, &HashMap::new()).unwrap(); + assert!(asset.encodings.contains_key("identity")); + assert!(asset.encodings.contains_key("gzip")); } - // ---- Authorization tests ---- + // ── authorization ──────────────────────────────────────────────────────── - // Mock for ensure_commit_permission: handles list_permitted and grant_permission only. struct PermissionMock { permitted: Vec, - // Tracks the `direct` flag for each grant_permission call. grant_calls: RefCell>, } @@ -2155,7 +1496,6 @@ mod tests { } } - // General-purpose scripted mock: pre-programs per-method response queues. type MockQueue = RefCell, String>>>>; struct SyncMock { @@ -2206,17 +1546,19 @@ mod tests { } } - // Proxy mode: identity absent from Commit list → grant_permission called via proxy. + #[derive(CandidType)] + struct CreateBatchOk { + batch_id: Nat, + } + #[test] fn ensure_commit_permission_grants_via_proxy_when_absent() { let identity = Principal::anonymous(); let mock = PermissionMock::new(vec![]); ensure_commit_permission(&mock, &identity.to_text()).unwrap(); - // grant_permission must be called exactly once with direct=false (routed via proxy). assert_eq!(*mock.grant_calls.borrow(), vec![false]); } - // Proxy mode: identity already in Commit list → grant_permission not called. #[test] fn ensure_commit_permission_skips_grant_when_already_permitted() { let identity = Principal::anonymous(); @@ -2229,36 +1571,18 @@ mod tests { fn sync_rejects_zero_input_dirs() { let mock = SyncMock::new(); let err = sync(&mock, &[], &Principal::anonymous().to_text(), None).unwrap_err(); - assert!( - err.contains("expected exactly one input directory"), - "got: {err}" - ); + assert!(err.contains("at least one input directory"), "got: {err}"); } #[test] - fn sync_rejects_multiple_input_dirs() { - let mock = SyncMock::new(); - let err = sync( - &mock, - &["dist-a".to_string(), "dist-b".to_string()], - &Principal::anonymous().to_text(), - None, - ) - .unwrap_err(); - assert!(err.contains("got 2"), "got: {err}"); - } - - #[test] - fn sync_short_circuits_when_headers_file_only_matches_canister() { - // The canister already stores the headers a `_headers`-only project - // would resolve. The "nothing to commit" short-circuit must trigger. - // The asset is `.txt`, not `.html`, so the auto-synthesised - // html-handling rules don't get in the way of the comparison. + fn sync_short_circuits_when_config_only_matches_canister() { + // The canister already stores the headers a `.ic-assets.json5`-only + // project would resolve. The "nothing to commit" short-circuit triggers. let dir = tempfile::tempdir().unwrap(); std::fs::write(dir.path().join("notes.txt"), b"hello").unwrap(); std::fs::write( - dir.path().join("_headers"), - b"/*\n X-Frame-Options: DENY\n", + dir.path().join(".ic-assets.json5"), + br#"[{ "match": "*", "headers": { "X-Frame-Options": "DENY" } }]"#, ) .unwrap(); @@ -2279,13 +1603,13 @@ mod tests { }], ); mock.push_ok("list", Vec::::new()); - mock.push_ok("get_redirect_rules", Vec::::new()); mock.push_ok( "get_asset_properties", AssetProperties { max_age: None, headers: Some(vec![("X-Frame-Options".into(), "DENY".into())]), allow_raw_access: Some(true), + is_aliased: None, }, ); @@ -2299,22 +1623,14 @@ mod tests { assert!(result.is_ok(), "expected success, got: {result:?}"); } - // ── html-handling auto-synthesis ──────────────────────────────────────── - #[test] - fn sync_synthesises_html_handling_rules_when_html_present() { - // No `_redirects` file; an `index.html` asset alone should produce - // exactly the three rules from `html_handling::synthesize` (the root - // index variant: /, /index, /index.html). The canister has empty - // rules and no asset, so the batch contains both the asset upload - // and a SetRedirectRules op. + fn sync_uploads_new_asset() { let dir = tempfile::tempdir().unwrap(); std::fs::write(dir.path().join("index.html"), b"").unwrap(); let mock = SyncMock::new(); mock.push_ok("api_version", 2u16); mock.push_ok("list", Vec::::new()); - mock.push_ok("get_redirect_rules", Vec::::new()); mock.push_ok( "create_batch", CreateBatchOk { @@ -2338,114 +1654,6 @@ mod tests { assert!(result.is_ok(), "expected success, got: {result:?}"); } - #[test] - fn synthesised_rules_win_over_user_rule_at_same_from() { - // Synthesised rules are emitted **before** the user's `_redirects` — - // synth must come first so html-handling Exact rules don't get - // shadowed by a broader user subtree (e.g. `/*` catch-all), which - // would otherwise make the gateway verifier reject responses on - // those paths (wildcard expr_path vs. exact entry in the tree). - // - // The cost: if the user happens to declare a rule at the exact same - // `from` as something synthesis produces, the synthesised rule wins. - // To override an HTML asset's default html_handling, remove the - // source `.html` and use a non-HTML asset key instead. - let user_rules = redirects::parse("/index /elsewhere 301\n").unwrap(); - let synthesised = crate::html_handling::synthesize(&["/index.html".to_string()]); - - let mut combined = synthesised; - combined.extend(user_rules); - - // First rule matching `/index` is the synthesised 307 -> /. - let first_at_index = combined - .iter() - .find(|r| matches!(&r.from, crate::canister::RulePattern::Exact(p) if p == "/index")) - .expect("a rule at /index"); - assert_eq!(first_at_index.status, 307); - assert_eq!(first_at_index.to, "/"); - } - - #[test] - fn user_subtree_falls_through_to_paths_synth_doesnt_cover() { - // The motivating fix: a user `/*` 404 catch-all must NOT shadow the - // synthesised Exact rules — otherwise the cert tree carries Exact - // entries that the response (served on the `<*>` subtree witness) - // doesn't use, and the gateway verifier returns 503. - // - // With synth first, the catch-all only fires for paths nothing else - // claims. We verify the rule order: synthesised /index Exact comes - // before the user's /* Subtree. - let user_rules = redirects::parse("/* /404.html 404\n").unwrap(); - let synthesised = crate::html_handling::synthesize(&["/index.html".to_string()]); - - let mut combined = synthesised; - combined.extend(user_rules); - - let index_pos = combined - .iter() - .position( - |r| matches!(&r.from, crate::canister::RulePattern::Exact(p) if p == "/index"), - ) - .expect("a rule at /index"); - let catchall_pos = combined - .iter() - .position(|r| matches!(&r.from, crate::canister::RulePattern::Subtree(p) if p == "/")) - .expect("the /* catch-all"); - assert!( - index_pos < catchall_pos, - "synth Exact must precede user Subtree /*; got index@{index_pos}, /*@{catchall_pos}" - ); - } - - #[test] - fn sync_short_circuits_when_synthesised_rules_match_canister() { - // The canister already stores the rules synthesis would produce. - // No SetRedirectRules op should be emitted, and with the asset - // already up to date the sync should short-circuit before - // create_batch. - let dir = tempfile::tempdir().unwrap(); - std::fs::write(dir.path().join("index.html"), b"").unwrap(); - - use sha2::Digest; - let identity_sha = sha2::Sha256::digest(b"").to_vec(); - - let canister_rules = crate::html_handling::synthesize(&["/index.html".to_string()]); - - let mock = SyncMock::new(); - mock.push_ok("api_version", 2u16); - mock.push_ok( - "list", - vec![AssetDetails { - key: "/index.html".to_string(), - content_type: "text/html".to_string(), - encodings: vec![AssetEncodingDetails { - content_encoding: "identity".to_string(), - sha256: Some(identity_sha), - }], - }], - ); - mock.push_ok("list", Vec::::new()); - mock.push_ok("get_redirect_rules", canister_rules); - mock.push_ok( - "get_asset_properties", - AssetProperties { - max_age: None, - headers: None, - allow_raw_access: Some(true), - }, - ); - - let result = sync( - &mock, - &[dir.path().to_str().unwrap().to_string()], - &Principal::anonymous().to_text(), - None, - ); - // No create_batch / commit_batch programmed — would panic if reached. - assert!(result.is_ok(), "expected success, got: {result:?}"); - } - - // Direct mode: canister rejects create_batch with a permission error → sync propagates it. #[test] fn sync_propagates_permission_error_from_create_batch() { let dir = tempfile::tempdir().unwrap(); @@ -2453,9 +1661,7 @@ mod tests { let mock = SyncMock::new(); mock.push_ok("api_version", 2u16); - // Empty canister → build_operations will produce work → create_batch is called. mock.push_ok("list", Vec::::new()); - mock.push_ok("get_redirect_rules", Vec::::new()); mock.push_err("create_batch", "Caller does not have Commit permission"); let result = sync( @@ -2464,11 +1670,7 @@ mod tests { &Principal::anonymous().to_text(), None, ); - let err = result.unwrap_err(); - assert!( - err.contains("Commit permission"), - "expected permission error, got: {err}" - ); + assert!(err.contains("Commit permission"), "got: {err}"); } } diff --git a/crates/sync-core/tests/bench_sync.rs b/crates/sync-core/tests/bench_sync.rs index 98d0cd2..86c4094 100644 --- a/crates/sync-core/tests/bench_sync.rs +++ b/crates/sync-core/tests/bench_sync.rs @@ -20,7 +20,7 @@ use serde::Deserialize; use std::cell::{Cell, RefCell}; use std::collections::BTreeMap; use std::path::Path; -use sync_core::canister::{AssetDetails, AssetProperties, CallType, CanisterCall, RedirectRule}; +use sync_core::canister::{AssetDetails, AssetProperties, CallType, CanisterCall}; use sync_core::sync::sync; // Wire-compatible mirrors of the response types defined privately in @@ -95,11 +95,11 @@ impl CanisterCall for BenchMock { let resp = match method { "api_version" => Encode!(&2u16), "list" => Encode!(&Vec::::new()), - "get_redirect_rules" => Encode!(&Vec::::new()), "get_asset_properties" => Encode!(&AssetProperties { max_age: None, headers: None, allow_raw_access: Some(true), + is_aliased: None, }), "create_batch" => Encode!(&CreateBatchOk { batch_id: Nat::from(1u32), diff --git a/crates/sync-plugin/src/lib.rs b/crates/sync-plugin/src/lib.rs index 23201c9..e40d4e3 100644 --- a/crates/sync-plugin/src/lib.rs +++ b/crates/sync-plugin/src/lib.rs @@ -49,7 +49,7 @@ struct Plugin; impl Guest for Plugin { fn exec(input: SyncExecInput) -> Result<(), String> { println!( - "sync plugin: starting for canister {} (environment: {})", + "sync plugin (legacy assetstorage / .ic-assets.json5): starting for canister {} (environment: {})", input.canister_id, input.environment ); let summary = sync_core::sync::sync(