diff --git a/docs/netsuke-design.md b/docs/netsuke-design.md index bb21c4d8..ae0ec4d8 100644 --- a/docs/netsuke-design.md +++ b/docs/netsuke-design.md @@ -400,6 +400,21 @@ The cleaner model is: - `always`: When set to `true`, the target runs on every invocation regardless of timestamps or dependencies. The default value is `false`. +### 2.7 Table: Netsuke Manifest vs. Makefile + +To illustrate the ergonomic advantages of the Netsuke schema, the following +table compares a simple C compilation project defined in both a traditional +`Makefile` and a `Netsukefile` file. The comparison highlights Netsuke's +explicit, structured, and self-documenting nature. + +| Feature | Makefile Example | Netsukefile Example | +| --------------- | ---------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| Variables | CC=gcc | { vars: { cc: gcc } } | +| Macros | define greet\\t@echo Hello $$1endef | { macros: { signature: "greet(name)", body: "Hello {{ name }}" } } | +| Rule Definition | %.o: %.c\\n\\t$(CC) -c $< -o $@ | { rules: { name: compile, command: "{{ cc }} -c {{ ins }} -o {{ outs }}", description: "Compiling {{ outs }}" } } | +| Target Build | my_program: main.o utils.o\\t$(CC) $^ -o $@ | { targets: { name: my_program, rule: link, sources: [main.o, utils.o] } | +| Readability | Relies on cryptic automatic variables ($@, $\<, $^) and implicit pattern matching. | Uses explicit, descriptive keys (name, rule, sources) and standard YAML list/map syntax. | + ### 2.5 Generated Targets and Actions with `foreach` Large sets of similar outputs or setup actions can clutter a manifest when @@ -600,21 +615,6 @@ output: manifest keys for recipe selection. Warnings should be reserved for degraded behaviour that Netsuke can classify itself. -### 2.7 Table: Netsuke Manifest vs. 
Makefile - -To illustrate the ergonomic advantages of the Netsuke schema, the following -table compares a simple C compilation project defined in both a traditional -`Makefile` and a `Netsukefile` file. The comparison highlights Netsuke's -explicit, structured, and self-documenting nature. - -| Feature | Makefile Example | Netsukefile Example | -| --------------- | ---------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | -| Variables | CC=gcc | { vars: { cc: gcc } } | -| Macros | define greet\\t@echo Hello $$1endef | { macros: { signature: "greet(name)", body: "Hello {{ name }}" } } | -| Rule Definition | %.o: %.c\\n\\t$(CC) -c $< -o $@ | { rules: { name: compile, command: "{{ cc }} -c {{ ins }} -o {{ outs }}", description: "Compiling {{ outs }}" } } | -| Target Build | my_program: main.o utils.o\\t$(CC) $^ -o $@ | { targets: { name: my_program, rule: link, sources: [main.o, utils.o] } | -| Readability | Relies on cryptic automatic variables ($@, $\<, $^) and implicit pattern matching. | Uses explicit, descriptive keys (name, rule, sources) and standard YAML list/map syntax. | - ## Section 3: Parsing and Deserialization Strategy Once the Jinja evaluation stage has produced a pure YAML string, the next @@ -624,7 +624,7 @@ data structures are crucial for the robustness and maintainability of Netsuke. ### 3.1 Crate Selection: `serde_saphyr` -Netsuke now relies on `serde_saphyr` for YAML parsing and serialisation. The +Netsuke now relies on `serde_saphyr` for YAML parsing and serialization. The crate wraps the actively maintained `saphyr` parser while preserving the familiar `serde_yaml`-style API: helpers such as `from_str`, `from_reader`, and `to_string` integrate cleanly with `serde` derives, and the error type exposes @@ -1233,11 +1233,11 @@ Semantics honour platform conventions while enforcing predictable behaviour: bit. 
Empty `PATH` segments (leading, trailing, or `::`) map to the working directory when `cwd_mode` is `"auto"` or `"always"`. - On Windows, the lookup respects `PATHEXT` when the command lacks an - extension. Comparisons are case-insensitive, results normalise both slash + extension. Comparisons are case-insensitive, results normalize both slash styles, and `cwd_mode` defaults to skipping the working directory to avoid the platform’s surprise "search CWD first" rule. Opting in via `"always"` restores that behaviour. -- Canonicalisation happens after discovery and only when requested so that +- Canonicalization happens after discovery and only when requested so that manifests can balance reproducibility against host-specific absolute paths. The resolver keeps a small LRU cache keyed by the command, a fingerprint of @@ -1706,7 +1706,7 @@ use camino::Utf8PathBuf; /// The complete, static build graph. pub struct BuildGraph { /// A map of all unique actions (rules) in the build. - /// The key is a hash of a canonical JSON serialisation of the action's + /// The key is a hash of a canonical JSON serialization of the action's /// properties to enable deduplication. pub actions: HashMap, @@ -1882,10 +1882,11 @@ This transformation involves several steps: deterministic error messages. Traversal state is implemented in the dedicated `ir::cycle` module. Its - `CycleDetector` helper owns the recursion stack and visitation map. Keys are - cloned from the `targets` map so traversal leaves the input graph untouched. - Missing dependencies encountered during traversal are logged, collected, and - returned alongside any cycle to aid diagnostics. + `CycleDetector` helper exposes a `detect` API and owns the recursion stack + and visitation map. Keys are cloned from the `targets` map so traversal + leaves the input graph untouched. Missing dependencies encountered during + traversal are logged, collected, and returned alongside any cycle to aid + diagnostics. 
### 5.4 Ninja File Synthesis (`ninja_gen.rs`) @@ -1967,7 +1968,7 @@ manifest. No Ninja specific placeholders are stored in the IR to keep the representation portable. - Actions are deduplicated using a SHA-256 hash of a canonical JSON - serialisation of their recipe, inputs, and outputs. Because commands embed + serialization of their recipe, inputs, and outputs. Because commands embed shell-quoted file paths, two targets share an identifier only when both the command text and file sets match exactly. - Multiple rule references in a single target are not yet supported. The IR @@ -2237,7 +2238,7 @@ enrichment: `.with_context(|| "Failed to build the internal build graph from the manifest")?` . -4. This process of propagation and contextualisation repeats as the error +4. This process of propagation and contextualization repeats as the error bubbles up towards `main`. Use `anyhow::Context` to add detail, but never convert a `miette::Diagnostic` into a plain `anyhow::Error`--doing so would discard spans and help text. @@ -2337,7 +2338,7 @@ given). /// This is the default subcommand. Build(BuildArgs), /// Remove build artefacts and intermediate files. Clean, - /// Display the build dependency graph in DOT format for visualisation. + /// Display the build dependency graph in DOT format for visualization. Graph, /// Write the Ninja manifest to `FILE` without invoking Ninja. diff --git a/src/ir/cycle.rs b/src/ir/cycle.rs index afad3cec..f6d42913 100644 --- a/src/ir/cycle.rs +++ b/src/ir/cycle.rs @@ -1,8 +1,22 @@ -//! Cycle detection utilities for the IR target graph. +//! Cycle detection for the IR target graph. +//! +//! The public entry point is [`analyse`], which accepts the target map +//! (`HashMap<Utf8PathBuf, BuildEdge>`) produced by IR lowering and +//! returns a [`CycleDetectionReport`]. The report carries an optional +//! detected cycle — an ordered, canonicalized list of paths — together +//! with any dependencies referenced by a target but absent from the map. +//! +//! 
Traversal state is managed by the private [`CycleDetector`] struct, +//! which owns the DFS recursion stack and per-node visitation map. +//! Callers drive detection through [`CycleDetector::detect`], which +//! iterates over every node in the target map and delegates depth-first +//! visiting to `visit` and `visit_dependency`. Detected cycles are +//! normalized by [`canonicalize_cycle`] to produce deterministic error +//! messages regardless of traversal order. use std::collections::HashMap; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use super::BuildEdge; @@ -13,29 +27,35 @@ enum VisitState { Visited, } +/// The result of a cycle-detection pass over the target graph. +/// +/// `cycle` is `Some` when a dependency cycle was found; the vec holds the +/// cycle's nodes in canonical order, with the first node repeated as the +/// last element. `missing_dependencies` lists every `(dependent, dep)` +/// pair where `dep` is referenced but absent from the target map. pub(crate) struct CycleDetectionReport { pub(crate) cycle: Option>, pub(crate) missing_dependencies: Vec<(Utf8PathBuf, Utf8PathBuf)>, } +/// Analyse `targets` for dependency cycles and missing dependencies. +/// +/// Returns a [`CycleDetectionReport`] containing the first detected cycle +/// (if any) and the full list of missing-dependency pairs encountered +/// during traversal. pub(crate) fn analyse(targets: &HashMap) -> CycleDetectionReport { let mut detector = CycleDetector::new(targets); - let mut cycle = None; - for node in targets.keys() { - if detector.is_visited(node) { - continue; - } - if let Some(found) = detector.visit(node.clone()) { - cycle = Some(found); - break; - } - } + let cycle = detector.detect(); CycleDetectionReport { cycle, missing_dependencies: detector.missing_dependencies, } } +/// Depth-first cycle detector that owns its traversal state. +/// +/// Create with [`CycleDetector::new`] and drive detection with +/// [`CycleDetector::detect`]. 
struct CycleDetector<'targets> { targets: &'targets HashMap, stack: Vec, @@ -44,6 +64,8 @@ struct CycleDetector<'targets> { } impl CycleDetector<'_> { + /// Create a new detector borrowing `targets` for the duration of the + /// traversal. fn new(targets: &HashMap) -> CycleDetector<'_> { CycleDetector { targets, @@ -53,46 +75,68 @@ impl CycleDetector<'_> { } } - fn is_visited(&self, node: &Utf8PathBuf) -> bool { + /// Walk every node in the target map and return the first cycle found, + /// or `None` if the graph is acyclic. + fn detect(&mut self) -> Option> { + let mut nodes: Vec = self.targets.keys().cloned().collect(); + nodes.sort(); + for node in nodes { + if self.is_visited(node.as_path()) { + continue; + } + if let Some(cycle) = self.visit(node.as_path()) { + return Some(cycle); + } + } + None + } + + /// Return `true` if `node` has been fully visited. + fn is_visited(&self, node: &Utf8Path) -> bool { matches!(self.states.get(node), Some(VisitState::Visited)) } - fn visit(&mut self, node: Utf8PathBuf) -> Option> { - match self.states.get(&node) { + /// Visit `node` depth-first. + /// + /// Returns `Some(cycle)` if a back-edge to an in-progress node is + /// discovered, `None` otherwise. 
+ fn visit(&mut self, node: &Utf8Path) -> Option> { + match self.states.get(node) { Some(VisitState::Visited) => return None, Some(VisitState::Visiting) => { let idx = self .stack .iter() - .position(|n| n == &node) + .position(|n| n.as_path() == node) .unwrap_or_else(|| { debug_assert!(false, "visiting node must be on the stack"); 0 }); let mut cycle: Vec = self.stack.iter().skip(idx).cloned().collect(); - cycle.push(node); + cycle.push(node.to_path_buf()); return Some(canonicalize_cycle(cycle)); } None => { - self.states.insert(node.clone(), VisitState::Visiting); + self.states.insert(node.to_path_buf(), VisitState::Visiting); } } - self.stack.push(node.clone()); + self.stack.push(node.to_path_buf()); - if let Some(cycle) = self + let cycle = self .targets - .get(&node) + .get(node) .into_iter() .flat_map(|edge| edge.inputs.iter()) - .find_map(|dep| self.visit_dependency(&node, dep)) - { - return Some(cycle); - } + .find_map(|dep| self.visit_dependency(node, dep)); self.stack.pop(); - self.states.insert(node, VisitState::Visited); - None + + if cycle.is_none() { + self.states.insert(node.to_path_buf(), VisitState::Visited); + } + + cycle } #[cfg(test)] @@ -107,7 +151,9 @@ impl CycleDetector<'_> { } impl CycleDetector<'_> { - fn record_missing_dependency(&mut self, node: &Utf8PathBuf, dep: &Utf8PathBuf) -> bool { + /// Record `dep` as missing and return `true` if `dep` is absent from the + /// target map; return `false` if it is present. 
+ fn record_missing_dependency(&mut self, node: &Utf8Path, dep: &Utf8Path) -> bool { if self.targets.contains_key(dep) { return false; } @@ -117,23 +163,29 @@ impl CycleDetector<'_> { dependent = %node, "skipping dependency missing from targets during cycle detection", ); - self.missing_dependencies.push((node.clone(), dep.clone())); + self.missing_dependencies + .push((node.to_path_buf(), dep.to_path_buf())); true } - fn visit_dependency( - &mut self, - node: &Utf8PathBuf, - dep: &Utf8PathBuf, - ) -> Option> { + /// Optionally record `dep` as missing, then visit it. + /// + /// Returns early with `None` when the dependency is absent from the target + /// map. + fn visit_dependency(&mut self, node: &Utf8Path, dep: &Utf8Path) -> Option> { if self.record_missing_dependency(node, dep) { return None; } - self.visit(dep.clone()) + self.visit(dep) } } +/// Rotate `cycle` so that the lexicographically smallest node appears +/// first, then re-close it by appending the first node. +/// +/// The input must contain at least two nodes; the first and last node are +/// expected to be identical (the standard DFS cycle representation). 
fn canonicalize_cycle(mut cycle: Vec) -> Vec { debug_assert!( cycle.len() >= 2, @@ -146,10 +198,10 @@ fn canonicalize_cycle(mut cycle: Vec) -> Vec { .enumerate() .min_by(|(_, a), (_, b)| a.cmp(b)) .map_or(0, |(idx, _)| idx); - let (prefix, suffix) = cycle.split_at_mut(len); - prefix.rotate_left(start); - if let (Some(first), Some(last)) = (prefix.first().cloned(), suffix.first_mut()) { - *last = first; + cycle.pop(); + cycle.rotate_left(start); + if let Some(first) = cycle.first().cloned() { + cycle.push(first); } cycle } @@ -192,9 +244,9 @@ mod tests { targets.insert(b.clone(), build_edge(&[], "b")); let mut detector = CycleDetector::new(&targets); - assert!(detector.visit(a.clone()).is_none()); - assert!(detector.is_visited(&a)); - assert!(detector.is_visited(&b)); + assert!(detector.detect().is_none()); + assert!(detector.is_visited(a.as_path())); + assert!(detector.is_visited(b.as_path())); assert!( detector.stack.is_empty(), "stack should be empty after complete traversal", @@ -207,7 +259,7 @@ mod tests { targets.insert(path("a"), build_edge(&["b"], "a")); let mut detector = CycleDetector::new(&targets); - assert!(detector.visit(path("a")).is_none()); + assert!(detector.visit(path("a").as_path()).is_none()); assert_eq!(detector.missing_dependencies(), &[(path("a"), path("b"))],); } @@ -223,18 +275,139 @@ mod tests { } #[test] - fn canonicalize_cycle_rotates_smallest_node() { - let cycle = vec![path("c"), path("a"), path("b"), path("c")]; + fn find_cycle_is_deterministic() { + let mut targets = HashMap::new(); + targets.insert(path("p"), build_edge(&["q"], "p")); + targets.insert(path("q"), build_edge(&["p"], "q")); + targets.insert(path("x"), build_edge(&["y"], "x")); + targets.insert(path("y"), build_edge(&["x"], "y")); + + let first = CycleDetector::find_cycle(&targets).expect("cycle"); + for _ in 1..100 { + let cycle = CycleDetector::find_cycle(&targets).expect("cycle"); + assert!( + cycle == first, + "find_cycle returned inconsistent results across runs: \ 
+ first={first:?}, got={cycle:?}", + ); + } + // Probabilistic guard: 100 runs; `detect` sorts keys for stable traversal. + tracing::info!("find_cycle returned the same cycle across 100 runs"); + } + + #[test] + fn find_cycle_detects_one_of_multiple_disjoint_cycles() { + let mut targets = HashMap::new(); + targets.insert(path("p"), build_edge(&["q"], "p")); + targets.insert(path("q"), build_edge(&["p"], "q")); + targets.insert(path("x"), build_edge(&["y"], "x")); + targets.insert(path("y"), build_edge(&["x"], "y")); + + assert!(CycleDetector::find_cycle(&targets).is_some()); + } + + #[test] + fn cycle_detector_stack_is_empty_after_cycle_detected() { + let mut targets = HashMap::new(); + targets.insert(path("a"), build_edge(&["b"], "a")); + targets.insert(path("b"), build_edge(&["a"], "b")); + + let mut detector = CycleDetector::new(&targets); + assert!(detector.detect().is_some(), "expected a cycle"); + assert!( + detector.stack.is_empty(), + "stack must be empty after cycle detection", + ); + } + + fn check_canonicalize_cycle(input: &[&str], expected: &[&str]) { + let cycle: Vec = input.iter().map(|&s| path(s)).collect(); let canonical = canonicalize_cycle(cycle); - let expected = vec![path("a"), path("b"), path("c"), path("a")]; - assert_eq!(canonical, expected); + let want: Vec = expected.iter().map(|&s| path(s)).collect(); + assert_eq!(canonical, want); + } + + #[test] + fn canonicalize_cycle_rotates_smallest_node() { + check_canonicalize_cycle(&["c", "a", "b", "c"], &["a", "b", "c", "a"]); } #[test] fn canonicalize_cycle_handles_reverse_direction() { - let cycle = vec![path("c"), path("b"), path("a"), path("c")]; - let canonical = canonicalize_cycle(cycle); - let expected = vec![path("a"), path("c"), path("b"), path("a")]; - assert_eq!(canonical, expected); + check_canonicalize_cycle(&["c", "b", "a", "c"], &["a", "c", "b", "a"]); + } + + mod property_tests { + use proptest::prelude::*; + + use super::super::canonicalize_cycle; + use super::path; + + /// 
Generate a non-empty list of distinct single-character node names. + fn node_names(min: usize, max: usize) -> impl Strategy> { + proptest::collection::vec("[a-z]", min..=max).prop_filter("nodes must be unique", |v| { + let set: std::collections::HashSet<_> = v.iter().collect(); + set.len() == v.len() + }) + } + + /// Build a closed cycle from `nodes`: [...nodes, nodes[0]]. + fn make_cycle(nodes: &[String]) -> Vec { + let mut cycle: Vec<_> = nodes.iter().map(|s| path(s)).collect(); + cycle.push(path( + nodes + .first() + .expect("node_names generates at least two nodes"), + )); + cycle + } + + proptest! { + /// Canonicalisation is idempotent: applying it twice yields the + /// same result as applying it once. + #[test] + fn canonicalize_is_idempotent(nodes in node_names(2, 10)) { + let cycle = make_cycle(&nodes); + let once = canonicalize_cycle(cycle.clone()); + let twice = canonicalize_cycle(once.clone()); + prop_assert_eq!(once, twice); + } + + /// All rotations of a cycle canonicalise to the same sequence. + #[test] + fn all_rotations_canonicalise_identically(nodes in node_names(2, 8)) { + let base = canonicalize_cycle(make_cycle(&nodes)); + for i in 1..nodes.len() { + let mut rotated = nodes.clone(); + rotated.rotate_left(i); + let result = canonicalize_cycle(make_cycle(&rotated)); + prop_assert_eq!(&base, &result); + } + } + + /// The first node in the canonical form is lexicographically <= + /// every other non-terminal node. + #[test] + fn canonical_first_node_is_smallest(nodes in node_names(2, 10)) { + let canonical = canonicalize_cycle(make_cycle(&nodes)); + let interior = canonical + .get(..canonical.len().saturating_sub(1)) + .expect("canonicalize_cycle produces at least two nodes"); + let first = canonical + .first() + .expect("canonicalize_cycle produces at least one node"); + for node in interior { + prop_assert!(first <= node); + } + } + + /// The canonical form is closed: first and last elements are + /// equal. 
+ #[test] + fn canonical_cycle_is_closed(nodes in node_names(2, 10)) { + let canonical = canonicalize_cycle(make_cycle(&nodes)); + prop_assert_eq!(canonical.first(), canonical.last()); + } + } } }