From 5eed8a9bfed573af618c9692555151163c2d5e22 Mon Sep 17 00:00:00 2001 From: Julien Duchesne Date: Tue, 9 Jun 2026 12:59:14 -0400 Subject: [PATCH] feat(jsonnet): preserve hidden fields in rtkMemoize Store memoized values as a thread-portable OwnedVal snapshot instead of a lossy JSON string. Hidden (::) object fields now round-trip through the cross-worker cache, where previously any hidden field was rejected. Functions still cannot be memoized (they capture a thread-local closure context plus Rust builtins), but the error now lists the path of every function found in the value rather than failing on the first one. Co-authored-by: Cursor --- .../jsonnet/evaluator/jrsonnet/builtins.rs | 179 +++++++--- .../rtk/src/jsonnet/evaluator/jrsonnet/mod.rs | 321 +++++++++++++++++- 2 files changed, 438 insertions(+), 62 deletions(-) diff --git a/cmds/rtk/src/jsonnet/evaluator/jrsonnet/builtins.rs b/cmds/rtk/src/jsonnet/evaluator/jrsonnet/builtins.rs index b7037a557..ab45a1a29 100644 --- a/cmds/rtk/src/jsonnet/evaluator/jrsonnet/builtins.rs +++ b/cmds/rtk/src/jsonnet/evaluator/jrsonnet/builtins.rs @@ -15,6 +15,7 @@ use std::{ use jrsonnet_evaluator::{ error::{ErrorKind::*, Result}, manifest::JsonFormat, + val::{ArrValue, NumValue}, IStr, ObjValue, Thunk, Val, }; use jrsonnet_macros::builtin; @@ -122,14 +123,17 @@ pub fn helm_cache_put_json(key: String, json: String) { /// State of a single `rtkMemoize` cache slot. /// -/// The value is stored as a JSON string (not a `Val`) because `Val` is not -/// `Send`/`Sync` (it uses `Rc` internally), while the cache is shared across -/// worker threads. +/// The value is stored as an [`OwnedVal`] snapshot (not a `Val`) because `Val` +/// is not `Send`/`Sync` (it uses `Rc`/`Cc` internally and is bound to the +/// evaluating thread), while the cache is shared across worker threads. The +/// snapshot is shared via `Arc` so cache hits clone a pointer rather than the +/// whole tree, and a fresh `Val` is rebuilt from it in each consuming thread. enum MemoState { /// A worker is currently evaluating the value for this key. Computing, - /// The value has been computed and is available as JSON. - Done(String), + /// The value has been computed and is available as a thread-portable + /// snapshot. + Done(Arc), /// Computation failed; the next waiter should retry the computation. Failed, } @@ -191,42 +195,114 @@ impl Drop for MemoComputeGuard<'_> { } } -/// Recursively check whether a value contains any hidden object field. +/// Thread-portable, owned snapshot of a [`Val`]. /// -/// The memoization cache stores values as JSON, which silently drops hidden -/// (`::`) fields. To avoid surprising data loss we reject such values up front -/// instead of caching a lossy projection. Hidden fields are detected by name -/// (comparing the full field set against the visible one), so a hidden field's -/// value is never evaluated. -fn contains_hidden_field(val: &Val) -> Result { - match val { +/// `Val` is `Rc`/`Cc`-based and tied to the thread that evaluated it, so it +/// cannot live in the cross-worker memo cache directly. `OwnedVal` is a deep +/// copy made of only owned, `Send + Sync` data, mirroring the jsonnet value +/// model closely enough to round-trip without the lossy JSON projection the +/// cache used previously. In particular, object field visibility (hidden `::`) +/// is preserved, so memoized values keep their hidden fields. +/// +/// Functions are intentionally not representable: a `FuncVal` captures a +/// closure `Context` (free variables, `self`/`super`, `std`, imports) plus Rust +/// builtins, none of which can be turned into thread-independent data. A value +/// containing any function (at any depth) is therefore rejected at snapshot +/// time rather than cached lossily. +enum OwnedVal { + Null, + Bool(bool), + Num(f64), + Str(String), + Arr(Vec), + /// Object fields in iteration order. The flag marks a hidden (`::`) field. + Obj(Vec<(String, bool, OwnedVal)>), +} + +/// Deep-copy a fully-evaluated `Val` into a thread-portable [`OwnedVal`]. +/// +/// Every array element and object field (including hidden ones) is forced, so +/// the snapshot is a complete, eager copy. Functions cannot be snapshotted (see +/// [`OwnedVal`]); rather than failing on the first one, every function's path +/// (e.g. `spec.template.withName`) is appended to `funcs` so the caller can +/// report all of them at once. `path` is the dotted/indexed location of `val` +/// within the memoized value (empty at the root). +fn val_to_owned(funcs: &mut Vec, path: &str, val: &Val) -> Result { + Ok(match val { + Val::Null => OwnedVal::Null, + Val::Bool(b) => OwnedVal::Bool(*b), + Val::Num(n) => OwnedVal::Num(n.get()), + Val::Str(s) => OwnedVal::Str(s.clone().into_flat().to_string()), + Val::Arr(arr) => { + let mut items = Vec::with_capacity(arr.len()); + for (idx, item) in arr.iter().enumerate() { + let child = format!("{path}[{idx}]"); + items.push(val_to_owned(funcs, &child, &item?)?); + } + OwnedVal::Arr(items) + } Val::Obj(obj) => { + // A field present with hidden included but absent from the visible + // set is hidden. `:::` (unhide) collapses to a plain visible field, + // which is correct for a flattened snapshot with no further supers. let visible: HashSet = obj.fields_ex(false).into_iter().collect(); let all = obj.fields_ex(true); - // `all` is always a superset of `visible`; a size difference means - // at least one field is hidden. - if all.len() != visible.len() { - return Ok(true); - } + let mut fields = Vec::with_capacity(all.len()); for name in all { + let hidden = !visible.contains(&name); let field = obj - .get(name)? + .get(name.clone())? .expect("field listed by fields_ex must exist"); - if contains_hidden_field(&field)? { - return Ok(true); - } + let child = if path.is_empty() { + name.to_string() + } else { + format!("{path}.{name}") + }; + fields.push(( + name.to_string(), + hidden, + val_to_owned(funcs, &child, &field)?, + )); } - Ok(false) + OwnedVal::Obj(fields) } - Val::Arr(arr) => { - for item in arr.iter() { - if contains_hidden_field(&item?)? { - return Ok(true); + Val::Func(_) => { + funcs.push(if path.is_empty() { + "".to_string() + } else { + path.to_string() + }); + OwnedVal::Null + } + }) +} + +/// Rebuild a fresh `Val` in the current thread from an [`OwnedVal`] snapshot. +/// +/// Object fields are recreated with their original visibility via the object +/// builder, so hidden fields stay hidden. +fn owned_to_val(owned: &OwnedVal) -> Val { + match owned { + OwnedVal::Null => Val::Null, + OwnedVal::Bool(b) => Val::Bool(*b), + OwnedVal::Num(n) => { + Val::Num(NumValue::new(*n).expect("snapshot numbers were finite when captured")) + } + OwnedVal::Str(s) => Val::string(s.as_str()), + OwnedVal::Arr(items) => Val::Arr(ArrValue::eager(items.iter().map(owned_to_val).collect())), + OwnedVal::Obj(fields) => { + let mut builder = ObjValue::builder_with_capacity(fields.len()); + for (name, hidden, value) in fields { + let value = owned_to_val(value); + let member = builder.field(name.as_str()); + if *hidden { + member.hide().value(value); + } else { + member.value(value); } } - Ok(false) + Val::Obj(builder.build()) } - _ => Ok(false), } } @@ -238,12 +314,13 @@ fn contains_hidden_field(val: &Val) -> Result { /// a key, other workers requesting the same key block until the result is /// ready, then reuse it without re-evaluating their own thunk. /// -/// The cached value is stored as JSON, so the returned value is always the -/// JSON-manifested projection of the thunk. Because that projection silently -/// drops hidden (`::`) fields, a value containing any hidden field (at any -/// depth) is rejected with an error rather than cached lossily. The same JSON -/// value is returned to the computing worker too, so every caller observes an -/// identical value regardless of who wins the race to compute it. +/// The cached value is stored as an [`OwnedVal`] snapshot: a thread-portable +/// deep copy that preserves hidden (`::`) object fields, so memoized values +/// keep them instead of being reduced to their visible JSON projection. +/// Functions cannot be snapshotted (they capture a thread-local closure +/// context), so a value containing any function at any depth is rejected. The +/// same snapshot is rebuilt for the computing worker too, so every caller +/// observes an identical value regardless of who wins the race to compute it. #[builtin] pub fn rtk_memoize(key: String, value: Thunk) -> Result { // Guard against a thunk that memoizes its own key on the same thread, @@ -285,28 +362,32 @@ pub fn rtk_memoize(key: String, value: Thunk) -> Result { completed: false, }; - // Evaluate the (lazy) thunk and fully manifest it to JSON so the - // result can be shared with other threads. The slot lock is NOT - // held during evaluation, so other keys make progress and waiters - // on this key simply block on the condvar. + // Evaluate the (lazy) thunk and deep-copy it into a thread-portable + // snapshot so the result can be shared with other threads. The slot + // lock is NOT held during evaluation, so other keys make progress + // and waiters on this key simply block on the condvar. let evaluated = value.evaluate()?; - if contains_hidden_field(&evaluated)? { + let mut funcs = Vec::new(); + let snapshot = val_to_owned(&mut funcs, "", &evaluated)?; + if !funcs.is_empty() { + funcs.sort_unstable(); + funcs.dedup(); return Err(RuntimeError( format!( - "rtkMemoize: value for key {key:?} contains hidden field(s), \ - which cannot be memoized (they would be dropped by JSON serialization)" + "rtkMemoize: value for key {key:?} contains function(s), which cannot be \ + memoized (functions capture a thread-local closure context); found at: {}", + funcs.join(", ") ) .into(), ) .into()); } - let json = evaluated.manifest(JsonFormat::default())?; - let result: Val = serde_json::from_str(&json) - .map_err(|e| RuntimeError(format!("failed to parse memoized value: {e}").into()))?; + let snapshot = Arc::new(snapshot); + let result = owned_to_val(&snapshot); { let mut state = slot.state.lock().unwrap_or_else(|e| e.into_inner()); - *state = MemoState::Done(json); + *state = MemoState::Done(snapshot); slot.cond.notify_all(); } guard.completed = true; @@ -317,10 +398,8 @@ pub fn rtk_memoize(key: String, value: Thunk) -> Result { let mut state = slot.state.lock().unwrap_or_else(|e| e.into_inner()); loop { match &*state { - MemoState::Done(json) => { - return serde_json::from_str(json).map_err(|e| { - RuntimeError(format!("failed to parse memoized value: {e}").into()).into() - }); + MemoState::Done(snapshot) => { + return Ok(owned_to_val(snapshot)); } // The computing worker failed; retry the whole operation so // this worker (or another) re-attempts the computation. diff --git a/cmds/rtk/src/jsonnet/evaluator/jrsonnet/mod.rs b/cmds/rtk/src/jsonnet/evaluator/jrsonnet/mod.rs index 258274e6c..ef69b4ec6 100644 --- a/cmds/rtk/src/jsonnet/evaluator/jrsonnet/mod.rs +++ b/cmds/rtk/src/jsonnet/evaluator/jrsonnet/mod.rs @@ -1278,33 +1278,330 @@ mod tests { } #[test] - fn test_eval_native_rtk_memoize_rejects_hidden_field() { - // A top-level hidden field would be dropped by JSON serialization, so - // memoizing such a value must error instead of silently losing data. + fn test_eval_native_rtk_memoize_preserves_hidden_field() { + // Hidden (`::`) fields survive memoization: the snapshot records field + // visibility, so the rebuilt value still exposes them to jsonnet (they + // are only dropped by the final JSON manifest, as usual). + let result = default_evaluator() + .eval_snippet( + r#" + local m = std.native("rtkMemoize")("rtk_memoize_hidden", { visible: 1, hidden:: 2 }); + { sum: m.visible + m.hidden, fields: std.objectFieldsAll(m) } + "#, + &EvaluatorOptions::default(), + ) + .unwrap(); + assert_eq!(result.value["sum"], 3); + assert_eq!(result.value["fields"][0], "hidden"); + assert_eq!(result.value["fields"][1], "visible"); + } + + #[test] + fn test_eval_native_rtk_memoize_preserves_nested_hidden_field() { + // Hidden fields are preserved at any depth, including inside arrays. + let result = default_evaluator() + .eval_snippet( + r#" + local m = std.native("rtkMemoize")("rtk_memoize_hidden_nested", { a: { b: [{ c:: 1 }] } }); + m.a.b[0].c + "#, + &EvaluatorOptions::default(), + ) + .unwrap(); + assert_eq!(result.value, 1); + } + + #[test] + fn test_eval_native_rtk_memoize_rejects_function_lists_paths() { + // Functions capture a thread-local closure context and cannot be + // snapshotted. The error must list the path of every function found, not + // just the first one. let result = default_evaluator().eval_snippet( - r#"std.native("rtkMemoize")("rtk_memoize_hidden", { visible: 1, hidden:: 2 })"#, + r#"std.native("rtkMemoize")("rtk_memoize_func", { + a: 1, + f:: function(x) x + 1, + nested: { g(y):: y, arr: [function() 0] }, + })"#, &EvaluatorOptions::default(), ); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!( - err.contains("hidden"), - "error should mention hidden field, got: {err}" + err.contains("function"), + "error should mention function, got: {err}" + ); + assert!(err.contains("f"), "error should list f, got: {err}"); + assert!( + err.contains("nested.g"), + "error should list nested.g, got: {err}" + ); + assert!( + err.contains("nested.arr[0]"), + "error should list nested.arr[0], got: {err}" ); } + /// Memoize `value_expr` under a unique key and return the manifested JSON of + /// `{ out: }`. The consumer runs in + /// jsonnet against the rebuilt value, so it can observe hidden fields that + /// the final manifest would otherwise drop. + fn memoize_roundtrip(key: &str, value_expr: &str, consumer: &str) -> serde_json::Value { + let snippet = format!( + r#"local m = std.native("rtkMemoize")("{key}", {value_expr}); {{ out: {consumer} }}"# + ); + default_evaluator() + .eval_snippet(&snippet, &EvaluatorOptions::default()) + .unwrap() + .value["out"] + .clone() + } + #[test] - fn test_eval_native_rtk_memoize_rejects_nested_hidden_field() { - // Hidden fields are rejected at any depth, including inside arrays. + fn test_rtk_memoize_scalar_null() { + assert!(memoize_roundtrip("rtk_mz_null", "null", "m == null") + .as_bool() + .unwrap()); + } + + #[test] + fn test_rtk_memoize_scalar_bools() { + assert_eq!(memoize_roundtrip("rtk_mz_true", "true", "m"), true); + assert_eq!(memoize_roundtrip("rtk_mz_false", "false", "m"), false); + } + + #[test] + fn test_rtk_memoize_scalar_numbers() { + assert_eq!(memoize_roundtrip("rtk_mz_int", "42", "m"), 42); + assert_eq!(memoize_roundtrip("rtk_mz_neg", "-17", "m"), -17); + assert_eq!(memoize_roundtrip("rtk_mz_zero", "0", "m"), 0); + assert_eq!( + memoize_roundtrip("rtk_mz_float", "3.5", "m") + .as_f64() + .unwrap(), + 3.5 + ); + assert_eq!( + memoize_roundtrip("rtk_mz_bignum", "1000000000000", "m"), + 1_000_000_000_000_i64 + ); + } + + #[test] + fn test_rtk_memoize_scalar_strings() { + assert_eq!(memoize_roundtrip("rtk_mz_str", r#""hello""#, "m"), "hello"); + assert_eq!(memoize_roundtrip("rtk_mz_empty_str", r#""""#, "m"), ""); + // Special characters, unicode and emoji must survive the round-trip. + assert_eq!( + memoize_roundtrip("rtk_mz_special", r#""a\nb\t\"c\" é\u00e9 😀""#, "m"), + "a\nb\t\"c\" éé 😀" + ); + } + + #[test] + fn test_rtk_memoize_empty_containers() { + assert_eq!( + memoize_roundtrip("rtk_mz_empty_obj", "{}", "std.length(m)"), + 0 + ); + assert_eq!( + memoize_roundtrip("rtk_mz_empty_arr", "[]", "std.length(m)"), + 0 + ); + } + + #[test] + fn test_rtk_memoize_array_of_scalars() { + assert_eq!( + memoize_roundtrip("rtk_mz_arr", "[1, 2, 3]", "m[0] + m[1] + m[2]"), + 6 + ); + assert_eq!( + memoize_roundtrip( + "rtk_mz_arr_mixed", + r#"[1, "a", true, null, [2]]"#, + r#"[m[1], m[2], m[3], m[4][0]]"# + ), + serde_json::json!(["a", true, null, 2]) + ); + } + + #[test] + fn test_rtk_memoize_nested_objects() { + assert_eq!( + memoize_roundtrip( + "rtk_mz_nested", + r#"{ a: { b: { c: [10, 20] } } }"#, + "m.a.b.c[1]" + ), + 20 + ); + } + + #[test] + fn test_rtk_memoize_array_of_objects_with_hidden() { + // Hidden fields inside array elements survive. + assert_eq!( + memoize_roundtrip( + "rtk_mz_arr_obj_hidden", + r#"[{ v: 1, h:: 2 }, { v: 3, h:: 4 }]"#, + "m[0].h + m[1].h + m[0].v + m[1].v" + ), + 10 + ); + } + + #[test] + fn test_rtk_memoize_preserves_field_order() { + // Object field order (including hidden) is preserved as jsonnet's sorted + // order via objectFieldsAll. + assert_eq!( + memoize_roundtrip( + "rtk_mz_order", + r#"{ zebra: 1, alpha:: 2, mango: 3 }"#, + "std.objectFieldsAll(m)" + ), + serde_json::json!(["alpha", "mango", "zebra"]) + ); + } + + #[test] + fn test_rtk_memoize_deeply_nested_hidden() { + assert_eq!( + memoize_roundtrip( + "rtk_mz_deep_hidden", + r#"{ a:: { b:: { c:: { d: 99 } } } }"#, + "m.a.b.c.d" + ), + 99 + ); + } + + #[test] + fn test_rtk_memoize_hidden_field_is_array() { + assert_eq!( + memoize_roundtrip( + "rtk_mz_hidden_arr", + r#"{ items:: [1, 2, 3] }"#, + "std.length(m.items)" + ), + 3 + ); + } + + #[test] + fn test_rtk_memoize_unicode_field_names() { + assert_eq!( + memoize_roundtrip( + "rtk_mz_unicode_keys", + r#"{ "clé": 1, "名前":: 2 }"#, + r#"m["clé"] + m["名前"]"# + ), + 3 + ); + } + + #[test] + fn test_rtk_memoize_unhide_collapses_to_visible() { + // `:::` (unhide) has no further super to act on in a flat snapshot, so it + // is reconstructed as a plain visible field. + let result = memoize_roundtrip( + "rtk_mz_unhide", + r#"{ x::: 5 }"#, + r#"{ has: std.objectHas(m, "x"), val: m.x }"#, + ); + assert_eq!(result["has"], true); + assert_eq!(result["val"], 5); + } + + #[test] + fn test_rtk_memoize_self_reference_in_data() { + // A data field referencing `self` is forced to a concrete value before + // snapshotting, so it round-trips. + assert_eq!( + memoize_roundtrip("rtk_mz_selfref", r#"{ a: 1, b: self.a + 10 }"#, "m.a + m.b"), + 12 + ); + } + + #[test] + fn test_rtk_memoize_top_level_scalar_cached() { + // A non-container value is cached and the second call reuses it (the + // second thunk would error if evaluated). + let result = default_evaluator() + .eval_snippet( + r#"{ + first: std.native("rtkMemoize")("rtk_mz_scalar_cache", 123), + second: std.native("rtkMemoize")("rtk_mz_scalar_cache", error "nope"), + }"#, + &EvaluatorOptions::default(), + ) + .unwrap(); + assert_eq!(result.value["first"], 123); + assert_eq!(result.value["second"], 123); + } + + #[test] + fn test_rtk_memoize_hidden_dropped_by_final_manifest() { + // Hidden fields are preserved through memoization but, as always, the + // final JSON manifest drops them. + let result = default_evaluator() + .eval_snippet( + r#"std.native("rtkMemoize")("rtk_mz_manifest", { visible: 1, hidden:: 2 })"#, + &EvaluatorOptions::default(), + ) + .unwrap(); + assert_eq!(result.value["visible"], 1); + assert!(result.value.get("hidden").is_none()); + } + + #[test] + fn test_rtk_memoize_forcing_hidden_field_propagates_error() { + // Snapshotting forces every field, including hidden ones, so a hidden + // field that errors fails the memoization even if it is never used. + let result = default_evaluator().eval_snippet( + r#"std.native("rtkMemoize")("rtk_mz_hidden_err", { ok: 1, bad:: (error "boom") }).ok"#, + &EvaluatorOptions::default(), + ); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("boom")); + } + + #[test] + fn test_rtk_memoize_function_at_root() { let result = default_evaluator().eval_snippet( - r#"std.native("rtkMemoize")("rtk_memoize_hidden_nested", { a: { b: [{ c:: 1 }] } })"#, + r#"std.native("rtkMemoize")("rtk_mz_func_root", function(x) x)"#, &EvaluatorOptions::default(), ); assert!(result.is_err()); let err = result.unwrap_err().to_string(); - assert!( - err.contains("hidden"), - "error should mention hidden field, got: {err}" + assert!(err.contains(""), "expected path, got: {err}"); + } + + #[test] + fn test_rtk_memoize_function_in_array() { + let result = default_evaluator().eval_snippet( + r#"std.native("rtkMemoize")("rtk_mz_func_arr", [1, function() 0, 3])"#, + &EvaluatorOptions::default(), ); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("[1]"), "expected [1] path, got: {err}"); + } + + #[test] + fn test_rtk_memoize_multiple_functions_sorted_deduped() { + let result = default_evaluator().eval_snippet( + r#"std.native("rtkMemoize")("rtk_mz_func_multi", { + zeta:: function() 0, + alpha:: function() 0, + data: 1, + })"#, + &EvaluatorOptions::default(), + ); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + let alpha = err.find("alpha").expect("alpha listed"); + let zeta = err.find("zeta").expect("zeta listed"); + assert!(alpha < zeta, "paths should be sorted, got: {err}"); } }