diff --git a/spec/mir_checker_spec.rb b/spec/mir_checker_spec.rb index 97c3882d..4f0c341c 100644 --- a/spec/mir_checker_spec.rb +++ b/spec/mir_checker_spec.rb @@ -174,6 +174,93 @@ def fn_def(name, body) end end + # =========================================================================== + # CROSS_FRAME_PARAM_ALLOC -- allocator on a pointer-passed param must be heap + # =========================================================================== + # + # Pin for the test 380 UAF class. A `MUTABLE T[]@list` parameter is + # pointer-passed; its Zig type is `*ArrayList(T)`. Buffer growth via + # `.append` (an InlineZig op with `alloc: :receiver_storage`) must + # resolve to `:heap`, not `:frame`. If lowering's `resolve_alloc_sym` + # ever regresses, the checker catches it here independently. + + describe "CROSS_FRAME_PARAM_ALLOC" do + # Helper: FnDef with one pointer-passed param. + def fn_with_ptr_param(body) + param = MIR::Param.new("items", "anytype", true) + MIR::FnDef.new("record", [param], "void", body, :pub, true, nil) + end + + it "detects :frame allocator on a pointer-passed @list param" do + iz = MIR::InlineZig.new("try {0}.append({alloc}, {1})", "intrinsic") + iz.allocs = { alloc: :frame } + iz.target_var = "items" + body = [MIR::ExprStmt.new(iz, false)] + errors = checker.check_fn!(fn_with_ptr_param(body)) + expect(errors.any? { |e| e.include?("CROSS_FRAME_PARAM_ALLOC") && e.include?("items") }).to be true + end + + it "passes when the allocator on a pointer-passed param is :heap" do + iz = MIR::InlineZig.new("try {0}.append({alloc}, {1})", "intrinsic") + iz.allocs = { alloc: :heap } + iz.target_var = "items" + body = [MIR::ExprStmt.new(iz, false)] + errors = checker.check_fn!(fn_with_ptr_param(body)) + expect(errors).to be_empty + end + + it "ignores :frame allocator on a NON-pointer-passed local binding" do + iz = MIR::InlineZig.new("try {0}.append({alloc}, {1})", "intrinsic") + iz.allocs = { alloc: :frame } + iz.target_var = "local_list" + body = [ + MIR::FrameSave.new("rt"), + MIR::AllocMark.new("local_list", :frame), + MIR::ExprStmt.new(iz, false), + ] + # Plain locals: cross-frame doesn't apply. The other invariants may + # speak (alloc/cleanup match etc) but not this one. + errors = checker.check_fn!(fn_def("local_only", body)) + expect(errors.none? { |e| e.include?("CROSS_FRAME_PARAM_ALLOC") }).to be true + end + + it "fires for every :frame allocator key, not just :alloc" do + iz = MIR::InlineZig.new("try {target}.put({key_alloc}, {val_alloc}, {k}, {v})", "index_set") + iz.allocs = { key_alloc: :frame, val_alloc: :frame } + iz.target_var = "map" + param = MIR::Param.new("map", "anytype", true) + fn = MIR::FnDef.new("update", [param], "void", [MIR::ExprStmt.new(iz, false)], :pub, true, nil) + errors = checker.check_fn!(fn) + ksay = errors.select { |e| e.include?("CROSS_FRAME_PARAM_ALLOC") } + expect(ksay.length).to eq(2) + expect(ksay.any? { |e| e.include?("key_alloc") }).to be true + expect(ksay.any? { |e| e.include?("val_alloc") }).to be true + end + + it "no-ops on functions with empty params (no false positives)" do + iz = MIR::InlineZig.new("try {0}.append({alloc}, {1})", "intrinsic") + iz.allocs = { alloc: :frame } + iz.target_var = "anything" + body = [ + MIR::FrameSave.new("rt"), + MIR::AllocMark.new("anything", :frame), + MIR::ExprStmt.new(iz, false), + ] + errors = checker.check_fn!(fn_def("paramless", body)) + expect(errors.none? { |e| e.include?("CROSS_FRAME_PARAM_ALLOC") }).to be true + end + + it "leaves slice (non-pointer-passed) params alone" do + param = MIR::Param.new("slice", "[]const TraceItem", false) + iz = MIR::InlineZig.new("for ({0}) |x| {{ ... }}", "iter") + iz.allocs = { alloc: :frame } + iz.target_var = "slice" + fn = MIR::FnDef.new("read_only", [param], "void", [MIR::ExprStmt.new(iz, false)], :pub, false, nil) + errors = checker.check_fn!(fn) + expect(errors.none? { |e| e.include?("CROSS_FRAME_PARAM_ALLOC") }).to be true + end + end + # =========================================================================== # FRAME_NO_REWIND -- scopes that frame-allocate must have rewind # =========================================================================== diff --git a/src/ast/diagnostic_registry.rb b/src/ast/diagnostic_registry.rb index d0a3166e..951f7dda 100644 --- a/src/ast/diagnostic_registry.rb +++ b/src/ast/diagnostic_registry.rb @@ -1713,6 +1713,13 @@ module DiagnosticRegistry cause: "Storing frame-allocated data into a heap container (or heap data into a frame container) leaves dangling pointers after frame rewind. The checker compares the InlineZig op's `:alloc` / `:key_alloc` / `:val_alloc` against the container's AllocMark allocator and rejects mismatches.", fix_hint: "Make the container's allocator and the inserted data's allocator agree. Usually the fix is upgrading the container to heap (`@list:heap` or assignment-time promotion) or making the inserted data heap-owned.", }, + CROSS_FRAME_PARAM_ALLOC: { + severity: :error, category: :mir, + template: "%{message}", + summary: "InlineZig op on a pointer-passed parameter must not use the frame allocator.", + cause: "A pointer-passed parameter (MUTABLE collection / `*T` Zig type) carries a lifetime that extends past this function's mark/restore. A `:frame` allocation here would die when the function returns, leaving the caller with a dangling buffer pointer — a cross-frame use-after-free.", + fix_hint: "Lowering bug — `resolve_alloc_sym` for `:receiver_storage` should pick `:heap` when the receiver is a pointer-passed param. The matching escape promotion lives in escape_analysis.rb (Condition 9). If both look right, check that `@current_fn_collection_params` is populated for this function.", + }, INLINE_NO_CONTRACT: { severity: :error, category: :mir, template: "%{message}", diff --git a/src/backends/pipeline_host.rb b/src/backends/pipeline_host.rb index c0afb35f..a780fa06 100644 --- a/src/backends/pipeline_host.rb +++ b/src/backends/pipeline_host.rb @@ -3540,11 +3540,11 @@ def build_bounded_concurrent_callback(conc_op, item_type, return_type, body_kind fd } - raw_ctx = MIR::Param.new("raw_ctx", "?*anyopaque") + raw_ctx = MIR::Param.new("raw_ctx", "?*anyopaque", false) params = [ - MIR::Param.new("__rt", "*Runtime"), + MIR::Param.new("__rt", "*Runtime", false), raw_ctx, - MIR::Param.new("__item", Type.new(item_type).zig_type), + MIR::Param.new("__item", Type.new(item_type).zig_type, false), ] body = [MIR::Suppress.new("__rt")] @@ -4043,12 +4043,12 @@ def build_bounded_concurrent_callback_pointer(conc_op, item_type) fd } - raw_ctx = MIR::Param.new("raw_ctx", "?*anyopaque") + raw_ctx = MIR::Param.new("raw_ctx", "?*anyopaque", false) item_zig_ptr = "*#{Type.new(item_type).zig_type}" params = [ - MIR::Param.new("__rt", "*Runtime"), + MIR::Param.new("__rt", "*Runtime", false), raw_ctx, - MIR::Param.new("__item", item_zig_ptr), + MIR::Param.new("__item", item_zig_ptr, false), ] body = [MIR::Suppress.new("__rt")] diff --git a/src/mir/escape_analysis.rb b/src/mir/escape_analysis.rb index 38cc6fd6..d266da0b 100644 --- a/src/mir/escape_analysis.rb +++ b/src/mir/escape_analysis.rb @@ -329,6 +329,47 @@ def self.analyze!(fn_nodes, heap_fns:, promotion_plans: {}) end end + # ── Condition 9: MUTABLE @list arg crosses a frame boundary ── + # When a `@list` is passed to a MUTABLE @list parameter, the callee + # treats the receiver as pointer-passed (see needs_pointer_passing? + # + the call-site routing in mir_lowering.rb). Inside the callee, + # any `.append` allocates the growth buffer; if the receiver's + # storage is :frame, the `:receiver_storage` resolver picks the + # callee's frameAlloc(). The buffer's lifetime is then bounded by + # the *callee's* frame mark (rewound on return), and the caller's + # `items.items.ptr` is dangling -- subsequent allocations corrupt + # the buffer or the next `.append` segfaults. + # + # Promote the caller's binding to :heap at the decl so the buffer + # outlives any frame, and the cleanup uses heapAlloc consistently. + e2_walk_calls(fn.body) do |call| + callee_name = call.name.to_s + callee_fn = fn_nodes[callee_name] || fn_nodes[callee_name.to_sym] + next unless callee_fn&.respond_to?(:params) && callee_fn.params + + args = call.args || [] + callee_fn.params.each_with_index do |param, idx| + next unless param[:mutable] + param_t = param[:type] + param_t = param_t.is_a?(Type) ? param_t : (Type.new(param_t) rescue nil) + next unless param_t && param_t.list_collection? + + arg = args[idx] + next unless arg.is_a?(AST::Identifier) && arg.symbol + + next if [:heap, :multiowned, :shared].include?(arg.symbol.storage) + + ti = arg.symbol.type + ti = ti.is_a?(Type) ? ti : (Type.new(ti) rescue nil) + next unless ti && ti.list_collection? + + arg.symbol.storage = :heap + decl = arg.symbol.reg + decl.storage = :heap if decl.respond_to?(:storage=) + ti.provenance = :heap if ti.respond_to?(:provenance=) && !ti.heap_provenance? + end + end + { bg_upgraded: bg_upgraded, always_escaped: always_escaped, carry_return_vars: carry_ret_vars } end diff --git a/src/mir/mir.rb b/src/mir/mir.rb index b8050ce6..00443eea 100644 --- a/src/mir/mir.rb +++ b/src/mir/mir.rb @@ -61,7 +61,15 @@ def has_own_frame? = true # Function parameter. # Zig: name: zig_type - Param = Struct.new(:name, :zig_type) do + # + # `pointer_passed` is true when this parameter receives a pointer-to-T + # at the Zig level (MUTABLE collections that lower to `*ArrayList(...)`, + # or any param whose receiver gets `&` at the call site). Note that + # collection params lower to `anytype` for polymorphism, so we can't + # infer this from `zig_type` alone — the lowering pass tags it + # explicitly. Used by `MIRChecker#verify_cross_frame_param_alloc!` + # to reject `:frame` allocators on receivers that outlive this fn. + Param = Struct.new(:name, :zig_type, :pointer_passed) do include Emittable end diff --git a/src/mir/mir_checker.rb b/src/mir/mir_checker.rb index 9b996aa4..80dac9fa 100644 --- a/src/mir/mir_checker.rb +++ b/src/mir/mir_checker.rb @@ -33,6 +33,16 @@ # a primitive or Id (with no sync/rc capability) is a compiler bug: # value types that never own heap memory must not receive cleanup nodes. # +# INV-CROSS-FRAME-PARAM-ALLOC: When an InlineZig op targets a parameter +# that was pointer-passed into this function (MUTABLE collection param +# or any param whose Zig type is `*T`), its resolved allocator must +# NOT be `:frame`. Frame allocations are bounded by THIS function's +# mark/restore; the parameter's lifetime extends past that mark, so +# a frame alloc for it produces a buffer that dies before its owner. +# Cross-frame UAF — caught here even if mir_lowering's allocator- +# routing in `resolve_alloc_sym` regresses. Defense in depth on top +# of escape_analysis.rb's Condition 9 promotion. See test 380. +# # THE MOMENT this checker adds logic outside these invariants, it is no # longer a gatekeeper -- it is ad-hoc patch code that gives false confidence. # Every new check must be justified by one of these invariants. @@ -114,6 +124,7 @@ def check_fn!(fn_def, strict: false) hpt_leaks.each { |e| @errors << e } verify_inline_alloc_contracts!(inline_alloc_nodes, allocs) + verify_cross_frame_param_alloc!(inline_alloc_nodes, fn_def) verify_alloc_cleanup_match!(allocs, cleanups, errdefer_destroy_names) verify_zig_contracts!(all_zig_nodes) verify_raw_justified!(all_zig_nodes) @@ -356,6 +367,43 @@ def verify_inline_alloc_contracts!(inline_nodes, allocs) end end + # CROSS_FRAME_PARAM_ALLOC: an InlineZig op targeting a pointer-passed + # parameter must not use the `:frame` allocator. Pointer-passed params + # (MUTABLE collection / `*T` Zig type) carry a lifetime that extends + # past the current function's mark/restore -- a frame allocation here + # would die before the binding it serves, producing a cross-frame UAF. + # + # Independently re-derives "is this param pointer-passed?" from the + # MIR-level Zig type (prefix `*`) so the check is decoupled from + # mir_lowering's `@current_fn_collection_params` set. Defense in depth: + # if lowering's `resolve_alloc_sym` or escape_analysis's Condition 9 + # ever regresses, this catches the resulting bad MIR before codegen. + def verify_cross_frame_param_alloc!(inline_nodes, fn_def) + return if fn_def.params.nil? || fn_def.params.empty? + + # `pointer_passed` flag is set on MIR::Param at lowering time. Collection + # params lower to `anytype` (polymorphic) so we can't read pointer-pass + # status from the Zig type string alone — the lowering tags it explicitly. + pointer_passed = fn_def.params.each_with_object(Set.new) do |p, set| + set << p.name.to_s if p.respond_to?(:pointer_passed) && p.pointer_passed + end + return if pointer_passed.empty? + + inline_nodes.each do |iz| + next unless iz.allocs + target = iz.target_var.to_s + next unless pointer_passed.include?(target) + + iz.allocs.each do |alloc_key, alloc_sym| + next unless alloc_sym == :frame + @errors << error(:CROSS_FRAME_PARAM_ALLOC, target, + "operation #{alloc_key} is :frame but '#{target}' is a pointer-passed " \ + "parameter (lifetime extends past this function's frame mark; " \ + "buffer would dangle on return). Use :heap.") + end + end + end + # ALLOC_CLEANUP_MISMATCH: allocator at AllocMark must match allocator in Cleanup. # # Every binding has a single allocator for its entire lifetime (INV-1). If the diff --git a/src/mir/mir_lowering.rb b/src/mir/mir_lowering.rb index bbae6427..0054b3ee 100644 --- a/src/mir/mir_lowering.rb +++ b/src/mir/mir_lowering.rb @@ -567,10 +567,22 @@ def resolve_alloc_sym(alloc_sym, receiver_type = nil, target_node = nil, node = # after the Identifier was annotated (Identifier.storage may be stale). needs_heap ||= if node.is_a?(AST::MethodCall) obj = node.object - obj.storage == :heap || obj.symbol&.reg&.storage == :heap + obj.storage == :heap || obj.symbol&.reg&.storage == :heap || + # Pointer-passed `@list` parameter: the receiver crosses a frame + # boundary into this function. The caller's frame allocator is + # not reachable here -- using *this* function's frameAlloc for + # buffer growth would tie the buffer's lifetime to the callee's + # frame mark, which gets rewound on return. The caller's + # `items.items.ptr` would then be dangling, and a subsequent + # `.append` writes through it. Force heap here so the buffer + # outlives any frame; the matching escape-promote on the + # caller's binding (escape_analysis.rb Condition 9) ensures + # the cleanup uses heapAlloc too. + (obj.is_a?(AST::Identifier) && @current_fn_collection_params&.include?(obj.name)) elsif node.mutates_receiver first = node.args&.first - first&.storage == :heap || first&.symbol&.reg&.storage == :heap + first&.storage == :heap || first&.symbol&.reg&.storage == :heap || + (first.is_a?(AST::Identifier) && @current_fn_collection_params&.include?(first.name)) end needs_heap ||= (node.storage == :heap) needs_heap ? :heap : :frame @@ -843,7 +855,7 @@ def lower_union_def(node) methods = if deinit_stmts.any? deinit_fn = MIR::FnDef.new( "deinit", - [MIR::Param.new("self", "*@This()"), MIR::Param.new("alloc", "std.mem.Allocator")], + [MIR::Param.new("self", "*@This()", false), MIR::Param.new("alloc", "std.mem.Allocator", false)], "void", deinit_stmts, :pub @@ -975,9 +987,20 @@ def lower_function_def(node) # between its decl and the next same-name decl. @fn_name_rename_map = {} # original_name => disambiguated Zig name - # Mutable scalar params: Zig params are const, need shadow vars + # Mutable scalar params: Zig params are const, need shadow vars. + # Collections (MUTABLE @list / pool / etc.) are pointer-passed and + # mutated through the pointer — NOT scalar shadows. Exclude them + # explicitly: `transpile_type` returns "anytype" for MUTABLE @list + # which doesn't match the [] / * prefix check, so without this they + # incorrectly received the `_m_` rename. The rename then masked the + # original name from MIR-level checks (notably the new + # INV-CROSS-FRAME-PARAM-ALLOC verifier in mir_checker.rb). mutable_scalar_params = (node.params || []).select { |p| - p[:mutable] && !transpile_type(p[:type], is_param: true).start_with?("[]", "*") + next false unless p[:mutable] + p_type_obj = p[:type].is_a?(Type) ? p[:type] : (Type.new(p[:type] || :Any) rescue nil) + next false if p_type_obj && (p_type_obj.collection? || + (p_type_obj.respond_to?(:needs_pointer_passing?) && p_type_obj.needs_pointer_passing?)) + !transpile_type(p[:type], is_param: true).start_with?("[]", "*") }.map { |p| p[:name] }.to_set @current_fn_mutable_scalar_params = mutable_scalar_params @@ -1024,12 +1047,19 @@ def lower_function_def(node) else transpile_type(param[:type], is_param: true) end - MIR::Param.new(p_name, zig_t) + # `pointer_passed`: this param's receiver is a pointer-to-T at the + # Zig level, so allocations made inside this function on its behalf + # outlive the function. Mirrors `@current_fn_collection_params`'s + # criteria so the MIR checker can independently verify the + # allocator-routing decision (see INV-CROSS-FRAME-PARAM-ALLOC). + pointer_passed = p_type_obj.needs_pointer_passing? || + (param[:mutable] && p_type_obj.list_collection?) + MIR::Param.new(p_name, zig_t, pointer_passed) } # Prepend rt param if fn_needs_rt - params_mir.unshift(MIR::Param.new("rt", "*Runtime")) + params_mir.unshift(MIR::Param.new("rt", "*Runtime", false)) end # Comptime params @@ -2036,10 +2066,13 @@ def lower_lambda(node) fn_name = "_lambda_#{@lambda_counter}" params_list = sig.params || [] - params_mir = [MIR::Param.new("_rt", "*Runtime")] + params_list.map { |p| + params_mir = [MIR::Param.new("_rt", "*Runtime", false)] + params_list.map { |p| p_type = p[:type] type_str = p_type.is_a?(Type) ? p_type.zig_type(is_param: true) : transpile_type(p_type || :Any, is_param: true) - MIR::Param.new(p[:name], type_str) + pt_obj = p_type.is_a?(Type) ? p_type : (Type.new(p_type) rescue nil) + pp = !!(pt_obj && (pt_obj.respond_to?(:needs_pointer_passing?) && pt_obj.needs_pointer_passing? || + (p[:mutable] && pt_obj.respond_to?(:list_collection?) && pt_obj.list_collection?))) + MIR::Param.new(p[:name], type_str, pp) } ret = sig.return_type || :Void diff --git a/transpile-tests/380_list_append_buffer_uaf.cht b/transpile-tests/380_list_append_buffer_uaf.cht new file mode 100644 index 00000000..05bc0abb --- /dev/null +++ b/transpile-tests/380_list_append_buffer_uaf.cht @@ -0,0 +1,74 @@ +# Test: a helper function that does `.append` on a `MUTABLE T[]@list` +# parameter must allocate the list's growth buffer with a long-lived +# allocator -- not the helper's own frame allocator. +# +# Previously, `append` resolved its allocator via `:receiver_storage`, +# which read the receiver binding's storage tag (`:frame` for default +# locals) and emitted `rt.frameAlloc()`. Inside a helper, that's the +# *helper's* frame mark. The helper's `defer rt.restoreFrameMark()` +# rewinds those bytes on return -- the caller's `items.items.ptr` is +# now dangling. Subsequent allocations in the caller advance the +# bump arena past the freed bytes; the next `.append` in the helper +# then writes through a dangling pointer to deallocated memory. +# +# Repro pattern: +# 1. Caller declares `MUTABLE items: T[]@list = List[]` (frame). +# 2. Caller calls a helper that `.append`s a struct. +# 3. Caller does any `frameAlloc`-using work (e.g. `.toString` / +# string concat) so the bump position advances past the freed +# buffer bytes. +# 4. Caller calls the helper again. The buffer has length 1, +# capacity > 1 (no grow needed), so `.append` writes to a +# dangling `items[1]` pointer -- segfault. +# +# Fix: when a helper receives a pointer-passed `@list` parameter, +# `.append` must use the heap allocator (or any allocator whose +# lifetime outlives the function call). The `:receiver_storage` +# resolver at the helper's call site should treat pointer-passed +# `@list` receivers as heap-stored. + +STRUCT TraceItem { + step: Int64, + kind: Int64, + slot: Int64, + iBefore: Int64, + iAfter: Int64, + fBefore: Float64, + fAfter: Float64, + ip: Int64, +} + +FN record!(MUTABLE items: TraceItem[]@list, step: Int64) RETURNS !Void -> + items.append(TraceItem{ + step: step, kind: 1_i64, slot: 0_i64, + iBefore: 0_i64, iAfter: step, + fBefore: 0.0, fAfter: 0.0, ip: step * 100_i64 + }); + RETURN; +END + +FN burnFrame!(value: Int64) RETURNS !String -> + s1 = value.toString() OR "?"; + s2 = (value * 2_i64).toString() OR "?"; + s3 = (value + 1000_i64).toString() OR "?"; + RETURN s1 + ":" + s2 + ":" + s3; +END + +FN main!() RETURNS !Void -> + MUTABLE items: TraceItem[]@list = List[]; + record!(items, 1_i64); + junk1 = burnFrame!(42_i64); + junk2 = burnFrame!(43_i64); + junk3 = burnFrame!(44_i64); + record!(items, 2_i64); + junk4 = burnFrame!(45_i64); + record!(items, 3_i64); + + ASSERT items.length() == 3_i64, "len"; + ASSERT items[0_i64].step == 1_i64, "first.step (UAF site)"; + ASSERT items[0_i64].kind == 1_i64, "first.kind"; + ASSERT items[0_i64].iAfter == 1_i64, "first.iAfter"; + ASSERT items[1_i64].step == 2_i64, "second.step"; + ASSERT items[2_i64].step == 3_i64, "third.step"; + RETURN; +END