From a31e628dd11b9c89c350b3cd882fae926823a686 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Mar 2026 19:03:29 -0700 Subject: [PATCH 1/5] [WIP] Handle StructCmpxchg::expected in Heap2Local We previously assumed that if we were optimizing a StructCmpxchg in Heap2Local, then the flow must be from the `ref` operand. This was based on an assumption that allocations are processed in order of appearance, and because the `ref` operand appears before the `expected` operand, it must be the one getting optimized. But this neglected the fact that array allocations are processed before struct allocations, so if `expected` is an array, it could be optimized first. This faulty assumption led to assertion failures and invalid code. Fix the problem by handling flows from `expected` explicitly. When a non-escaping allocation flows into `expected`, we know it cannot possibly match the value already in the accessed struct, so we can optimize the `cmpxchg` to an atomic `struct.get`. WIP because this is not quite correct due to stale LocalGraph information. When `ref` is subsequently optimized, the LocalGraph does not know about the scratch local it is written to, so the `struct.atomic.get` it now flows into is not properly optimized out. 
--- src/passes/Heap2Local.cpp | 37 +++++++++++---- test/lit/passes/heap2local-rmw.wast | 71 ++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 11 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 00bd1ad2cf2..aec33083ac3 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -157,11 +157,11 @@ #include "ir/local-graph.h" #include "ir/parents.h" #include "ir/properties.h" -#include "ir/type-updating.h" #include "ir/utils.h" #include "pass.h" #include "support/unique_deferring_queue.h" #include "wasm-builder.h" +#include "wasm-type.h" #include "wasm.h" namespace wasm { @@ -1110,17 +1110,36 @@ struct Struct2Local : PostWalker { } void visitStructCmpxchg(StructCmpxchg* curr) { - if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { - // The allocation can't flow into `replacement` if we've made it this far, - // but it might flow into `expected`, in which case we don't need to do - // anything because we would still be performing the cmpxchg on a real - // struct. We only need to replace the cmpxchg if the ref is being - // replaced with locals. + if (curr->type == Type::unreachable) { + // Leave this for DCE. return; } - if (curr->type == Type::unreachable) { - // As with RefGetDesc and StructGet, above. + // The allocation might flow into `ref` or `expected`, but not + // `replacement`, because then it would be considered to have escaped. + if (analyzer.getInteraction(curr->expected) == + ParentChildInteraction::Flows) { + // Since the allocation does not escape, it cannot possibly match the + // value already in the struct. The cmpxchg will just do a read. Drop the + // other arguments and do the atomic read at the end, when the cmpxchg + // would have happened. Use a nullable scratch local in case we also + // optimize `ref` later and need to replace it with a null. 
+ auto refType = curr->ref->type.with(Nullable); + auto refScratch = builder.addVar(func, refType); + auto* block = builder.makeBlock( + {builder.makeLocalSet(refScratch, curr->ref), + builder.makeDrop(curr->expected), + builder.makeDrop(curr->replacement), + builder.makeStructGet(curr->index, + builder.makeLocalGet(refScratch, refType), + curr->order, + curr->type)}); + replaceCurrent(block); + return; + } + if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { + // Since the allocation does not flow from `ref`, it must not flow through + // this cmpxchg at all. return; } diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index a8245b3b9f9..37c72054693 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; RUN: wasm-opt %s -all --heap2local -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --heap2local -S -o - | filecheck %s (module (type $i32 (struct (field (mut i32)))) @@ -52,16 +52,24 @@ ;; CHECK: (func $no-escape-cmpxchg-expected (type $3) (param $0 (ref null $struct)) (result (ref null $struct)) ;; CHECK-NEXT: (local $1 (ref null $struct)) - ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $struct 0 + ;; CHECK-NEXT: (local $2 (ref null $struct)) + ;; CHECK-NEXT: (local.set $2 ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.atomic.get $struct 0 + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $no-escape-cmpxchg-expected (param (ref null $struct)) (result (ref null $struct)) ;; Allocations that flow into the cmpxchg `expected` 
operand do not escape @@ -939,3 +947,62 @@ ) ) ) + +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $struct (struct (field (mut (ref null $array))))) + (type $struct (struct (field (mut (ref null $array))))) + ;; CHECK: (type $array (array (mut i32))) + (type $array (array (field (mut i32)))) + ) + ;; CHECK: (type $2 (func)) + + ;; CHECK: (func $test (type $2) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $2 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.new_default $array + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.atomic.get $struct 0 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test + (drop + (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + ;; This will be optimized out before the previous `ref` operand because + ;; array allocations are processed before struct allocations. If we did + ;; not handle flows from `expected` specially, we would incorrectly + ;; refinalize the cmpxchg to have the array replacement type, then get + ;; confused and crash when later processing the `ref` operand. 
+ (array.new_default $array (i32.const 1)) + (array.new_default $array (i32.const 2)) + ) + ) + ) +) From 698251d1e756c2f6150a9b266b09139f1b062f3a Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Mar 2026 20:34:21 -0700 Subject: [PATCH 2/5] Fix staleness by updating parents --- src/ir/parents.h | 7 ++- src/passes/Heap2Local.cpp | 26 +++++---- test/lit/passes/heap2local-rmw.wast | 83 +++++++++++++++++++++++++++-- 3 files changed, 100 insertions(+), 16 deletions(-) diff --git a/src/ir/parents.h b/src/ir/parents.h index 10652a8a1f1..6eea568fd2d 100644 --- a/src/ir/parents.h +++ b/src/ir/parents.h @@ -17,7 +17,8 @@ #ifndef wasm_ir_parents_h #define wasm_ir_parents_h -#include "parsing.h" +#include "wasm-traversal.h" +#include "wasm.h" namespace wasm { @@ -32,6 +33,10 @@ struct Parents { return nullptr; } + void setParent(Expression* child, Expression* parent) { + inner.parentMap[child] = parent; + } + private: struct Inner : public ExpressionStackWalker> { diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index aec33083ac3..bfd07237505 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -203,14 +203,14 @@ struct EscapeAnalyzer { // We use a lazy graph here because we only need this for reference locals, // and even among them, only ones we see an allocation is stored to. const LazyLocalGraph& localGraph; - const Parents& parents; + Parents& parents; const BranchUtils::BranchTargets& branchTargets; const PassOptions& passOptions; Module& wasm; EscapeAnalyzer(const LazyLocalGraph& localGraph, - const Parents& parents, + Parents& parents, const BranchUtils::BranchTargets& branchTargets, const PassOptions& passOptions, Module& wasm) @@ -1126,15 +1126,21 @@ struct Struct2Local : PostWalker { // optimize `ref` later and need to replace it with a null. 
auto refType = curr->ref->type.with(Nullable); auto refScratch = builder.addVar(func, refType); - auto* block = builder.makeBlock( - {builder.makeLocalSet(refScratch, curr->ref), - builder.makeDrop(curr->expected), - builder.makeDrop(curr->replacement), - builder.makeStructGet(curr->index, - builder.makeLocalGet(refScratch, refType), - curr->order, - curr->type)}); + auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref); + auto* getRefScratch = builder.makeLocalGet(refScratch, refType); + auto* structGet = builder.makeStructGet( + curr->index, getRefScratch, curr->order, curr->type); + auto* block = builder.makeBlock({setRefScratch, + builder.makeDrop(curr->expected), + builder.makeDrop(curr->replacement), + structGet}); replaceCurrent(block); + // Record the new data flow into and out of the new scratch local. This is + // necessary in case `ref` gets optimized later so we can detect that it + // flows to the new struct.atomic.get. + analyzer.parents.setParent(curr->ref, setRefScratch); + analyzer.parents.setParent(getRefScratch, structGet); + analyzer.parents.setParent(structGet, block); return; } if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index 37c72054693..192b658d9d3 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -958,13 +958,13 @@ ) ;; CHECK: (type $2 (func)) - ;; CHECK: (func $test (type $2) + ;; CHECK: (func $cmpxchg-expected-first (type $2) ;; CHECK-NEXT: (local $0 i32) ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) ;; CHECK-NEXT: (local $2 (ref null $array)) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result (ref null $array)) - ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) ;; CHECK-NEXT: (local.set $2 ;; CHECK-NEXT: (ref.null none) @@ -985,13 +985,16 @@ ;; CHECK-NEXT: (i32.const 2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; 
CHECK-NEXT: (struct.atomic.get $struct 0 - ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test + (func $cmpxchg-expected-first (drop (struct.atomic.rmw.cmpxchg $struct 0 (struct.new_default $struct) @@ -1001,8 +1004,78 @@ ;; refinalize the cmpxchg to have the array replacement type, then get ;; confused and crash when later processing the `ref` operand. (array.new_default $array (i32.const 1)) + ;; Normally replacements escape and cannot be optimized, but the cmpxchg + ;; is replaced when `expected` is processed, so we end up seeing that + ;; this doesn't escape. (array.new_default $array (i32.const 2)) ) ) ) ) + +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $struct (struct (field (mut (ref null $array))))) + (type $struct (struct (field (mut (ref null $array))))) + ;; CHECK: (type $array (array (mut i32))) + (type $array (array (field (mut i32)))) + ) + ;; CHECK: (type $2 (func (result (ref $array)))) + + ;; CHECK: (func $cmpxchg-expected-first (type $2) (result (ref $array)) + ;; CHECK-NEXT: (local $array (ref $array)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $3 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $array + ;; CHECK-NEXT: (array.new_default 
$array + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: ) + (func $cmpxchg-expected-first (result (ref $array)) + ;; Same as before, but now the replacement still escapes. Nothing should go + ;; wrong. + (local $array (ref $array)) + (drop + (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + (array.new_default $array (i32.const 1)) + (local.tee $array + (array.new_default $array (i32.const 2)) + ) + ) + ) + (local.get $array) + ) +) From f1cdca4fd6340aa9c20928888b6e296d019a8abf Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 18 Mar 2026 20:41:04 -0700 Subject: [PATCH 3/5] do less --- src/passes/Heap2Local.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index bfd07237505..4fcdeea41ff 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -1136,11 +1136,10 @@ struct Struct2Local : PostWalker { structGet}); replaceCurrent(block); // Record the new data flow into and out of the new scratch local. This is - // necessary in case `ref` gets optimized later so we can detect that it - // flows to the new struct.atomic.get. + // necessary in case `ref` gets processed later so we can detect that it + // flows to the new struct.atomic.get, which may need to be replaced. 
analyzer.parents.setParent(curr->ref, setRefScratch); analyzer.parents.setParent(getRefScratch, structGet); - analyzer.parents.setParent(structGet, block); return; } if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { From 9a2df0a35eb2e092e1a26f48437e90c26cdbf8c0 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 19 Mar 2026 14:36:09 -0700 Subject: [PATCH 4/5] Handle scratch locals --- src/passes/Heap2Local.cpp | 44 +++++++++++++------ test/lit/passes/heap2local-rmw.wast | 67 +++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 13 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 4fcdeea41ff..5c48db12f6d 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -192,6 +192,10 @@ enum class ParentChildInteraction : int8_t { None, }; +// When we insert scratch locals, we need to record the flow between their set +// and subsequent get. +using ScratchInfo = std::unordered_map; + // Core analysis that provides an escapes() method to check if an allocation // escapes in a way that prevents optimizing it away as described above. It also // stashes information about the relevant expressions as it goes, which helps @@ -201,8 +205,11 @@ struct EscapeAnalyzer { // parents, and via branches, and through locals. // // We use a lazy graph here because we only need this for reference locals, - // and even among them, only ones we see an allocation is stored to. + // and even among them, only ones we see an allocation is stored to. The + // LocalGraph is is augmented by ScratchInfo, since the LocalGraph does not + // know about scratch locals we add. 
const LazyLocalGraph& localGraph; + ScratchInfo& scratchInfo; Parents& parents; const BranchUtils::BranchTargets& branchTargets; @@ -210,12 +217,13 @@ struct EscapeAnalyzer { Module& wasm; EscapeAnalyzer(const LazyLocalGraph& localGraph, + ScratchInfo& scratchInfo, Parents& parents, const BranchUtils::BranchTargets& branchTargets, const PassOptions& passOptions, Module& wasm) - : localGraph(localGraph), parents(parents), branchTargets(branchTargets), - passOptions(passOptions), wasm(wasm) {} + : localGraph(localGraph), scratchInfo(scratchInfo), parents(parents), + branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {} // We must track all the local.sets that write the allocation, to verify // exclusivity. @@ -275,15 +283,23 @@ struct EscapeAnalyzer { } if (auto* set = parent->dynCast()) { - // This is one of the sets we are written to, and so we must check for - // exclusive use of our allocation by all the gets that read the value. - // Note the set, and we will check the gets at the end once we know all - // of our sets. - sets.insert(set); - - // We must also look at how the value flows from those gets. - for (auto* get : localGraph.getSetInfluences(set)) { + + // We must also look at how the value flows from those gets. Check the + // scratchInfo first because it contains sets that localGraph doesn't + // know about. + if (auto it = scratchInfo.find(set); it != scratchInfo.end()) { + auto* get = it->second; flows.push({get, parents.getParent(get)}); + } else { + // This is one of the sets we are written to, and so we must check for + // exclusive use of our allocation by all the gets that read the + // value. Note the set, and we will check the gets at the end once we + // know all of our sets. (For scratch locals above, we know all the + // sets are already accounted for.) 
+ sets.insert(set); + for (auto* get : localGraph.getSetInfluences(set)) { + flows.push({get, parents.getParent(get)}); + } } } @@ -1139,6 +1155,7 @@ struct Struct2Local : PostWalker { // necessary in case `ref` gets processed later so we can detect that it // flows to the new struct.atomic.get, which may need to be replaced. analyzer.parents.setParent(curr->ref, setRefScratch); + analyzer.scratchInfo.insert({setRefScratch, getRefScratch}); analyzer.parents.setParent(getRefScratch, structGet); return; } @@ -1492,6 +1509,7 @@ struct Heap2Local { const PassOptions& passOptions; LazyLocalGraph localGraph; + ScratchInfo scratchInfo; Parents parents; BranchUtils::BranchTargets branchTargets; @@ -1563,7 +1581,7 @@ struct Heap2Local { continue; } EscapeAnalyzer analyzer( - localGraph, parents, branchTargets, passOptions, wasm); + localGraph, scratchInfo, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. @@ -1583,7 +1601,7 @@ struct Heap2Local { // Check for escaping, noting relevant information as we go. If this does // not escape, optimize it into locals. 
EscapeAnalyzer analyzer( - localGraph, parents, branchTargets, passOptions, wasm); + localGraph, scratchInfo, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { Struct2Local(allocation, analyzer, func, wasm); optimized = true; diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index 192b658d9d3..2892d184514 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -1079,3 +1079,70 @@ (local.get $array) ) ) + +(module + (rec + ;; CHECK: (type $0 (func)) + + ;; CHECK: (rec + ;; CHECK-NEXT: (type $array (array i8)) + (type $array (array i8)) + ;; CHECK: (type $struct (struct (field (mut anyref)))) + (type $struct (struct (field (mut anyref)))) + ) + ;; CHECK: (func $test-scratch-oob (type $0) + ;; CHECK-NEXT: (local $a arrayref) + ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $2 anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.new_fixed $array 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $test-scratch-oob + (local $a arrayref) + ;; This allocation and set get optimized, creating the LocalGraph flower. 
+ (local.set $a + (array.new_fixed $array 0) + ) + (drop + ;; Then `expected` gets optimized, creating a new scratch local. Since the + ;; flower is already created, the index of the scratch local would be out + ;; of bounds if we tried to look it up in the LocalGraph. + (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + (array.new_fixed $array 0) + (array.new_fixed $array 0) + ) + ) + (unreachable) + ) +) From e30478c05c0ef2cbe0e47ef55c8a0b65f17a0499 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 19 Mar 2026 18:01:12 -0700 Subject: [PATCH 5/5] comment about only recording some scratch locals --- src/passes/Heap2Local.cpp | 10 +++++++--- test/lit/passes/heap2local-rmw.wast | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 5c48db12f6d..d8db73aa67b 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -192,8 +192,8 @@ enum class ParentChildInteraction : int8_t { None, }; -// When we insert scratch locals, we need to record the flow between their set -// and subsequent get. +// When we insert scratch locals, we sometimes need to record the flow between +// their set and subsequent get. using ScratchInfo = std::unordered_map; // Core analysis that provides an escapes() method to check if an allocation @@ -207,7 +207,11 @@ struct EscapeAnalyzer { // We use a lazy graph here because we only need this for reference locals, // and even among them, only ones we see an allocation is stored to. The // LocalGraph is is augmented by ScratchInfo, since the LocalGraph does not - // know about scratch locals we add. + // know about scratch locals we add. We currently only record scratch locals + // that might possibly have another optimized allocation flowing through them. 
+ // If it's not possible for another optimized allocation to flow through the + // scratch local, then we will never look at it again after creating it and do + // not need to record it here. const LazyLocalGraph& localGraph; ScratchInfo& scratchInfo; Parents& parents; diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index 2892d184514..71504dd47a3 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -1090,7 +1090,7 @@ ;; CHECK: (type $struct (struct (field (mut anyref)))) (type $struct (struct (field (mut anyref)))) ) - ;; CHECK: (func $test-scratch-oob (type $0) + ;; CHECK: (func $test-cmpxchg-scratch-oob (type $0) ;; CHECK-NEXT: (local $a arrayref) ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) ;; CHECK-NEXT: (local $2 anyref) @@ -1127,7 +1127,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) - (func $test-scratch-oob + (func $test-cmpxchg-scratch-oob (local $a arrayref) ;; This allocation and set get optimized, creating the LocalGraph flower. (local.set $a