diff --git a/src/ir/parents.h b/src/ir/parents.h index 10652a8a1f1..6eea568fd2d 100644 --- a/src/ir/parents.h +++ b/src/ir/parents.h @@ -17,7 +17,8 @@ #ifndef wasm_ir_parents_h #define wasm_ir_parents_h -#include "parsing.h" +#include "wasm-traversal.h" +#include "wasm.h" namespace wasm { @@ -32,6 +33,10 @@ struct Parents { return nullptr; } + void setParent(Expression* child, Expression* parent) { + inner.parentMap[child] = parent; + } + private: struct Inner : public ExpressionStackWalker> { diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index 00bd1ad2cf2..d8db73aa67b 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -157,11 +157,11 @@ #include "ir/local-graph.h" #include "ir/parents.h" #include "ir/properties.h" -#include "ir/type-updating.h" #include "ir/utils.h" #include "pass.h" #include "support/unique_deferring_queue.h" #include "wasm-builder.h" +#include "wasm-type.h" #include "wasm.h" namespace wasm { @@ -192,6 +192,10 @@ enum class ParentChildInteraction : int8_t { None, }; +// When we insert scratch locals, we sometimes need to record the flow between +// their set and subsequent get. +using ScratchInfo = std::unordered_map; + // Core analysis that provides an escapes() method to check if an allocation // escapes in a way that prevents optimizing it away as described above. It also // stashes information about the relevant expressions as it goes, which helps @@ -201,21 +205,29 @@ struct EscapeAnalyzer { // parents, and via branches, and through locals. // // We use a lazy graph here because we only need this for reference locals, - // and even among them, only ones we see an allocation is stored to. + // and even among them, only ones we see an allocation is stored to. The + // LocalGraph is augmented by ScratchInfo, since the LocalGraph does not + // know about scratch locals we add. We currently only record scratch locals + // that might possibly have another optimized allocation flowing through them. 
+ // If it's not possible for another optimized allocation to flow through the + // scratch local, then we will never look at it again after creating it and do + // not need to record it here. const LazyLocalGraph& localGraph; - const Parents& parents; + ScratchInfo& scratchInfo; + Parents& parents; const BranchUtils::BranchTargets& branchTargets; const PassOptions& passOptions; Module& wasm; EscapeAnalyzer(const LazyLocalGraph& localGraph, - const Parents& parents, + ScratchInfo& scratchInfo, + Parents& parents, const BranchUtils::BranchTargets& branchTargets, const PassOptions& passOptions, Module& wasm) - : localGraph(localGraph), parents(parents), branchTargets(branchTargets), - passOptions(passOptions), wasm(wasm) {} + : localGraph(localGraph), scratchInfo(scratchInfo), parents(parents), + branchTargets(branchTargets), passOptions(passOptions), wasm(wasm) {} // We must track all the local.sets that write the allocation, to verify // exclusivity. @@ -275,15 +287,23 @@ struct EscapeAnalyzer { } if (auto* set = parent->dynCast()) { - // This is one of the sets we are written to, and so we must check for - // exclusive use of our allocation by all the gets that read the value. - // Note the set, and we will check the gets at the end once we know all - // of our sets. - sets.insert(set); - - // We must also look at how the value flows from those gets. - for (auto* get : localGraph.getSetInfluences(set)) { + + // We must also look at how the value flows from those gets. Check the + // scratchInfo first because it contains sets that localGraph doesn't + // know about. + if (auto it = scratchInfo.find(set); it != scratchInfo.end()) { + auto* get = it->second; flows.push({get, parents.getParent(get)}); + } else { + // This is one of the sets we are written to, and so we must check for + // exclusive use of our allocation by all the gets that read the + // value. Note the set, and we will check the gets at the end once we + // know all of our sets. 
(For scratch locals above, we know all the + // sets are already accounted for.) + sets.insert(set); + for (auto* get : localGraph.getSetInfluences(set)) { + flows.push({get, parents.getParent(get)}); + } } } @@ -1110,17 +1130,42 @@ struct Struct2Local : PostWalker { } void visitStructCmpxchg(StructCmpxchg* curr) { - if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { - // The allocation can't flow into `replacement` if we've made it this far, - // but it might flow into `expected`, in which case we don't need to do - // anything because we would still be performing the cmpxchg on a real - // struct. We only need to replace the cmpxchg if the ref is being - // replaced with locals. + if (curr->type == Type::unreachable) { + // Leave this for DCE. return; } - if (curr->type == Type::unreachable) { - // As with RefGetDesc and StructGet, above. + // The allocation might flow into `ref` or `expected`, but not + // `replacement`, because then it would be considered to have escaped. + if (analyzer.getInteraction(curr->expected) == + ParentChildInteraction::Flows) { + // Since the allocation does not escape, it cannot possibly match the + // value already in the struct. The cmpxchg will just do a read. Drop the + // other arguments and do the atomic read at the end, when the cmpxchg + // would have happened. Use a nullable scratch local in case we also + // optimize `ref` later and need to replace it with a null. 
+ auto refType = curr->ref->type.with(Nullable); + auto refScratch = builder.addVar(func, refType); + auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref); + auto* getRefScratch = builder.makeLocalGet(refScratch, refType); + auto* structGet = builder.makeStructGet( + curr->index, getRefScratch, curr->order, curr->type); + auto* block = builder.makeBlock({setRefScratch, + builder.makeDrop(curr->expected), + builder.makeDrop(curr->replacement), + structGet}); + replaceCurrent(block); + // Record the new data flow into and out of the new scratch local. This is + // necessary in case `ref` gets processed later so we can detect that it + // flows to the new struct.atomic.get, which may need to be replaced. + analyzer.parents.setParent(curr->ref, setRefScratch); + analyzer.scratchInfo.insert({setRefScratch, getRefScratch}); + analyzer.parents.setParent(getRefScratch, structGet); + return; + } + if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { + // Since the allocation does not flow from `ref`, it must not flow through + // this cmpxchg at all. return; } @@ -1468,6 +1513,7 @@ struct Heap2Local { const PassOptions& passOptions; LazyLocalGraph localGraph; + ScratchInfo scratchInfo; Parents parents; BranchUtils::BranchTargets branchTargets; @@ -1539,7 +1585,7 @@ struct Heap2Local { continue; } EscapeAnalyzer analyzer( - localGraph, parents, branchTargets, passOptions, wasm); + localGraph, scratchInfo, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { // Convert the allocation and all its uses into a struct. Then convert // the struct into locals. @@ -1559,7 +1605,7 @@ struct Heap2Local { // Check for escaping, noting relevant information as we go. If this does // not escape, optimize it into locals. 
EscapeAnalyzer analyzer( - localGraph, parents, branchTargets, passOptions, wasm); + localGraph, scratchInfo, parents, branchTargets, passOptions, wasm); if (!analyzer.escapes(allocation)) { Struct2Local(allocation, analyzer, func, wasm); optimized = true; diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index a8245b3b9f9..71504dd47a3 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; RUN: wasm-opt %s -all --heap2local -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt -all --heap2local -S -o - | filecheck %s (module (type $i32 (struct (field (mut i32)))) @@ -52,16 +52,24 @@ ;; CHECK: (func $no-escape-cmpxchg-expected (type $3) (param $0 (ref null $struct)) (result (ref null $struct)) ;; CHECK-NEXT: (local $1 (ref null $struct)) - ;; CHECK-NEXT: (struct.atomic.rmw.cmpxchg $struct 0 + ;; CHECK-NEXT: (local $2 (ref null $struct)) + ;; CHECK-NEXT: (local.set $2 ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result nullref) ;; CHECK-NEXT: (local.set $1 ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.atomic.get $struct 0 + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $no-escape-cmpxchg-expected (param (ref null $struct)) (result (ref null $struct)) ;; Allocations that flow into the cmpxchg `expected` operand do not escape @@ -939,3 +947,202 @@ ) ) ) + +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $struct (struct (field (mut (ref null $array))))) + (type $struct (struct (field (mut (ref null $array))))) + ;; CHECK: (type $array (array (mut i32))) + (type $array (array (field (mut i32)))) + ) + ;; CHECK: (type $2 (func)) + + ;; 
CHECK: (func $cmpxchg-expected-first (type $2) + ;; CHECK-NEXT: (local $0 i32) + ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $2 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $0 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.new_default $array + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-expected-first + (drop + (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + ;; This will be optimized out before the previous `ref` operand because + ;; array allocations are processed before struct allocations. If we did + ;; not handle flows from `expected` specially, we would incorrectly + ;; refinalize the cmpxchg to have the array replacement type, then get + ;; confused and crash when later processing the `ref` operand. + (array.new_default $array (i32.const 1)) + ;; Normally replacements escape and cannot be optimized, but the cmpxchg + ;; is replaced when `expected` is processed, so we end up seeing that + ;; this doesn't escape. 
+ (array.new_default $array (i32.const 2)) + ) + ) + ) +) + +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $struct (struct (field (mut (ref null $array))))) + (type $struct (struct (field (mut (ref null $array))))) + ;; CHECK: (type $array (array (mut i32))) + (type $array (array (field (mut i32)))) + ) + ;; CHECK: (type $2 (func (result (ref $array)))) + + ;; CHECK: (func $cmpxchg-expected-first (type $2) (result (ref $array)) + ;; CHECK-NEXT: (local $array (ref $array)) + ;; CHECK-NEXT: (local $1 i32) + ;; CHECK-NEXT: (local $2 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $3 (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.tee $array + ;; CHECK-NEXT: (array.new_default $array + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref null $array)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $array) + ;; CHECK-NEXT: ) + (func $cmpxchg-expected-first (result (ref $array)) + ;; Same as before, but now the replacement still escapes. Nothing should go + ;; wrong. 
+ (local $array (ref $array)) + (drop + (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + (array.new_default $array (i32.const 1)) + (local.tee $array + (array.new_default $array (i32.const 2)) + ) + ) + ) + (local.get $array) + ) +) + +(module + (rec + ;; CHECK: (type $0 (func)) + + ;; CHECK: (rec + ;; CHECK-NEXT: (type $array (array i8)) + (type $array (array i8)) + ;; CHECK: (type $struct (struct (field (mut anyref)))) + (type $struct (struct (field (mut anyref)))) + ) + ;; CHECK: (func $test-cmpxchg-scratch-oob (type $0) + ;; CHECK-NEXT: (local $a arrayref) + ;; CHECK-NEXT: (local $1 (ref null (exact $struct))) + ;; CHECK-NEXT: (local $2 anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.new_fixed $array 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + (func $test-cmpxchg-scratch-oob + (local $a arrayref) + ;; This allocation and set get optimized, creating the LocalGraph flower. + (local.set $a + (array.new_fixed $array 0) + ) + (drop + ;; Then `expected` gets optimized, creating a new scratch local. Since the + ;; flower is already created, the index of the scratch local would be out + ;; of bounds if we tried to look it up in the LocalGraph. 
+ (struct.atomic.rmw.cmpxchg $struct 0 + (struct.new_default $struct) + (array.new_fixed $array 0) + (array.new_fixed $array 0) + ) + ) + (unreachable) + ) +)