diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 47964f33713..41e28500ec2 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1796,6 +1796,8 @@ struct OptimizeInstructions } void visitRefEq(RefEq* curr) { + Builder builder(*getModule()); + // Check for unreachability. Note we must check both the children and the // ref.eq itself, as in e.g. optimizeTernary, as we only refinalize at the // end, so unreachable children may not update the parent yet. @@ -1832,12 +1834,16 @@ struct OptimizeInstructions skipCast(curr->left, nullableEq); skipCast(curr->right, nullableEq); - // Identical references compare equal. - // (Technically we do not need to check if the inputs are also foldable into - // a single one, but we do not have utility code to handle non-foldable - // cases yet; the foldable case we do handle is the common one of the first - // child being a tee and the second a get of that tee. TODO) + // Identical references compare equal. If the inputs are foldable, we need + // only keep one of them. if (areConsecutiveInputsEqualAndFoldable(curr->left, curr->right)) { + replaceCurrent( + builder.makeSequence(builder.makeDrop(curr->left), + builder.makeConst(Literal::makeOne(Type::i32)))); + return; + } + + if (areConsecutiveInputsEqual(curr->left, curr->right)) { replaceCurrent( getDroppedChildrenAndAppend(curr, Literal::makeOne(Type::i32))); return; @@ -2867,9 +2873,24 @@ struct OptimizeInstructions return false; } - // They do look the same! Make sure nothing executed in between them can - // affect the value of `right` and make it different from `left`. - if (interferingEffects.orderedBefore(effects(right))) { + // They do look the same! Determine whether the left hand expression can + // change the value of the operands flowing into the right-hand expression. + // Do not consider the right-hand expression itself here because the + // expressions might be idempotent function calls and we might otherwise + // mistakenly conclude that the first call interferes with the second. + EffectAnalyzer rightEffects(getPassOptions(), *getModule()); + for (auto* child : ChildIterator(right)) { + rightEffects.walk(child); + } + ShallowEffectAnalyzer leftEffects(getPassOptions(), *getModule(), left); + if (leftEffects.orderedBefore(rightEffects)) { + return false; + } + + // Make sure nothing executed in between the expressions can affect the + // value of `right` (or its operands) and make it different from `left`. + rightEffects.visit(right); + if (interferingEffects.orderedBefore(rightEffects)) { return false; } @@ -2896,7 +2917,8 @@ struct OptimizeInstructions return true; } - // To fold the right side into the left, it must have no effects. + // To fold the right side into the left, it must have no unremovable + // effects. auto rightMightHaveEffects = true; if (auto* call = right->dynCast()) { // If these are a pair of idempotent calls, then the second has no @@ -2929,7 +2951,9 @@ struct OptimizeInstructions } ShallowEffectAnalyzer parentEffects( getPassOptions(), *getModule(), call); - if (parentEffects.invalidates(childEffects)) { + // Check if the first parent is ordered before the second child (in the + // example above, the first call vs the second global.get). + if (parentEffects.orderedBefore(childEffects)) { return false; } // No effects are possible. diff --git a/test/lit/passes/optimize-instructions-gc-iit.wast b/test/lit/passes/optimize-instructions-gc-iit.wast index feefa3435b7..10dd6593521 100644 --- a/test/lit/passes/optimize-instructions-gc-iit.wast +++ b/test/lit/passes/optimize-instructions-gc-iit.wast @@ -145,12 +145,22 @@ ;; CHECK: (func $ref-eq-ref-cast (type $6) (param $x eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; TNH: (func $ref-eq-ref-cast (type $6) (param $x eqref) ;; TNH-NEXT: (drop - ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: (block (result i32) + ;; TNH-NEXT: (drop + ;; TNH-NEXT: (local.get $x) + ;; TNH-NEXT: ) + ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: ) ;; TNH-NEXT: ) ;; TNH-NEXT: ) (func $ref-eq-ref-cast (param $x eqref) diff --git a/test/lit/passes/optimize-instructions-gc-tnh.wast b/test/lit/passes/optimize-instructions-gc-tnh.wast index 9323488e336..45674df82a2 100644 --- a/test/lit/passes/optimize-instructions-gc-tnh.wast +++ b/test/lit/passes/optimize-instructions-gc-tnh.wast @@ -56,34 +56,48 @@ ) ) - ;; TNH: (func $ref.eq-no (type $8) (param $a eqref) (param $b eqref) (param $any anyref) + ;; TNH: (func $ref.eq-different-traps (type $8) (param $a eqref) (param $b eqref) (param $any anyref) ;; TNH-NEXT: (drop - ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: (block (result i32) + ;; TNH-NEXT: (drop + ;; TNH-NEXT: (ref.cast (ref null $struct) + ;; TNH-NEXT: (local.get $any) + ;; TNH-NEXT: ) + ;; TNH-NEXT: ) + ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: ) ;; TNH-NEXT: ) ;; TNH-NEXT: ) - ;; NO_TNH: (func $ref.eq-no (type $8) (param $a eqref) (param $b eqref) (param $any anyref) + ;; NO_TNH: (func $ref.eq-different-traps (type $8) (param $a eqref) (param $b eqref) (param $any anyref) ;; NO_TNH-NEXT: (drop - ;; NO_TNH-NEXT: (ref.eq - ;; NO_TNH-NEXT: (ref.cast (ref null $struct) - ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: (block (result i32) + ;; NO_TNH-NEXT: (drop + ;; NO_TNH-NEXT: (ref.cast (ref null $struct) + ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) - ;; NO_TNH-NEXT: (ref.cast (ref struct) - ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: (drop + ;; NO_TNH-NEXT: (ref.cast (ref struct) + ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) + ;; NO_TNH-NEXT: (i32.const 1) ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) - (func $ref.eq-no (param $a (ref null eq)) (param $b (ref null eq)) (param $any anyref) - ;; We must leave the inputs to ref.eq of type eqref or a subtype. + (func $ref.eq-different-traps (param $a (ref null eq)) (param $b (ref null eq)) (param $any anyref) + ;; The right hand side traps on nulls but the left hand side does not. We + ;; can always see that they produce the same value, but we can only fold + ;; the right side away if we assume traps never happen. (drop (ref.eq (ref.cast (ref null $struct) - (local.get $any) ;; *Not* an eqref! + (local.get $any) ) (ref.as_non_null (ref.cast (ref struct) (ref.as_non_null - (local.get $any) ;; *Not* an eqref! + (local.get $any) ) ) ) diff --git a/test/lit/passes/optimize-instructions-gc.wast b/test/lit/passes/optimize-instructions-gc.wast index c8532cf7ae4..0e1676ef063 100644 --- a/test/lit/passes/optimize-instructions-gc.wast +++ b/test/lit/passes/optimize-instructions-gc.wast @@ -57,6 +57,11 @@ ;; CHECK: (import "env" "get-i32" (func $get-i32 (type $8) (result i32))) (import "env" "get-i32" (func $get-i32 (result i32))) + ;; CHECK: (global $g (mut eqref) (ref.i31 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: )) + (global $g (mut eqref) (ref.i31 (i32.const 0))) + ;; These functions test if an `if` with subtyped arms is correctly folded ;; 1. if its `ifTrue` and `ifFalse` arms are identical (can fold) ;; CHECK: (func $if-arms-subtype-fold (type $29) (result anyref) @@ -415,7 +420,12 @@ ;; CHECK-NEXT: (local $lx eqref) ;; CHECK-NEXT: (local $ly eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.eq @@ -427,10 +437,23 @@ ;; CHECK-NEXT: (call $get-eqref) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $lx) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq (param $x eqref) (param $y eqref) @@ -489,22 +512,49 @@ ;; CHECK-NEXT: (ref.eq ;; CHECK-NEXT: (block (result eqref) ;; CHECK-NEXT: (call $nothing) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (global.get $g) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block (result eqref) ;; CHECK-NEXT: (call $nothing) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (global.get $g) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (call $nothing) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (call $nothing) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.eq ;; CHECK-NEXT: (struct.new_default $struct) ;; CHECK-NEXT: (struct.new_default $struct) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_default $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) @@ -518,7 +568,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-corner-cases (param $x eqref) - ;; side effects prevent optimization + ;; Side effects generally prevent optimization. Here the call might change + ;; the value of the global. + (drop + (ref.eq + (block (result eqref) + (call $nothing) + (global.get $g) + ) + (block (result eqref) + (call $nothing) + (global.get $g) + ) + ) + ) + ;; But here the call cannot change the value of the local, so we can still + ;; optimize. (drop (ref.eq (block (result eqref) @@ -531,18 +596,18 @@ ) ) ) - ;; allocation prevents optimization + ;; Allocation prevents optimization. (drop (ref.eq (struct.new_default $struct) (struct.new_default $struct) ) ) - ;; but irrelevant allocations do not prevent optimization + ;; But irrelevant allocations do not prevent optimization. (drop (ref.eq (block (result eqref) - ;; an allocation that does not trouble us + ;; An allocation that does not trouble us. (drop (struct.new_default $struct) ) @@ -552,15 +617,15 @@ (drop (struct.new_default $struct) ) - ;; add a nop to make the two inputs to ref.eq not structurally equal, + ;; Add a nop to make the two inputs to ref.eq not structurally equal, ;; but in a way that does not matter (since only the value falling - ;; out does) + ;; out does). (nop) (local.get $x) ) ) ) - ;; a tee does not prevent optimization, as we can fold the tee and the get. + ;; A tee does not prevent optimization, as we can fold the tee and the get. (drop (ref.eq (local.tee $x @@ -573,17 +638,19 @@ ;; CHECK: (func $ref-eq-ref-cast (type $5) (param $x eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.eq - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (ref.cast (ref null $struct) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref null $struct) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-ref-cast (param $x eqref) - ;; it is almost valid to look through a cast, except that it might trap so - ;; there is a side effect + ;; We can see that both sides produce the same value if they finish + ;; executing, but we have to preserve the trap. (drop (ref.eq (local.get $x) @@ -899,7 +966,12 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-null (param $x eqref) diff --git a/test/lit/passes/optimize-instructions-global-effects-idempotent.wast b/test/lit/passes/optimize-instructions-global-effects-idempotent.wast new file mode 100644 index 00000000000..dbb6af3da83 --- /dev/null +++ b/test/lit/passes/optimize-instructions-global-effects-idempotent.wast @@ -0,0 +1,93 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt --generate-global-effects --optimize-instructions -all -S -o - %s | filecheck %s + +(module + ;; CHECK: (type $struct (shared (struct (field (mut i32))))) + (type $struct (shared (struct (field (mut i32))))) + + ;; CHECK: (global $g i31ref (ref.i31 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: )) + + ;; CHECK: (memory $mem 1 1 shared) + (memory $mem 1 1 shared) + + (global $g i31ref (ref.i31 (i32.const 0))) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $acquire-mem (type $1) (param $0 i32) (result eqref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.atomic.load acqrel + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $acquire-mem (param i32) (result eqref) + (drop + (i32.atomic.load acqrel (local.get 0)) + ) + (global.get $g) + ) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $release-mem (type $1) (param $0 i32) (result eqref) + ;; CHECK-NEXT: (i32.atomic.store acqrel + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $release-mem (param i32) (result eqref) + (i32.atomic.store acqrel (local.get 0) (local.get 0)) + (global.get $g) + ) + + ;; CHECK: (func $fold (type $2) (param $s (ref $struct)) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $release-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $fold (param $s (ref $struct)) (result i32) + ;; The value loaded by the second struct.get cannot be forced to be + ;; different by the release operation done by the first call, so we can + ;; assume the same value is passed to both calls. That lets us fold them + ;; because they are marked idempotent. + (ref.eq + (call $release-mem (struct.get $struct 0 (local.get $s))) + (call $release-mem (struct.get $struct 0 (local.get $s))) + ) + ) + + ;; CHECK: (func $no-fold (type $2) (param $s (ref $struct)) (result i32) + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (call $acquire-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $acquire-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $no-fold (param $s (ref $struct)) (result i32) + ;; The second struct.get might see a different value because of + ;; synchronization established by the first call, so the calls might receive + ;; different arguments and therefore cannot be folded even though the callee + ;; is marked idempotent. + (ref.eq + (call $acquire-mem (struct.get $struct 0 (local.get $s))) + (call $acquire-mem (struct.get $struct 0 (local.get $s))) + ) + ) +) diff --git a/test/lit/passes/optimize-instructions_idempotent.wast b/test/lit/passes/optimize-instructions_idempotent.wast index d5ade65810c..abc999a133e 100644 --- a/test/lit/passes/optimize-instructions_idempotent.wast +++ b/test/lit/passes/optimize-instructions_idempotent.wast @@ -8,13 +8,20 @@ ;; CHECK: (import "a" "b" (func $import (type $2) (result f32))) (import "a" "b" (func $import (result f32))) + ;; CHECK: (global $g (mut f32) (f32.const 1)) + (global $g (mut f32) (f32.const 1)) + ;; CHECK: (@binaryen.idempotent) ;; CHECK-NEXT: (func $idempotent (type $0) (param $x f32) (result f32) + ;; CHECK-NEXT: (global.set $g + ;; CHECK-NEXT: (f32.const -1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) (@binaryen.idempotent) (func $idempotent (param $x f32) (result f32) ;; This function is idempotent: same inputs, same outputs. + (global.set $g (f32.const -1)) (local.get $x) ) @@ -26,6 +33,7 @@ (call $import) ) + ;; CHECK: (func $test-abs (type $1) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f32.mul @@ -61,6 +69,18 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.abs + ;; CHECK-NEXT: (f32.mul + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-abs ;; These calls are identical, since the second returns the same. We can @@ -103,6 +123,20 @@ ) ) ) + ;; Here the arguments look the same, but the first call will change the + ;; value of the argument to the second call, so we cannot optimize. + (drop + (f32.abs + (f32.mul + (call $idempotent + (global.get $g) + ) + (call $idempotent + (global.get $g) + ) + ) + ) + ) ) ;; CHECK: (func $test-abs-calls (type $1) @@ -234,11 +268,6 @@ ;; CHECK-NEXT: (global.get $g1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (call $idempotent - ;; CHECK-NEXT: (global.get $g1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: )