From b95d47878947ac165a5fdf646c2cdc7f24a95f70 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 4 Jun 2026 21:10:38 -0700 Subject: [PATCH 1/3] Fix areConsecutiveInputsEqual for idempotent calls OptimizeInstruction's `areConsecutiveInputsEqual` function finds the effects that are executed in between the left-hand and right-hand expressions and checks that they cannot affect the value of the right-hand expression or its operands. However, it did not previously handle the case where the left-hand expression itself changes the value of the operands flowing into the right-hand expression. This can happen in the case of calls to idempotent functions, where the first call changes a global that is passed as an argument to the second call. --- src/passes/OptimizeInstructions.cpp | 21 ++++++++++-- .../optimize-instructions_idempotent.wast | 34 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 47964f33713..79e615d83bb 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2867,9 +2867,24 @@ struct OptimizeInstructions return false; } - // They do look the same! Make sure nothing executed in between them can - // affect the value of `right` and make it different from `left`. - if (interferingEffects.orderedBefore(effects(right))) { + // They do look the same! Determine whether the left hand expression can + // change the value of the operands flowing into the right-hand expression. + // Do not consider the right-hand expression itself here because the + // expressions might be idempotent function calls and we might otherwise + // mistakenly conclude that the first call interferes with the second. + EffectAnalyzer rightEffects(getPassOptions(), *getModule()); + for (auto* child : ChildIterator(right)) { + rightEffects.walk(child); + } + ShallowEffectAnalyzer leftEffects(getPassOptions(), *getModule(), left); + if (leftEffects.orderedBefore(rightEffects)) { + return false; + } + + // Make sure nothing executed in between the expressions can affect the + // value of `right` (or its operands) and make it different from `left`. + rightEffects.visit(right); + if (interferingEffects.orderedBefore(rightEffects)) { return false; } diff --git a/test/lit/passes/optimize-instructions_idempotent.wast b/test/lit/passes/optimize-instructions_idempotent.wast index d5ade65810c..26346b55314 100644 --- a/test/lit/passes/optimize-instructions_idempotent.wast +++ b/test/lit/passes/optimize-instructions_idempotent.wast @@ -8,13 +8,20 @@ ;; CHECK: (import "a" "b" (func $import (type $2) (result f32))) (import "a" "b" (func $import (result f32))) + ;; CHECK: (global $g (mut f32) (f32.const 1)) + (global $g (mut f32) (f32.const 1)) + ;; CHECK: (@binaryen.idempotent) ;; CHECK-NEXT: (func $idempotent (type $0) (param $x f32) (result f32) + ;; CHECK-NEXT: (global.set $g + ;; CHECK-NEXT: (f32.const -1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) (@binaryen.idempotent) (func $idempotent (param $x f32) (result f32) ;; This function is idempotent: same inputs, same outputs. + (global.set $g (f32.const -1)) (local.get $x) ) @@ -26,6 +33,7 @@ (call $import) ) + ;; CHECK: (func $test-abs (type $1) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (f32.mul @@ -61,6 +69,18 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (f32.abs + ;; CHECK-NEXT: (f32.mul + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $idempotent + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-abs ;; These calls are identical, since the second returns the same. We can @@ -103,6 +123,20 @@ ) ) ) + ;; Here the arguments look the same, but the first call will change the + ;; value of the argument to the second call, so we cannot optimize. + (drop + (f32.abs + (f32.mul + (call $idempotent + (global.get $g) + ) + (call $idempotent + (global.get $g) + ) + ) + ) + ) ) ;; CHECK: (func $test-abs-calls (type $1) From 8355ae50fddea8aec8d45be1dd605fade5f82af2 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 4 Jun 2026 21:27:57 -0700 Subject: [PATCH 2/3] Improve ref.eq optimization OptimizeInstructions would previously optimize ref.eq by checking if its operands were equal and foldable. If so, it would be replaced with the dropped operands (if they had effects that needed preserving) followed by a constant 1. Improve this by dropping only one of the operands; if they are equal and foldable, then there is no need to keep both around. When the operands do not have effects, this leaves more work for Vacuum than the old optimization did, but that's fine. Also newly optimize when the operands are equal but not foldable. In this case they can still be dropped and followed by a constant 1. Finally, replace the EffectAnalyzer `invalidates` call in `areConsecutiveInputsEqualAndFoldable` with an `orderedBefore` call. Add tests using release/acquire operations to show that the direction of the effect comparison is correct. --- src/passes/OptimizeInstructions.cpp | 21 ++-- .../passes/optimize-instructions-gc-iit.wast | 14 ++- .../passes/optimize-instructions-gc-tnh.wast | 38 ++++-- test/lit/passes/optimize-instructions-gc.wast | 112 ++++++++++++++---- ...nstructions-global-effects-idempotent.wast | 93 +++++++++++++++ .../optimize-instructions_idempotent.wast | 5 - 6 files changed, 237 insertions(+), 46 deletions(-) create mode 100644 test/lit/passes/optimize-instructions-global-effects-idempotent.wast diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 79e615d83bb..828983fc2ef 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1796,6 +1796,8 @@ struct OptimizeInstructions } void visitRefEq(RefEq* curr) { + Builder builder(*getModule()); + // Check for unreachability. Note we must check both the children and the // ref.eq itself, as in e.g. optimizeTernary, as we only refinalize at the // end, so unreachable children may not update the parent yet. @@ -1832,12 +1834,16 @@ struct OptimizeInstructions skipCast(curr->left, nullableEq); skipCast(curr->right, nullableEq); - // Identical references compare equal. - // (Technically we do not need to check if the inputs are also foldable into - // a single one, but we do not have utility code to handle non-foldable - // cases yet; the foldable case we do handle is the common one of the first - // child being a tee and the second a get of that tee. TODO) + // Identical references compare equal. If the inputs are foldable, we need + // only keep one of them. if (areConsecutiveInputsEqualAndFoldable(curr->left, curr->right)) { + replaceCurrent( + builder.makeSequence(builder.makeDrop(curr->left), + builder.makeConst(Literal::makeOne(Type::i32)))); + return; + } + + if (areConsecutiveInputsEqual(curr->left, curr->right)) { replaceCurrent( getDroppedChildrenAndAppend(curr, Literal::makeOne(Type::i32))); return; @@ -2911,7 +2917,8 @@ struct OptimizeInstructions return true; } - // To fold the right side into the left, it must have no effects. + // To fold the right side into the left, it must have no unremovable + // effects. auto rightMightHaveEffects = true; if (auto* call = right->dynCast()) { // If these are a pair of idempotent calls, then the second has no @@ -2944,7 +2951,7 @@ struct OptimizeInstructions } ShallowEffectAnalyzer parentEffects( getPassOptions(), *getModule(), call); - if (parentEffects.invalidates(childEffects)) { + if (parentEffects.orderedBefore(childEffects)) { return false; } // No effects are possible. diff --git a/test/lit/passes/optimize-instructions-gc-iit.wast b/test/lit/passes/optimize-instructions-gc-iit.wast index feefa3435b7..10dd6593521 100644 --- a/test/lit/passes/optimize-instructions-gc-iit.wast +++ b/test/lit/passes/optimize-instructions-gc-iit.wast @@ -145,12 +145,22 @@ ;; CHECK: (func $ref-eq-ref-cast (type $6) (param $x eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; TNH: (func $ref-eq-ref-cast (type $6) (param $x eqref) ;; TNH-NEXT: (drop - ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: (block (result i32) + ;; TNH-NEXT: (drop + ;; TNH-NEXT: (local.get $x) + ;; TNH-NEXT: ) + ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: ) ;; TNH-NEXT: ) ;; TNH-NEXT: ) (func $ref-eq-ref-cast (param $x eqref) diff --git a/test/lit/passes/optimize-instructions-gc-tnh.wast b/test/lit/passes/optimize-instructions-gc-tnh.wast index 9323488e336..45674df82a2 100644 --- a/test/lit/passes/optimize-instructions-gc-tnh.wast +++ b/test/lit/passes/optimize-instructions-gc-tnh.wast @@ -56,34 +56,48 @@ ) ) - ;; TNH: (func $ref.eq-no (type $8) (param $a eqref) (param $b eqref) (param $any anyref) + ;; TNH: (func $ref.eq-different-traps (type $8) (param $a eqref) (param $b eqref) (param $any anyref) ;; TNH-NEXT: (drop - ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: (block (result i32) + ;; TNH-NEXT: (drop + ;; TNH-NEXT: (ref.cast (ref null $struct) + ;; TNH-NEXT: (local.get $any) + ;; TNH-NEXT: ) + ;; TNH-NEXT: ) + ;; TNH-NEXT: (i32.const 1) + ;; TNH-NEXT: ) ;; TNH-NEXT: ) ;; TNH-NEXT: ) - ;; NO_TNH: (func $ref.eq-no (type $8) (param $a eqref) (param $b eqref) (param $any anyref) + ;; NO_TNH: (func $ref.eq-different-traps (type $8) (param $a eqref) (param $b eqref) (param $any anyref) ;; NO_TNH-NEXT: (drop - ;; NO_TNH-NEXT: (ref.eq - ;; NO_TNH-NEXT: (ref.cast (ref null $struct) - ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: (block (result i32) + ;; NO_TNH-NEXT: (drop + ;; NO_TNH-NEXT: (ref.cast (ref null $struct) + ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) - ;; NO_TNH-NEXT: (ref.cast (ref struct) - ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: (drop + ;; NO_TNH-NEXT: (ref.cast (ref struct) + ;; NO_TNH-NEXT: (local.get $any) + ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) + ;; NO_TNH-NEXT: (i32.const 1) ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) ;; NO_TNH-NEXT: ) - (func $ref.eq-no (param $a (ref null eq)) (param $b (ref null eq)) (param $any anyref) - ;; We must leave the inputs to ref.eq of type eqref or a subtype. + (func $ref.eq-different-traps (param $a (ref null eq)) (param $b (ref null eq)) (param $any anyref) + ;; The right hand side traps on nulls but the left hand side does not. We + ;; can always see that they produce the same value, but we can only fold + ;; the right side away if we assume traps never happen. (drop (ref.eq (ref.cast (ref null $struct) - (local.get $any) ;; *Not* an eqref! + (local.get $any) ) (ref.as_non_null (ref.cast (ref struct) (ref.as_non_null - (local.get $any) ;; *Not* an eqref! + (local.get $any) ) ) ) diff --git a/test/lit/passes/optimize-instructions-gc.wast b/test/lit/passes/optimize-instructions-gc.wast index c8532cf7ae4..0e1676ef063 100644 --- a/test/lit/passes/optimize-instructions-gc.wast +++ b/test/lit/passes/optimize-instructions-gc.wast @@ -57,6 +57,11 @@ ;; CHECK: (import "env" "get-i32" (func $get-i32 (type $8) (result i32))) (import "env" "get-i32" (func $get-i32 (result i32))) + ;; CHECK: (global $g (mut eqref) (ref.i31 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: )) + (global $g (mut eqref) (ref.i31 (i32.const 0))) + ;; These functions test if an `if` with subtyped arms is correctly folded ;; 1. if its `ifTrue` and `ifFalse` arms are identical (can fold) ;; CHECK: (func $if-arms-subtype-fold (type $29) (result anyref) @@ -415,7 +420,12 @@ ;; CHECK-NEXT: (local $lx eqref) ;; CHECK-NEXT: (local $ly eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.eq @@ -427,10 +437,23 @@ ;; CHECK-NEXT: (call $get-eqref) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (local.get $lx) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq (param $x eqref) (param $y eqref) @@ -489,22 +512,49 @@ ;; CHECK-NEXT: (ref.eq ;; CHECK-NEXT: (block (result eqref) ;; CHECK-NEXT: (call $nothing) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (global.get $g) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (block (result eqref) ;; CHECK-NEXT: (call $nothing) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (global.get $g) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (call $nothing) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (call $nothing) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.eq ;; CHECK-NEXT: (struct.new_default $struct) ;; CHECK-NEXT: (struct.new_default $struct) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result eqref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_default $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (block (result i32) @@ -518,7 +568,22 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-corner-cases (param $x eqref) - ;; side effects prevent optimization + ;; Side effects generally prevent optimization. Here the call might change + ;; the value of the global. + (drop + (ref.eq + (block (result eqref) + (call $nothing) + (global.get $g) + ) + (block (result eqref) + (call $nothing) + (global.get $g) + ) + ) + ) + ;; But here the call cannot change the value of the local, so we can still + ;; optimize. (drop (ref.eq (block (result eqref) @@ -531,18 +596,18 @@ ) ) ) - ;; allocation prevents optimization + ;; Allocation prevents optimization. (drop (ref.eq (struct.new_default $struct) (struct.new_default $struct) ) ) - ;; but irrelevant allocations do not prevent optimization + ;; But irrelevant allocations do not prevent optimization. (drop (ref.eq (block (result eqref) - ;; an allocation that does not trouble us + ;; An allocation that does not trouble us. (drop (struct.new_default $struct) ) @@ -552,15 +617,15 @@ (drop (struct.new_default $struct) ) - ;; add a nop to make the two inputs to ref.eq not structurally equal, + ;; Add a nop to make the two inputs to ref.eq not structurally equal, ;; but in a way that does not matter (since only the value falling - ;; out does) + ;; out does). (nop) (local.get $x) ) ) ) - ;; a tee does not prevent optimization, as we can fold the tee and the get. + ;; A tee does not prevent optimization, as we can fold the tee and the get. (drop (ref.eq (local.tee $x @@ -573,17 +638,19 @@ ;; CHECK: (func $ref-eq-ref-cast (type $5) (param $x eqref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.eq - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (ref.cast (ref null $struct) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref null $struct) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-ref-cast (param $x eqref) - ;; it is almost valid to look through a cast, except that it might trap so - ;; there is a side effect + ;; We can see that both sides produce the same value if they finish + ;; executing, but we have to preserve the trap. (drop (ref.eq (local.get $x) @@ -899,7 +966,12 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $ref-eq-null (param $x eqref) diff --git a/test/lit/passes/optimize-instructions-global-effects-idempotent.wast b/test/lit/passes/optimize-instructions-global-effects-idempotent.wast new file mode 100644 index 00000000000..dbb6af3da83 --- /dev/null +++ b/test/lit/passes/optimize-instructions-global-effects-idempotent.wast @@ -0,0 +1,93 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt --generate-global-effects --optimize-instructions -all -S -o - %s | filecheck %s + +(module + ;; CHECK: (type $struct (shared (struct (field (mut i32))))) + (type $struct (shared (struct (field (mut i32))))) + + ;; CHECK: (global $g i31ref (ref.i31 + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: )) + + ;; CHECK: (memory $mem 1 1 shared) + (memory $mem 1 1 shared) + + (global $g i31ref (ref.i31 (i32.const 0))) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $acquire-mem (type $1) (param $0 i32) (result eqref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.atomic.load acqrel + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $acquire-mem (param i32) (result eqref) + (drop + (i32.atomic.load acqrel (local.get 0)) + ) + (global.get $g) + ) + + ;; CHECK: (@binaryen.idempotent) + ;; CHECK-NEXT: (func $release-mem (type $1) (param $0 i32) (result eqref) + ;; CHECK-NEXT: (i32.atomic.store acqrel + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: (local.get $0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $g) + ;; CHECK-NEXT: ) + (@binaryen.idempotent) + (func $release-mem (param i32) (result eqref) + (i32.atomic.store acqrel (local.get 0) (local.get 0)) + (global.get $g) + ) + + ;; CHECK: (func $fold (type $2) (param $s (ref $struct)) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $release-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + (func $fold (param $s (ref $struct)) (result i32) + ;; The value loaded by the second struct.get cannot be forced to be + ;; different by the release operation done by the first call, so we can + ;; assume the same value is passed to both calls. That lets us fold them + ;; because they are marked idempotent. + (ref.eq + (call $release-mem (struct.get $struct 0 (local.get $s))) + (call $release-mem (struct.get $struct 0 (local.get $s))) + ) + ) + + ;; CHECK: (func $no-fold (type $2) (param $s (ref $struct)) (result i32) + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (call $acquire-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $acquire-mem + ;; CHECK-NEXT: (struct.get $struct 0 + ;; CHECK-NEXT: (local.get $s) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $no-fold (param $s (ref $struct)) (result i32) + ;; The second struct.get might see a different value because of + ;; synchronization established by the first call, so the calls might receive + ;; different arguments and therefore cannot be folded even though the callee + ;; is marked idempotent. + (ref.eq + (call $acquire-mem (struct.get $struct 0 (local.get $s))) + (call $acquire-mem (struct.get $struct 0 (local.get $s))) + ) + ) +) diff --git a/test/lit/passes/optimize-instructions_idempotent.wast b/test/lit/passes/optimize-instructions_idempotent.wast index 26346b55314..abc999a133e 100644 --- a/test/lit/passes/optimize-instructions_idempotent.wast +++ b/test/lit/passes/optimize-instructions_idempotent.wast @@ -268,11 +268,6 @@ ;; CHECK-NEXT: (global.get $g1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (call $idempotent - ;; CHECK-NEXT: (global.get $g1) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From e7a2f35cb19526064ae7eef41634c3cd3c9279b3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 5 Jun 2026 09:56:15 -0700 Subject: [PATCH 3/3] add comment --- src/passes/OptimizeInstructions.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 828983fc2ef..41e28500ec2 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -2951,6 +2951,8 @@ struct OptimizeInstructions } ShallowEffectAnalyzer parentEffects( getPassOptions(), *getModule(), call); + // Check if the first parent is ordered before the second child (in the + // example above, the first call vs the second global.get). if (parentEffects.orderedBefore(childEffects)) { return false; }