From 7d9f2b93a356aa89186522bd61c5c565718ff555 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 3 Mar 2010 00:35:56 +0000 Subject: [PATCH] This test case: long test(long x) { return (x & 123124) | 3; } Currently compiles to: _test: orl $3, %edi movq %rdi, %rax andq $123127, %rax ret This is because instruction and DAG combiners canonicalize (or (and x, C), D) -> (and (or, D), (C | D)) However, this is only profitable if (C & D) != 0. It gets in the way of the 3-addressification because the input bits are known to be zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97616 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 ++++---- .../InstCombine/InstCombineAndOrXor.cpp | 6 ++-- test/CodeGen/X86/and-or-fold.ll | 28 +++++++++++++------ test/Transforms/InstCombine/xor2.ll | 8 +++--- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8ba06698942..5cd70c9cd0a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1786,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); if (RAND.getNode() != 0) return RAND; - // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = dyn_cast(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) @@ -2025,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + // iff (c1 & c2) == 0. if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + if ((C1->getZExtValue() & N1C->getZExtValue()) != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8ddaa16ee0e..86673f80962 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1464,8 +1464,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1474,7 +1473,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) + // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + (RHS->getValue() & C1->getValue()) != 0 && Op0->hasOneUse()) { Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); @@ -1497,6 +1498,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SelectInst *SI = dyn_cast(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; + if (isa(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll index 7733b8a5baa..836b5f1551c 100644 --- a/test/CodeGen/X86/and-or-fold.ll +++ b/test/CodeGen/X86/and-or-fold.ll @@ -1,14 +1,26 @@ -; RUN: llc < %s -march=x86 | grep and | count 1 +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck -check-prefix=DARWIN %s +; RUN: opt < %s -O2 | llc -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN-OPT %s ; The dag combiner should fold together (x&127)|(y&16711680) -> (x|y)&c1 ; in this case. -define i32 @test6(i32 %x, i16 %y) { - %tmp1 = zext i16 %y to i32 ; [#uses=1] - %tmp2 = and i32 %tmp1, 127 ; [#uses=1] - %tmp4 = shl i32 %x, 16 ; [#uses=1] - %tmp5 = and i32 %tmp4, 16711680 ; [#uses=1] - %tmp6 = or i32 %tmp2, %tmp5 ; [#uses=1] - ret i32 %tmp6 +define i32 @test1(i32 %x, i16 %y) { + %tmp1 = zext i16 %y to i32 + %tmp2 = and i32 %tmp1, 127 + %tmp4 = shl i32 %x, 16 + %tmp5 = and i32 %tmp4, 16711680 + %tmp6 = or i32 %tmp2, %tmp5 + ret i32 %tmp6 +; DARWIN: andl $16711807, %eax } +; The optimizer shouldn't fold this into (and (or, C), D) +; if (C & D) == 0 +define i64 @test2(i64 %x) nounwind readnone ssp { +entry: + %tmp1 = and i64 %x, 123127 + %tmp2 = or i64 %tmp1, 3 + ret i64 %tmp2 +; DARWIN-OPT: andq $123124 +; DARWIN-OPT-NEXT: leaq 3 +} diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll index de3d65dd8cb..67f05efa23d 100644 --- a/test/Transforms/InstCombine/xor2.ll +++ b/test/Transforms/InstCombine/xor2.ll @@ -22,8 +22,8 @@ define i1 @test1(i32 %A) { ; PR1014 define i32 @test2(i32 %tmp1) { ; CHECK: @test2 -; CHECK-NEXT: or i32 %tmp1, 8 -; CHECK-NEXT: and i32 +; CHECK-NEXT: and i32 %tmp1, 32 +; CHECK-NEXT: or i32 %ovm, 8 ; CHECK-NEXT: ret i32 %ovm = and i32 %tmp1, 32 %ov3 = add i32 %ovm, 145 @@ -33,8 +33,8 @@ define i32 @test2(i32 %tmp1) { define i32 @test3(i32 %tmp1) { ; CHECK: @test3 -; CHECK-NEXT: or i32 %tmp1, 8 -; CHECK-NEXT: and i32 +; CHECK-NEXT: and i32 %tmp1, 32 +; CHECK-NEXT: or i32 %tmp, 8 ; CHECK-NEXT: ret i32 %ovm = or i32 %tmp1, 145 %ov31 = and i32 %ovm, 177 -- 2.34.1