From: Evan Cheng Date: Wed, 28 Apr 2010 01:18:01 +0000 (+0000) Subject: Rather than having a ton of patterns for double shift instructions, e.g. SHLD16rrCL... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8b1190a5400d263b5344f1fcd7b54ae1b6263e7c;p=oota-llvm.git Rather than having a ton of patterns for double shift instructions, e.g. SHLD16rrCL, just perform custom dag combine to form x86 specific dag so they match to the same pattern. This also makes sure later dag combine do not cause isel to miss them (e.g. promoting i16 to i32). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102485 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 58f1d88553e..c38b678376e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9595,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + EVT VT = N->getValueType(0); - if (VT != MVT::i64 || !Subtarget->is64Bit()) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) @@ -9607,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) @@ -9629,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(ShAmt0, ShAmt1); } + unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == 64 && + if 
(SumC->getSExtValue() == Bits && ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, @@ -9642,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, @@ -9921,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); - case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index eef2ca07896..7b1cdbcf4ac 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2156,21 +2156,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
let AddedComplexity = 5 in { // Try this before the selecting to OR def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 245a20e8f0d..751bbb4ffcd 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -489,34 +489,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~KnownZero0 & ~KnownZero1) == 0; }]>; -// 'shld' and 'shrd' instruction patterns. Note that even though these have -// the srl and shl in their patterns, the C++ code must still check for them, -// because predicates are tested before children nodes are explored. - -def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (srl node:$src1, node:$amt1), - (shl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SRL && - N->getOperand(1).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - -def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (shl node:$src1, node:$amt1), - (srl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SHL && - N->getOperand(1).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - //===----------------------------------------------------------------------===// // Instruction list... 
// @@ -4620,106 +4592,6 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; -// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) -def : Pat<(or (srl GR32:$src1, CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) -def : Pat<(or (shl GR32:$src1, CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi32 
addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) -def : Pat<(or (srl GR16:$src1, CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) -def : Pat<(or (shl GR16:$src1, CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), 
- (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>;