setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
}
- // Use the default ISD::DBG_STOPPOINT.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing()) {
- setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MEMBARRIER);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
computeRegisterProperties();
+ // Divide and remainder operations have no vector equivalent and can
+ // trap. Do a custom widening for these operations in which we never
+ // generate more divides/remainder than the original vector width.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ if (!isTypeLegal((MVT::SimpleValueType)VT)) {
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
+ }
+ }
+
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
+SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) {
+ // Try to turn a splat of the scalar load SrcOp into a 16-byte aligned vector
+ // load followed by a splat shuffle. Only non-volatile, normal i32/f32 loads
+ // whose address is a frame index, or "frame index + cst", are handled; the
+ // cst is "absorbed" into the shuffle mask. Otherwise returns an empty SDValue.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+ // Find the frame index and constant byte offset the load address refers to.
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0); // continue with the bare frame index as the base
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. Reference stack + offset explicitly
+ // if stack pointer is at least 16-byte aligned.
+ unsigned StackAlign = Subtarget->getStackAlignment();
+ if (StackAlign < 16)
+ return SDValue();
+ Offset = MFI->getObjectOffset(FI) + Offset;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getConstant(Offset & ~15, getPointerTy()));
+ Offset %= 16; // the 16-byte aligned part was folded into Ptr just above
+ } else {
+ MFI->setObjectAlignment(FI, 16); // NOTE(review): takes effect even if a check below bails out
+ }
+ }
+
+ // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+ // EltNo is the index of the 4-byte element inside the 16-byte vector.
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; // NOTE(review): shadows the VT parameter, which is not read before this point
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ // Canonicalize it to a v4i32 shuffle, then convert back to VT.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ }
+
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+ // shuffle (vload ptr), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
return SDValue();
+ }
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it matches pextrw which produces a 32-bit result.
- EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1);
+ EVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
- unsigned WrapperKind = X86ISD::Wrapper;
+ // Create the TargetBlockAddress node.
+ unsigned char OpFlags =
+ Subtarget->ClassifyBlockAddressReference();
CodeModel::Model M = getTargetMachine().getCodeModel();
+ BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
+ /*isTarget=*/true, OpFlags);
+
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
-
- DebugLoc DL = Op.getDebugLoc();
-
- BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ else
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (isGlobalRelativeToPICBase(OpFlags)) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
return Result;
}
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
}
+
+ // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ MFI->setHasCalls(true);
+
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
return SDValue();
SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+ // Use sbb x, x to materialize carry bit into a GPR.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, dl, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond),
+ DAG.getConstant(1, MVT::i8));
+
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
}
Cond = NewCond;
}
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
Cond = LowerXALUO(Cond, DAG);
#endif
+ // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl,
+ DAG.GetOrdering(Chain.getNode()));
return CallResult.second;
}
Results.push_back(edx.getValue(1));
return;
}
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
+ return;
+ }
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
- const TargetLowering &TLI) {
- GlobalValue *GV;
- int64_t Offset = 0;
- if (TLI.isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= N && (Offset % N) == 0);
- // DAG combine handles the stack object case.
- return false;
-}
-
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return false;
LastLoadedElt = i;
}
return SDValue();
if (LastLoadedElt == NumElems - 1) {
- if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->isVolatile());
return SDValue();
}
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ // DAG combine for SHL; returns the replacement node or an empty SDValue.
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+ // since the result of setcc_c is all zeros or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY || // setcc_c directly, or ...
+ ((N00.getOpcode() == ISD::ANY_EXTEND || // ... wrapped in an any/zero extend
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0) // no fold when the shift pushes every mask bit out
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because a constant vector is typically transformed to a constant pool
if (!Subtarget->hasSSE2())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
}
}
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+ // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
+ // (and (i32 x86isd::setcc_carry), 1)
+ // This eliminates the zext by rebuilding setcc_carry in the zext's result
+ // type. It is necessary because ISD::SETCC is always legalized to i8.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() == ISD::AND &&
+ N0.hasOneUse() && // the AND has no other user
+ N0.getOperand(0).hasOneUse()) { // nor does the AND's first operand
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() != X86ISD::SETCC_CARRY)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 1)
+ return SDValue();
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+
+ return SDValue();
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
}
return SDValue();