When generating spill and reload code for vector registers on PowerPC,

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index c0ab3d9c8cd5da1d82ce2d07fa422934b3b53e82..7df10980fd9d7930f894740094bfd463de37b289 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -161,7 +161,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
    X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
  
    RegInfo = TM.getRegisterInfo();
-  TD = getTargetData();
+  TD = getDataLayout();
  
    // Set up the TargetLowering object.
    static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -184,7 +184,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  
    // Bypass i32 with i8 on Atom when compiling with O2
    if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
-    addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext()));
+    addBypassSlowDiv(32, 8);
  
    if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
      // Setup Windows compiler runtime calls.
@@ -939,6 +939,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
      setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
  
+    setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
+    setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
+
      setLoadExtAction(ISD::EXTLOAD,              MVT::v2f32, Legal);
    }
  
@@ -1342,7 +1345,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
    // cases like PR2962.  This should be removed when PR2962 is fixed.
    const Function *F = MF.getFunction();
    if (IsZeroVal &&
-      !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+      !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
      if (Size >= 16 &&
          (Subtarget->isUnalignedMemAccessFast() ||
           ((DstAlign == 0 || DstAlign >= 16) &&
@@ -2010,7 +2013,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
        unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                         TotalNumIntRegs);
  
-      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+      bool NoImplicitFloatOps = Fn->getFnAttributes().
+        hasAttribute(Attributes::NoImplicitFloat);
        assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
               "SSE register cannot be used when SSE is disabled!");
        assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
@@ -2486,7 +2490,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
          OpFlags = X86II::MO_DARWIN_STUB;
        } else if (Subtarget->isPICStyleRIPRel() &&
                   isa<Function>(GV) &&
-                 cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+                 cast<Function>(GV)->getFnAttributes().
+                   hasAttribute(Attributes::NonLazyBind)) {
          // If the function is marked as non-lazy, generate an indirect call
          // which loads from the GOT directly. This avoids runtime overhead
          // at the cost of eager binding (and one extra byte of encoding).
@@ -3541,7 +3546,7 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
  
    if (!MatchEvenMask && !MatchOddMask)
      return SDValue();
-  
+
    SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
  
    SDValue Op0 = SVOp->getOperand(0);
@@ -5159,86 +5164,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
    return SDValue();
  }
  
-// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
-// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
-// constraint of matching input/output vector elements.
-SDValue
-X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
-  SDNode *N = Op.getNode();
-  EVT VT = Op.getValueType();
-  unsigned NumElts = Op.getNumOperands();
-
-  // Check supported types and sub-targets.
-  //
-  // Only v2f32 -> v2f64 needs special handling.
-  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
-    return SDValue();
-
-  SDValue VecIn;
-  EVT VecInVT;
-  SmallVector<int, 8> Mask;
-  EVT SrcVT = MVT::Other;
-
-  // Check the patterns could be translated into X86vfpext.
-  for (unsigned i = 0; i < NumElts; ++i) {
-    SDValue In = N->getOperand(i);
-    unsigned Opcode = In.getOpcode();
-
-    // Skip if the element is undefined.
-    if (Opcode == ISD::UNDEF) {
-      Mask.push_back(-1);
-      continue;
-    }
-
-    // Quit if one of the elements is not defined from 'fpext'.
-    if (Opcode != ISD::FP_EXTEND)
-      return SDValue();
-
-    // Check how the source of 'fpext' is defined.
-    SDValue L2In = In.getOperand(0);
-    EVT L2InVT = L2In.getValueType();
-
-    // Check the original type
-    if (SrcVT == MVT::Other)
-      SrcVT = L2InVT;
-    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
-      return SDValue();
-
-    // Check whether the value being 'fpext'ed is extracted from the same
-    // source.
-    Opcode = L2In.getOpcode();
-
-    // Quit if it's not extracted with a constant index.
-    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
-        !isa<ConstantSDNode>(L2In.getOperand(1)))
-      return SDValue();
-
-    SDValue ExtractedFromVec = L2In.getOperand(0);
-
-    if (VecIn.getNode() == 0) {
-      VecIn = ExtractedFromVec;
-      VecInVT = ExtractedFromVec.getValueType();
-    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
-      return SDValue();
-
-    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
-  }
-
-  // Quit if all operands of BUILD_VECTOR are undefined.
-  if (!VecIn.getNode())
-    return SDValue();
-
-  // Fill the remaining mask as undef.
-  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
-    Mask.push_back(-1);
-
-  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
-                     DAG.getVectorShuffle(VecInVT, DL,
-                                          VecIn, DAG.getUNDEF(VecInVT),
-                                          &Mask[0]));
-}
-
  SDValue
  X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
    DebugLoc dl = Op.getDebugLoc();
@@ -5271,10 +5196,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
    if (Broadcast.getNode())
      return Broadcast;
  
-  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
-  if (FpExt.getNode())
-    return FpExt;
-
    unsigned EVTBits = ExtVT.getSizeInBits();
  
    unsigned NumZero  = 0;
@@ -6053,7 +5974,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
    bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
    bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
  
-  // VPSHUFB may be generated if 
+  // VPSHUFB may be generated if
    // (1) one of input vector is undefined or zeroinitializer.
    // The mask value 0x80 puts 0 in the corresponding slot of the vector.
    // And (2) the mask indexes don't cross the 128-bit lane.
@@ -6629,7 +6550,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    bool HasAVX    = Subtarget->hasAVX();
    bool HasAVX2   = Subtarget->hasAVX2();
    MachineFunction &MF = DAG.getMachineFunction();
-  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  bool OptForSize = MF.getFunction()->getFnAttributes().
+    hasAttribute(Attributes::OptimizeForSize);
  
    assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
  
@@ -8212,6 +8134,20 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
    return FIST;
  }
  
+SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  SDValue In = Op.getOperand(0);
+  EVT SVT = In.getValueType();
+
+  assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+                                 In, DAG.getUNDEF(SVT)));
+}
+
  SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
    LLVMContext *Context = DAG.getContext();
    DebugLoc dl = Op.getDebugLoc();
@@ -9649,7 +9585,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  
    EVT ArgVT = Op.getNode()->getValueType(0);
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+  uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
    uint8_t ArgMode;
  
    // Decide which area this value should be read from.
@@ -9669,7 +9605,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
      // Sanity Check: Make sure using fp_offset makes sense.
      assert(!getTargetMachine().Options.UseSoftFloat &&
             !(DAG.getMachineFunction()
-                .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+                .getFunction()->getFnAttributes()
+                .hasAttribute(Attributes::NoImplicitFloat)) &&
             Subtarget->hasSSE1());
    }
  
@@ -10412,6 +10349,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
    DebugLoc dl  = Op.getDebugLoc();
  
    const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+  const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
  
    if (Subtarget->is64Bit()) {
      SDValue OutChains[6];
@@ -10420,8 +10358,8 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
      const unsigned char JMP64r  = 0xFF; // 64-bit jmp through register opcode.
      const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
  
-    const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
-    const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
+    const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
+    const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
  
      const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
  
@@ -10494,7 +10432,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
  
          for (FunctionType::param_iterator I = FTy->param_begin(),
               E = FTy->param_end(); I != E; ++I, ++Idx)
-          if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+          if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
              // FIXME: should only count parameters that are lowered to integers.
              InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
  
@@ -10523,7 +10461,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
  
      // This is storing the opcode for MOV32ri.
      const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
-    const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
+    const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
      OutChains[0] = DAG.getStore(Root, dl,
                                  DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                  Trmp, MachinePointerInfo(TrmpAddr),
@@ -11402,6 +11340,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
    case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
    case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG);
+  case ISD::FP_EXTEND:          return lowerFP_EXTEND(Op, DAG);
    case ISD::FABS:               return LowerFABS(Op, DAG);
    case ISD::FNEG:               return LowerFNEG(Op, DAG);
    case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
@@ -11530,6 +11469,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
      }
      return;
    }
+  case ISD::FP_ROUND: {
+    SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+    Results.push_back(V);
+    return;
+  }
    case ISD::READCYCLECOUNTER: {
      SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue TheChain = N->getOperand(0);
@@ -11724,6 +11668,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
    case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
    case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
+  case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
    case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
    case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
    case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -12110,7 +12055,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
    SrcReg = MI->getOperand(CurOp++).getReg();
  
    const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
-  EVT VT = *RC->vt_begin();
+  MVT::SimpleValueType VT = *RC->vt_begin();
    unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
  
    unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
@@ -12384,9 +12329,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
    // Hi
    MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
    for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
-    if (i == X86::AddrDisp)
+    if (i == X86::AddrDisp) {
        MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
-    else
+      // Don't forget to transfer the target flag.
+      MachineOperand &MO = MIB->getOperand(MIB->getNumOperands()-1);
+      MO.setTargetFlags(MI->getOperand(MemOpndSlot + i).getTargetFlags());
+    } else
        MIB.addOperand(MI->getOperand(MemOpndSlot + i));
    }
    MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -13910,7 +13858,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
      // alignment is valid.
      unsigned Align = LN0->getAlignment();
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    unsigned NewAlign = TLI.getTargetData()->
+    unsigned NewAlign = TLI.getDataLayout()->
        getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
  
      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
@@ -15438,7 +15386,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
      return SDValue();
  
    const Function *F = DAG.getMachineFunction().getFunction();
-  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  bool NoImplicitFloatOps = F->getFnAttributes().
+    hasAttribute(Attributes::NoImplicitFloat);
    bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
                       && Subtarget->hasSSE2();
    if ((VT.isVector() ||