Change:

[oota-llvm.git] / lib / CodeGen / SelectionDAG / TargetLowering.cpp
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 06fd55c551522058d00117c7346b4368b1ccc07d..c6eeebe606e9e4cbebee2a9b847baf039c2a924c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) {
    Names[RTLIB::MUL_I32] = "__mulsi3";
    Names[RTLIB::MUL_I64] = "__muldi3";
    Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::MULO_I32] = "__mulosi4";
+  Names[RTLIB::MULO_I64] = "__mulodi4";
+  Names[RTLIB::MULO_I128] = "__muloti4";
    Names[RTLIB::SDIV_I8] = "__divqi3";
    Names[RTLIB::SDIV_I16] = "__divhi3";
    Names[RTLIB::SDIV_I32] = "__divsi3";
@@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) {
    Names[RTLIB::REM_F64] = "fmod";
    Names[RTLIB::REM_F80] = "fmodl";
    Names[RTLIB::REM_PPCF128] = "fmodl";
+  Names[RTLIB::FMA_F32] = "fmaf";
+  Names[RTLIB::FMA_F64] = "fma";
+  Names[RTLIB::FMA_F80] = "fmal";
+  Names[RTLIB::FMA_PPCF128] = "fmal";
    Names[RTLIB::POWI_F32] = "__powisf2";
    Names[RTLIB::POWI_F64] = "__powidf2";
    Names[RTLIB::POWI_F80] = "__powixf2";
@@ -602,6 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
    ExceptionPointerRegister = 0;
    ExceptionSelectorRegister = 0;
    BooleanContents = UndefinedBooleanContent;
+  BooleanVectorContents = UndefinedBooleanContent;
    SchedPreferenceInfo = Sched::Latency;
    JumpBufSize = 0;
    JumpBufAlignment = 0;
@@ -610,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
    PrefLoopAlignment = 0;
    MinStackArgumentAlignment = 1;
    ShouldFoldAtomicFences = false;
+  InsertFencesForAtomic = false;
  
    InitLibcallNames(LibcallRoutineNames);
    InitCmpLibcallCCs(CmpLibcallCCs);
@@ -673,10 +682,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
      NewVT = EltTy;
    IntermediateVT = NewVT;
  
+  unsigned NewVTSize = NewVT.getSizeInBits();
+
+  // Convert sizes such as i33 to i64.
+  if (!isPowerOf2_32(NewVTSize))
+    NewVTSize = NextPowerOf2(NewVTSize);
+
    EVT DestVT = TLI->getRegisterType(NewVT);
    RegisterVT = DestVT;
    if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
-    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
  
    // Otherwise, promotion or legal types use the same number of registers as
    // the vector decimated to the appropriate level.
@@ -821,26 +836,32 @@ void TargetLowering::computeRegisterProperties() {
      unsigned NElts = VT.getVectorNumElements();
      if (NElts != 1) {
        bool IsLegalWiderType = false;
+      // If we allow the promotion of vector elements using a flag,
+      // then return TypePromoteInteger on vector elements.
+      // First try to promote the elements of integer vectors. If no legal
+      // promotion was found, fall back to the widen-vector method.
+      if (mayPromoteElements)
        for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
          EVT SVT = (MVT::SimpleValueType)nVT;
-
-        // If we allow the promotion of vector elements using a flag,
-        // then return TypePromoteInteger on vector elements.
-        if (mayPromoteElements) {
-          // Promote vectors of integers to vectors with the same number
-          // of elements, with a wider element type.
-          if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
-              && SVT.getVectorNumElements() == NElts &&
-              isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
-            TransformToType[i] = SVT;
-            RegisterTypeForVT[i] = SVT;
-            NumRegistersForVT[i] = 1;
-            ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
-            IsLegalWiderType = true;
-            break;
-          }
+        // Promote vectors of integers to vectors with the same number
+        // of elements, with a wider element type.
+        if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
+            && SVT.getVectorNumElements() == NElts &&
+            isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+          TransformToType[i] = SVT;
+          RegisterTypeForVT[i] = SVT;
+          NumRegistersForVT[i] = 1;
+          ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+          IsLegalWiderType = true;
+          break;
          }
+      }
+
+      if (IsLegalWiderType) continue;
  
+      // Try to widen the vector.
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        EVT SVT = (MVT::SimpleValueType)nVT;
          if (SVT.getVectorElementType() == EltVT &&
              SVT.getVectorNumElements() > NElts &&
              isTypeLegal(SVT)) {
@@ -895,7 +916,8 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  }
  
  
-MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const {
+EVT TargetLowering::getSetCCResultType(EVT VT) const {
+  assert(!VT.isVector() && "No default SetCC type for vectors!");
    return PointerTy.SimpleTy;
  }
  
@@ -959,8 +981,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
  
    EVT DestVT = getRegisterType(Context, NewVT);
    RegisterVT = DestVT;
+  unsigned NewVTSize = NewVT.getSizeInBits();
+
+  // Convert sizes such as i33 to i64.
+  if (!isPowerOf2_32(NewVTSize))
+    NewVTSize = NextPowerOf2(NewVTSize);
+
    if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
-    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
  
    // Otherwise, promotion or legal types use the same number of registers as
    // the vector decimated to the appropriate level.
@@ -971,7 +999,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
  /// type of the given function.  This does not require a DAG or a return value,
  /// and is suitable for use before any DAGs for the function are constructed.
  /// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
                           SmallVectorImpl<ISD::OutputArg> &Outs,
                           const TargetLowering &TLI,
                           SmallVectorImpl<uint64_t> *Offsets) {
@@ -1029,7 +1057,7 @@ void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area.  This is the actual
  /// alignment, not its logarithm.
-unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const {
    return TD->getCallFrameTypeAlignment(Ty);
  }
  
@@ -1739,33 +1767,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
      break;
    }
    case ISD::AssertZext: {
-    // Demand all the bits of the input that are demanded in the output.
-    // The low bits are obvious; the high bits are demanded because we're
-    // asserting that they're zero here.
-    if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
+    // AssertZext demands all of the high bits, plus any of the low bits
+    // demanded by its users.
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth,
+                                        VT.getSizeInBits());
+    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
                               KnownZero, KnownOne, TLO, Depth+1))
        return true;
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
  
-    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
-    APInt InMask = APInt::getLowBitsSet(BitWidth,
-                                        VT.getSizeInBits());
      KnownZero |= ~InMask & NewMask;
      break;
    }
    case ISD::BITCAST:
-    // If this is an FP->Int bitcast and if the sign bit is the only thing that
-    // is demanded, turn this into a FGETSIGN.
-    if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
-        Op.getOperand(0).getValueType().isFloatingPoint() &&
-        !Op.getOperand(0).getValueType().isVector()) {
-      if (isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32)) {
-        EVT Ty = (isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType())) ?
-          Op.getValueType() : MVT::i32;
+    // If this is an FP->Int bitcast and if the sign bit is the only
+    // thing demanded, turn this into a FGETSIGN.
+    if (!Op.getOperand(0).getValueType().isVector() &&
+        NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+        Op.getOperand(0).getValueType().isFloatingPoint()) {
+      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+      bool i32Legal  = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+      if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
+        EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
          // Make a FGETSIGN + SHL to move the sign bit into the appropriate
          // place.  We expect the SHL to be eliminated by other optimizations.
          SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
-        if (Ty != Op.getValueType())
+        unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+        if (!OpVTLegal && OpVTSizeInBits > 32)
            Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
          unsigned ShVal = Op.getValueType().getSizeInBits()-1;
          SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType());
@@ -1894,7 +1923,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
    // comparisons.
    if (isa<ConstantSDNode>(N0.getNode()))
      return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-  
+
    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
      const APInt &C1 = N1C->getAPIntValue();
  
@@ -2164,7 +2193,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
          }
        } else if (N1C->getAPIntValue() == 1 &&
                   (VT == MVT::i1 ||
-                  getBooleanContents() == ZeroOrOneBooleanContent)) {
+                  getBooleanContents(false) == ZeroOrOneBooleanContent)) {
          SDValue Op0 = N0;
          if (Op0.getOpcode() == ISD::TRUNCATE)
            Op0 = Op0.getOperand(0);
@@ -2600,7 +2629,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  
  TargetLowering::ConstraintType
  TargetLowering::getConstraintType(const std::string &Constraint) const {
-  // FIXME: lots more standard ones to handle.
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      default: break;
@@ -2653,9 +2681,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                    std::string &Constraint,
                                                    std::vector<SDValue> &Ops,
                                                    SelectionDAG &DAG) const {
-  
+
    if (Constraint.length() > 1) return;
-  
+
    char ConstraintLetter = Constraint[0];
    switch (ConstraintLetter) {
    default: break;
@@ -2714,13 +2742,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
    }
  }
  
-std::vector<unsigned> TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
-  return std::vector<unsigned>();
-}
-
-
  std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
  getRegForInlineAsmConstraint(const std::string &Constraint,
                               EVT VT) const {
@@ -2821,7 +2842,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
        // corresponding argument.
        assert(!CS.getType()->isVoidTy() &&
               "Bad inline asm!");
-      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
          OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
        } else {
          assert(ResNo == 0 && "Asm only has one result!");
@@ -2838,16 +2859,16 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
      }
  
      if (OpInfo.CallOperandVal) {
-      const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
        if (OpInfo.isIndirect) {
-        const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
          if (!PtrTy)
            report_fatal_error("Indirect operand for inline asm not a pointer!");
          OpTy = PtrTy->getElementType();
        }
-      
+
        // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
-      if (const StructType *STy = dyn_cast<StructType>(OpTy))
+      if (StructType *STy = dyn_cast<StructType>(OpTy))
          if (STy->getNumElements() == 1)
            OpTy = STy->getElementType(0);
  
@@ -2947,10 +2968,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
        AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
  
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+       std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+         getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+       std::pair<unsigned, const TargetRegisterClass*> InputRC =
+         getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
               Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
            report_fatal_error("Unsupported asm: input constraint"
                               " with a matching output constraint of"
                               " incompatible type!");
@@ -3165,7 +3189,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                           const Type *Ty) const {
+                                           Type *Ty) const {
    // The default implementation of this implements a conservative RISCy, r+r and
    // r+i addr mode.
  
@@ -3196,6 +3220,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
    return true;
  }
  
+/// BuildExactSDIV - Given an exact SDIV of Op1 by the constant Op2, build a
+/// MUL of Op1 (pre-shifted if Op2 is even) by Op2's multiplicative inverse.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+                                       SelectionDAG &DAG) const {
+  ConstantSDNode *C = cast<ConstantSDNode>(Op2); // Op2 must be a constant.
+  APInt d = C->getAPIntValue();
+  assert(d != 0 && "Division by zero!");
+
+  // If d is even, shift its trailing zeros out of Op1 and d so d's LSB is one.
+  unsigned ShAmt = d.countTrailingZeros();
+  if (ShAmt) {
+    // TODO: For UDIV use SRL instead of SRA.
+    SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+    d = d.ashr(ShAmt);
+  }
+
+  // Find xn with d*xn == 1 in d's bit width by Newton's method, starting at d.
+  APInt t, xn = d;
+  while ((t = d*xn) != 1)
+    xn *= APInt(d.getBitWidth(), 2) - t; // xn = xn * (2 - d*xn)
+
+  Op2 = DAG.getConstant(xn, Op1.getValueType());
+  return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
  /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
  /// return a DAG expression to select that will generate the same value by
  /// multiplying by a magic number.  See: