Merging r258611:

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelDAGToDAG.cpp
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

index a4126ab12656428e521c94912e6be12c62017e5b..6c868880bcac4f760719ce3da759f3f62d5b7aae 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -34,7 +34,6 @@ using namespace llvm;
  namespace {
  
  class AArch64DAGToDAGISel : public SelectionDAGISel {
-  AArch64TargetMachine &TM;
  
    /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
    /// make the right decision when generating code for different targets.
@@ -45,7 +44,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
  public:
    explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                                 CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
          ForCodeSize(false) {}
  
    const char *getPassName() const override {
@@ -166,9 +165,7 @@ public:
  
    SDNode *SelectBitfieldExtractOp(SDNode *N);
    SDNode *SelectBitfieldInsertOp(SDNode *N);
-
-  SDNode *SelectLIBM(SDNode *N);
-  SDNode *SelectFPConvertWithRound(SDNode *N);
+  SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
  
    SDNode *SelectReadRegister(SDNode *N);
    SDNode *SelectWriteRegister(SDNode *N);
@@ -201,9 +198,6 @@ private:
    }
  
    bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
-
-  SDNode *GenerateInexactFlagIfNeeded(const SDValue &In, unsigned InTyVariant,
-                                      SDLoc DL);
  };
  } // end anonymous namespace
  
@@ -1288,8 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
    SDValue SuperReg = SDValue(Ld, 0);
  
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
-  static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
-                              AArch64::qsub3 };
+  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+                                    AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
      if (Narrow)
@@ -1341,8 +1335,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                  Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
    } else {
      EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
-    static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
-                                AArch64::qsub3 };
+    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+                                      AArch64::qsub2, AArch64::qsub3 };
      for (unsigned i = 0; i < NumVecs; ++i) {
        SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                    SuperReg);
@@ -1918,6 +1912,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  /// Does this tree qualify as an attempt to move a bitfield into position,
  /// essentially "(and (shl VAL, N), Mask)".
  static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+                                    bool BiggerPattern,
                                      SDValue &Src, int &ShiftAmount,
                                      int &MaskWidth) {
    EVT VT = Op.getValueType();
@@ -1940,6 +1935,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
      Op = Op.getOperand(0);
    }
  
+  // Don't match if the SHL has more than one use, since then we'll end up
+  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  if (!BiggerPattern && !Op.hasOneUse())
+    return false;
+
    uint64_t ShlImm;
    if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
      return false;
@@ -1953,7 +1953,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
  
    // BFI encompasses sufficiently many nodes that it's worth inserting an extra
    // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
-  // amount.
+  // amount.  BiggerPattern is true when this pattern is being matched for BFI;
+  // it is false when this pattern is being matched for UBFIZ, in which case it
+  // is not profitable to insert an extra shift.
+  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+    return false;
    Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
  
    return true;
@@ -1970,7 +1974,8 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
  // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
  static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
                                       SDValue &Src, unsigned &ImmR,
-                                     unsigned &ImmS, SelectionDAG *CurDAG) {
+                                     unsigned &ImmS, const APInt &UsefulBits,
+                                     SelectionDAG *CurDAG) {
    assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
  
    // Set Opc
@@ -1984,23 +1989,30 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
  
    // Because of simplify-demanded-bits in DAGCombine, involved masks may not
    // have the expected shape. Try to undo that.
-  APInt UsefulBits;
-  getUsefulBits(SDValue(N, 0), UsefulBits);
  
    unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
    unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
  
-  // OR is commutative, check both possibilities (does llvm provide a
-  // way to do that directely, e.g., via code matcher?)
-  SDValue OrOpd1Val = N->getOperand(1);
-  SDNode *OrOpd0 = N->getOperand(0).getNode();
-  SDNode *OrOpd1 = N->getOperand(1).getNode();
-  for (int i = 0; i < 2;
-       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+  // OR is commutative, check all combinations of operand order and values of
+  // BiggerPattern, i.e.
+  //     Opd0, Opd1, BiggerPattern=false
+  //     Opd1, Opd0, BiggerPattern=false
+  //     Opd0, Opd1, BiggerPattern=true
+  //     Opd1, Opd0, BiggerPattern=true
+  // Several of these combinations may match, so check with BiggerPattern=false
+  // first since that will produce better results by matching more instructions
+  // and/or inserting fewer extra instructions.
+  for (int I = 0; I < 4; ++I) {
+
+    bool BiggerPattern = I / 2;
+    SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+    SDNode *OrOpd1 = OrOpd1Val.getNode();
+
      unsigned BFXOpc;
      int DstLSB, Width;
      if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
-                            NumberOfIgnoredLowBits, true)) {
+                            NumberOfIgnoredLowBits, BiggerPattern)) {
        // Check that the returned opcode is compatible with the pattern,
        // i.e., same type and zero extended (U and not S)
        if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -2018,8 +2030,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
  
        // If the mask on the insertee is correct, we have a BFXIL operation. We
        // can share the ImmR and ImmS values from the already-computed UBFM.
-    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
-                                       DstLSB, Width)) {
+    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+                                       BiggerPattern,
+                                       Src, DstLSB, Width)) {
        ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
        ImmS = Width - 1;
      } else
@@ -2069,11 +2082,18 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
    unsigned Opc;
    unsigned LSB, MSB;
    SDValue Opd0, Opd1;
+  EVT VT = N->getValueType(0);
+  APInt NUsefulBits;
+  getUsefulBits(SDValue(N, 0), NUsefulBits);
  
-  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
+  // If none of the bits are useful, just return UNDEF.
+  if (!NUsefulBits)
+    return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);
+
+  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits,
+                                CurDAG))
      return nullptr;
  
-  EVT VT = N->getValueType(0);
    SDLoc dl(N);
    SDValue Ops[] = { Opd0,
                      Opd1,
@@ -2082,156 +2102,37 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
    return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  }
  
-/// GenerateInexactFlagIfNeeded - Insert FRINTX instruction to generate inexact
-/// signal on round-to-integer operations if needed. C11 leaves it
-/// implementation-defined whether these operations trigger an inexact
-/// exception. IEEE says they don't.  Unfortunately, Darwin decided they do so
-/// we sometimes have to insert a special instruction just to set the right bit
-/// in FPSR.
-SDNode *AArch64DAGToDAGISel::GenerateInexactFlagIfNeeded(const SDValue &In,
-                                                         unsigned InTyVariant,
-                                                         SDLoc DL) {
-  if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
-    // Pick the right FRINTX using InTyVariant needed to set the flags.
-    // InTyVariant is 0 for 32-bit and 1 for 64-bit.
-    unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
-    return CurDAG->getMachineNode(FRINTXOpcs[InTyVariant], DL,
-                                  In.getValueType(), MVT::Glue, In);
-  }
-  return nullptr;
-}
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an AND masking
+/// out a contiguous set of bits.
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+  if (N->getOpcode() != ISD::AND)
+    return nullptr;
  
-SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
    EVT VT = N->getValueType(0);
-  unsigned Variant;
    unsigned Opc;
-
-  if (VT == MVT::f32) {
-    Variant = 0;
-  } else if (VT == MVT::f64) {
-    Variant = 1;
-  } else
-    return nullptr; // Unrecognized argument type. Fall back on default codegen.
-
-  switch (N->getOpcode()) {
-  default:
-    return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
-  case ISD::FCEIL: {
-    unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
-    Opc = FRINTPOpcs[Variant];
-    break;
-  }
-  case ISD::FFLOOR: {
-    unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
-    Opc = FRINTMOpcs[Variant];
-    break;
-  }
-  case ISD::FTRUNC: {
-    unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
-    Opc = FRINTZOpcs[Variant];
-    break;
-  }
-  case ISD::FROUND: {
-    unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
-    Opc = FRINTAOpcs[Variant];
-    break;
-  }
-  }
-
-  SDLoc dl(N);
-  SDValue In = N->getOperand(0);
-  SmallVector<SDValue, 2> Ops;
-  Ops.push_back(In);
-
-  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, Variant, dl))
-    Ops.push_back(SDValue(FRINTXNode, 1));
-
-  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
-}
-
-/// SelectFPConvertWithRound - Try to combine FP rounding and
-/// FP-INT conversion.
-SDNode *AArch64DAGToDAGISel::SelectFPConvertWithRound(SDNode *N) {
-  SDNode *Op0 = N->getOperand(0).getNode();
-
-  // Return if the round op is used by other nodes, as this would result in two
-  // FRINTX, one each for round and convert.
-  if (!Op0->hasOneUse())
-    return nullptr;
-
-  unsigned InTyVariant;
-  EVT InTy = Op0->getValueType(0);
-  if (InTy == MVT::f32)
-    InTyVariant = 0;
-  else if (InTy == MVT::f64)
-    InTyVariant = 1;
+  if (VT == MVT::i32)
+    Opc = AArch64::UBFMWri;
+  else if (VT == MVT::i64)
+    Opc = AArch64::UBFMXri;
    else
      return nullptr;
  
-  unsigned OutTyVariant;
-  EVT OutTy = N->getValueType(0);
-  if (OutTy == MVT::i32)
-    OutTyVariant = 0;
-  else if (OutTy == MVT::i64)
-    OutTyVariant = 1;
-  else
+  SDValue Op0;
+  int DstLSB, Width;
+  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+                               Op0, DstLSB, Width))
      return nullptr;
  
-  assert((N->getOpcode() == ISD::FP_TO_SINT
-          || N->getOpcode() == ISD::FP_TO_UINT) && "Unexpected opcode!");
-  unsigned FpConVariant = N->getOpcode() == ISD::FP_TO_SINT ? 0 : 1;
-
-  unsigned Opc;
-  switch (Op0->getOpcode()) {
-  default:
-    return nullptr;
-  case ISD::FCEIL: {
-    unsigned FCVTPOpcs[2][2][2] = {
-        { { AArch64::FCVTPSUWSr, AArch64::FCVTPSUXSr },
-          { AArch64::FCVTPSUWDr, AArch64::FCVTPSUXDr } },
-        { { AArch64::FCVTPUUWSr, AArch64::FCVTPUUXSr },
-          { AArch64::FCVTPUUWDr, AArch64::FCVTPUUXDr } } };
-    Opc = FCVTPOpcs[FpConVariant][InTyVariant][OutTyVariant];
-    break;
-  }
-  case ISD::FFLOOR: {
-    unsigned FCVTMOpcs[2][2][2] = {
-        { { AArch64::FCVTMSUWSr, AArch64::FCVTMSUXSr },
-          { AArch64::FCVTMSUWDr, AArch64::FCVTMSUXDr } },
-        { { AArch64::FCVTMUUWSr, AArch64::FCVTMUUXSr },
-          { AArch64::FCVTMUUWDr, AArch64::FCVTMUUXDr } } };
-    Opc = FCVTMOpcs[FpConVariant][InTyVariant][OutTyVariant];
-    break;
-  }
-  case ISD::FTRUNC: {
-    unsigned FCVTZOpcs[2][2][2] = {
-        { { AArch64::FCVTZSUWSr, AArch64::FCVTZSUXSr },
-          { AArch64::FCVTZSUWDr, AArch64::FCVTZSUXDr } },
-        { { AArch64::FCVTZUUWSr, AArch64::FCVTZUUXSr },
-          { AArch64::FCVTZUUWDr, AArch64::FCVTZUUXDr } } };
-    Opc = FCVTZOpcs[FpConVariant][InTyVariant][OutTyVariant];
-    break;
-  }
-  case ISD::FROUND: {
-    unsigned FCVTAOpcs[2][2][2] = {
-        { { AArch64::FCVTASUWSr, AArch64::FCVTASUXSr },
-          { AArch64::FCVTASUWDr, AArch64::FCVTASUXDr } },
-        { { AArch64::FCVTAUUWSr, AArch64::FCVTAUUXSr },
-          { AArch64::FCVTAUUWDr, AArch64::FCVTAUUXDr } } };
-    Opc = FCVTAOpcs[FpConVariant][InTyVariant][OutTyVariant];
-    break;
-  }
-  }
+  // ImmR is the rotate right amount.
+  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+  // ImmS is the most significant bit of the source to be moved.
+  unsigned ImmS = Width - 1;
  
    SDLoc DL(N);
-  SDValue In = Op0->getOperand(0);
-  SmallVector<SDValue, 2> Ops;
-  Ops.push_back(In);
-
-  if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, InTyVariant, DL))
-    Ops.push_back(SDValue(FRINTXNode, 1));
-
-  return CurDAG->getMachineNode(Opc, DL, OutTy, Ops);
+  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+                   CurDAG->getTargetConstant(ImmS, DL, VT)};
+  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  }
  
  bool
@@ -2370,7 +2271,15 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
      assert (isa<ConstantSDNode>(N->getOperand(2))
                && "Expected a constant integer expression.");
      uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
-    return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
+    unsigned State;
+    if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+      assert(Immed < 2 && "Bad imm");
+      State = AArch64::MSRpstateImm1;
+    } else {
+      assert(Immed < 16 && "Bad imm");
+      State = AArch64::MSRpstateImm4;
+    }
+    return CurDAG->getMachineNode(State, DL, MVT::Other,
                                    CurDAG->getTargetConstant(Reg, DL, MVT::i32),
                                    CurDAG->getTargetConstant(Immed, DL, MVT::i16),
                                    N->getOperand(0));
@@ -2443,6 +2352,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
    case ISD::SRA:
      if (SDNode *I = SelectBitfieldExtractOp(Node))
        return I;
+    if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+      return I;
      break;
  
    case ISD::OR:
@@ -3379,20 +3290,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
        return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      break;
    }
-
-  case ISD::FCEIL:
-  case ISD::FFLOOR:
-  case ISD::FTRUNC:
-  case ISD::FROUND:
-    if (SDNode *I = SelectLIBM(Node))
-      return I;
-    break;
-
-  case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT:
-    if (SDNode *I = SelectFPConvertWithRound(Node))
-      return I;
-    break;
    }
  
    // Select the default instruction