[bpf] expand indirect branches

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index ea842a6802098b784f503d1e3a434b38c7ed6f2e..7c9df0e7b502b4646240f2038f04b78c8ca2efdb 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -725,7 +725,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
  
-  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+  // @llvm.readcyclecounter requires the Performance Monitors extension.
+  // Default to the 0 expansion on unsupported platforms.
+  // FIXME: Technically there are older ARM CPUs that have
+  // implementation-specific ways of obtaining this information.
+  if (Subtarget->hasPerfMon())
+    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
  
    // Only ARMv6 has BSWAP.
    if (!Subtarget->hasV6Ops())
@@ -5045,10 +5050,16 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
    if (M.size() != NumElts && M.size() != NumElts*2)
      return false;
  
-  // If the mask is twice as long as the result then we need to check the upper
-  // and lower parts of the mask
+  // If the mask is twice as long as the input vector then we need to check the
+  // upper and lower parts of the mask with a matching value for WhichResult.
+  // FIXME: A mask with only even values will be rejected in case the first
+  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+  // M[0] is used to determine WhichResult.
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      for (unsigned j = 0; j < NumElts; j += 2) {
        if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
            (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
@@ -5075,7 +5086,10 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
      return false;
  
    for (unsigned i = 0; i < M.size(); i += NumElts) {
-    WhichResult = M[i] == 0 ? 0 : 1;
+    if (M.size() == NumElts * 2)
+      WhichResult = i / NumElts;
+    else
+      WhichResult = M[i] == 0 ? 0 : 1;
      for (unsigned j = 0; j < NumElts; j += 2) {
        if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
            (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
@@ -5549,6 +5563,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
        // A shuffle can only come from building a vector from various
        // elements of other vectors.
        return SDValue();
+    } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+      // Furthermore, shuffles require a constant mask, whereas extractelts
+      // accept variable indices.
+      return SDValue();
      }
  
      // Add this element source to the list if it's not already there.
@@ -6367,6 +6385,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  
  static SDValue
  LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
    // Convert to float
    // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
    // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6397,6 +6417,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
  
  static SDValue
  LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
    SDValue N2;
    // Convert to float.
    // float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6469,6 +6491,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
  }
  
  static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
    EVT VT = Op.getValueType();
    assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
           "unexpected type for custom-lowering ISD::UDIV");
@@ -6645,36 +6668,22 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
                                      SelectionDAG &DAG,
                                      const ARMSubtarget *Subtarget) {
    SDLoc DL(N);
-  SDValue Cycles32, OutChain;
-
-  if (Subtarget->hasPerfMon()) {
-    // Under Power Management extensions, the cycle-count is:
-    //    mrc p15, #0, <Rt>, c9, c13, #0
-    SDValue Ops[] = { N->getOperand(0), // Chain
-                      DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
-                      DAG.getConstant(15, DL, MVT::i32),
-                      DAG.getConstant(0, DL, MVT::i32),
-                      DAG.getConstant(9, DL, MVT::i32),
-                      DAG.getConstant(13, DL, MVT::i32),
-                      DAG.getConstant(0, DL, MVT::i32)
-    };
-
-    Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
-                           DAG.getVTList(MVT::i32, MVT::Other), Ops);
-    OutChain = Cycles32.getValue(1);
-  } else {
-    // Intrinsic is defined to return 0 on unsupported platforms. Technically
-    // there are older ARM CPUs that have implementation-specific ways of
-    // obtaining this information (FIXME!).
-    Cycles32 = DAG.getConstant(0, DL, MVT::i32);
-    OutChain = DAG.getEntryNode();
-  }
-
+  // Under the Performance Monitors extension, the cycle-count is:
+  //    mrc p15, #0, <Rt>, c9, c13, #0
+  SDValue Ops[] = { N->getOperand(0), // Chain
+                    DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+                    DAG.getConstant(15, DL, MVT::i32),
+                    DAG.getConstant(0, DL, MVT::i32),
+                    DAG.getConstant(9, DL, MVT::i32),
+                    DAG.getConstant(13, DL, MVT::i32),
+                    DAG.getConstant(0, DL, MVT::i32)
+  };
  
-  SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
-                                 Cycles32, DAG.getConstant(0, DL, MVT::i32));
-  Results.push_back(Cycles64);
-  Results.push_back(OutChain);
+  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+                                 DAG.getVTList(MVT::i32, MVT::Other), Ops);
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+                                DAG.getConstant(0, DL, MVT::i32)));
+  Results.push_back(Cycles32.getValue(1));
  }
  
  SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -6931,7 +6940,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
    MachineModuleInfo &MMI = MF->getMMI();
    for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
         ++BB) {
-    if (!BB->isLandingPad()) continue;
+    if (!BB->isEHPad()) continue;
  
      // FIXME: We should assert that the EH_LABEL is the first MI in the landing
      // pad.
@@ -6979,7 +6988,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
  
    // Shove the dispatch's address into the return slot in the function context.
    MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
-  DispatchBB->setIsLandingPad();
+  DispatchBB->setIsEHPad();
  
    MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
    unsigned trap_opcode;
@@ -7245,7 +7254,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
                                                    BB->succ_end());
      while (!Successors.empty()) {
        MachineBasicBlock *SMBB = Successors.pop_back_val();
-      if (SMBB->isLandingPad()) {
+      if (SMBB->isEHPad()) {
          BB->removeSuccessor(SMBB);
          MBBLPads.push_back(SMBB);
        }
@@ -7293,7 +7302,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
    // landing pad now.
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
-    (*I)->setIsLandingPad(false);
+    (*I)->setIsEHPad(false);
  
    // The instruction is gone now.
    MI->eraseFromParent();
@@ -11426,8 +11435,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
    return true;
  }
  
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
  Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
                                          ARM_MB::MemBOpt Domain) const {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11523,19 +11530,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
  // guarantee, see DDI0406C ARM architecture reference manual,
  // sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
    unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return (Size == 64) && !Subtarget->isMClass();
+  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLSC
+                                                  : AtomicExpansionKind::None;
  }
  
  // For the real atomic operations, we have ldrex/strex up to 32 bits,
  // and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
  ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    unsigned Size = AI->getType()->getPrimitiveSizeInBits();
    return (Size <= (Subtarget->isMClass() ? 32U : 64U))
-             ? AtomicRMWExpansionKind::LLSC
-             : AtomicRMWExpansionKind::None;
+             ? AtomicExpansionKind::LLSC
+             : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
+  return true;
  }
  
  // This has so far only been implemented for MachO.