lib/Target/ARM/ARMISelLowering.cpp

   1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that ARM uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "arm-isel"
  16 #include "ARM.h"
  17 #include "ARMAddressingModes.h"
  18 #include "ARMConstantPoolValue.h"
  19 #include "ARMISelLowering.h"
  20 #include "ARMMachineFunctionInfo.h"
  21 #include "ARMPerfectShuffle.h"
  22 #include "ARMRegisterInfo.h"
  23 #include "ARMSubtarget.h"
  24 #include "ARMTargetMachine.h"
  25 #include "ARMTargetObjectFile.h"
  26 #include "llvm/CallingConv.h"
  27 #include "llvm/Constants.h"
  28 #include "llvm/Function.h"
  29 #include "llvm/GlobalValue.h"
  30 #include "llvm/Instruction.h"
  31 #include "llvm/Intrinsics.h"
  32 #include "llvm/Type.h"
  33 #include "llvm/CodeGen/CallingConvLower.h"
  34 #include "llvm/CodeGen/MachineBasicBlock.h"
  35 #include "llvm/CodeGen/MachineFrameInfo.h"
  36 #include "llvm/CodeGen/MachineFunction.h"
  37 #include "llvm/CodeGen/MachineInstrBuilder.h"
  38 #include "llvm/CodeGen/MachineRegisterInfo.h"
  39 #include "llvm/CodeGen/PseudoSourceValue.h"
  40 #include "llvm/CodeGen/SelectionDAG.h"
  41 #include "llvm/MC/MCSectionMachO.h"
  42 #include "llvm/Target/TargetOptions.h"
  43 #include "llvm/ADT/VectorExtras.h"
  44 #include "llvm/ADT/Statistic.h"
  45 #include "llvm/Support/CommandLine.h"
  46 #include "llvm/Support/ErrorHandling.h"
  47 #include "llvm/Support/MathExtras.h"
  48 #include "llvm/Support/raw_ostream.h"
  49 #include <sstream>
  50 using namespace llvm;
  51
  52 STATISTIC(NumTailCalls, "Number of tail calls");
  53
  54 // This option should go away when tail calls fully work.
  55 static cl::opt<bool>
  56 EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  57   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  58   cl::init(false));
  59
  60 // This option should go away when Machine LICM is smart enough to hoist a
  61 // reg-to-reg VDUP.
  62 static cl::opt<bool>
  63 EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
  64   cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
  65   cl::init(false));
  66
  67 static cl::opt<bool>
  68 EnableARMLongCalls("arm-long-calls", cl::Hidden,
  69   cl::desc("Generate calls via indirect call instructions"),
  70   cl::init(false));
  71
  72 static cl::opt<bool>
  73 ARMInterworking("arm-interworking", cl::Hidden,
  74   cl::desc("Enable / disable ARM interworking (for debugging only)"),
  75   cl::init(true));
  76
  77 static cl::opt<bool>
  78 EnableARMCodePlacement("arm-code-placement", cl::Hidden,
  79   cl::desc("Enable code placement pass for ARM"),
  80   cl::init(false));
  81
  82 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  83                                    CCValAssign::LocInfo &LocInfo,
  84                                    ISD::ArgFlagsTy &ArgFlags,
  85                                    CCState &State);
  86 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  87                                     CCValAssign::LocInfo &LocInfo,
  88                                     ISD::ArgFlagsTy &ArgFlags,
  89                                     CCState &State);
  90 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  91                                       CCValAssign::LocInfo &LocInfo,
  92                                       ISD::ArgFlagsTy &ArgFlags,
  93                                       CCState &State);
  94 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
  95                                        CCValAssign::LocInfo &LocInfo,
  96                                        ISD::ArgFlagsTy &ArgFlags,
  97                                        CCState &State);
  98
  99 void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
 100                                        EVT PromotedBitwiseVT) {
 101   if (VT != PromotedLdStVT) {
 102     setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
 103     AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
 104                        PromotedLdStVT.getSimpleVT());
 105
 106     setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
 107     AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
 108                        PromotedLdStVT.getSimpleVT());
 109   }
 110
 111   EVT ElemTy = VT.getVectorElementType();
 112   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
 113     setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
 114   if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
 115     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
 116   if (ElemTy != MVT::i32) {
 117     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
 118     setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
 119     setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
 120     setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
 121   }
 122   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
 123   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
 124   setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
 125   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
 126   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
 127   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
 128   if (VT.isInteger()) {
 129     setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
 130     setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
 131     setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
 132     setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
 133     setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
 134   }
 135   setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
 136
 137   // Promote all bit-wise operations.
 138   if (VT.isInteger() && VT != PromotedBitwiseVT) {
 139     setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
 140     AddPromotedToType (ISD::AND, VT.getSimpleVT(),
 141                        PromotedBitwiseVT.getSimpleVT());
 142     setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
 143     AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
 144                        PromotedBitwiseVT.getSimpleVT());
 145     setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
 146     AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
 147                        PromotedBitwiseVT.getSimpleVT());
 148   }
 149
 150   // Neon does not support vector divide/remainder operations.
 151   setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
 152   setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
 153   setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
 154   setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
 155   setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
 156   setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
 157 }
 158
 159 void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
 160   addRegisterClass(VT, ARM::DPRRegisterClass);
 161   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
 162 }
 163
 164 void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
 165   addRegisterClass(VT, ARM::QPRRegisterClass);
 166   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 167 }
 168
 169 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
 170   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
 171     return new TargetLoweringObjectFileMachO();
 172
 173   return new ARMElfTargetObjectFile();
 174 }
 175
 176 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 177     : TargetLowering(TM, createTLOF(TM)) {
 178   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 179   RegInfo = TM.getRegisterInfo();
 180
 181   if (Subtarget->isTargetDarwin()) {
 182     // Uses VFP for Thumb libfuncs if available.
 183     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
 184       // Single-precision floating-point arithmetic.
 185       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
 186       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
 187       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
 188       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
 189
 190       // Double-precision floating-point arithmetic.
 191       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
 192       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
 193       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
 194       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
 195
 196       // Single-precision comparisons.
 197       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
 198       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
 199       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
 200       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
 201       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
 202       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
 203       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
 204       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
 205
 206       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
 207       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
 208       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
 209       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
 210       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
 211       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
 212       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
 213       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
 214
 215       // Double-precision comparisons.
 216       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
 217       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
 218       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
 219       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
 220       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
 221       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
 222       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
 223       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
 224
 225       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
 226       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
 227       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
 228       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
 229       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
 230       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
 231       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
 232       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
 233
 234       // Floating-point to integer conversions.
 235       // i64 conversions are done via library routines even when generating VFP
 236       // instructions, so use the same ones.
 237       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
 238       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
 239       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
 240       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
 241
 242       // Conversions between floating types.
 243       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
 244       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
 245
 246       // Integer to floating-point conversions.
 247       // i64 conversions are done via library routines even when generating VFP
 248       // instructions, so use the same ones.
 249       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
 250       // e.g., __floatunsidf vs. __floatunssidfvfp.
 251       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
 252       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
 253       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
 254       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
 255     }
 256   }
 257
 258   // These libcalls are not available in 32-bit.
 259   setLibcallName(RTLIB::SHL_I128, 0);
 260   setLibcallName(RTLIB::SRL_I128, 0);
 261   setLibcallName(RTLIB::SRA_I128, 0);
 262
 263   // Libcalls should use the AAPCS base standard ABI, even if hard float
 264   // is in effect, as per the ARM RTABI specification, section 4.1.2.
 265   if (Subtarget->isAAPCS_ABI()) {
 266     for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
 267       setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
 268                             CallingConv::ARM_AAPCS);
 269     }
 270   }
 271
 272   if (Subtarget->isThumb1Only())
 273     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
 274   else
 275     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
 276   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 277     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
 278     if (!Subtarget->isFPOnlySP())
 279       addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
 280
 281     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 282   }
 283
 284   if (Subtarget->hasNEON()) {
 285     addDRTypeForNEON(MVT::v2f32);
 286     addDRTypeForNEON(MVT::v8i8);
 287     addDRTypeForNEON(MVT::v4i16);
 288     addDRTypeForNEON(MVT::v2i32);
 289     addDRTypeForNEON(MVT::v1i64);
 290
 291     addQRTypeForNEON(MVT::v4f32);
 292     addQRTypeForNEON(MVT::v2f64);
 293     addQRTypeForNEON(MVT::v16i8);
 294     addQRTypeForNEON(MVT::v8i16);
 295     addQRTypeForNEON(MVT::v4i32);
 296     addQRTypeForNEON(MVT::v2i64);
 297
 298     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
 299     // neither Neon nor VFP support any arithmetic operations on it.
 300     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 301     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 302     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 303     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
 304     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
 305     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
 306     setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
 307     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
 308     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 309     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
 310     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
 311     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
 312     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
 313     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
 314     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
 315     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
 316     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
 317     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
 318     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
 319     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
 320     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
 321     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
 322     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
 323     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
 324
 325     setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
 326
 327     // Neon does not support some operations on v1i64 and v2i64 types.
 328     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
 329     setOperationAction(ISD::MUL, MVT::v2i64, Expand);
 330     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
 331     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
 332
 333     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 334     setTargetDAGCombine(ISD::SHL);
 335     setTargetDAGCombine(ISD::SRL);
 336     setTargetDAGCombine(ISD::SRA);
 337     setTargetDAGCombine(ISD::SIGN_EXTEND);
 338     setTargetDAGCombine(ISD::ZERO_EXTEND);
 339     setTargetDAGCombine(ISD::ANY_EXTEND);
 340     setTargetDAGCombine(ISD::SELECT_CC);
 341   }
 342
 343   computeRegisterProperties();
 344
 345   // ARM does not have f32 extending load.
 346   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
 347
 348   // ARM does not have i1 sign extending load.
 349   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 350
 351   // ARM supports all 4 flavors of integer indexed load / store.
 352   if (!Subtarget->isThumb1Only()) {
 353     for (unsigned im = (unsigned)ISD::PRE_INC;
 354          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
 355       setIndexedLoadAction(im,  MVT::i1,  Legal);
 356       setIndexedLoadAction(im,  MVT::i8,  Legal);
 357       setIndexedLoadAction(im,  MVT::i16, Legal);
 358       setIndexedLoadAction(im,  MVT::i32, Legal);
 359       setIndexedStoreAction(im, MVT::i1,  Legal);
 360       setIndexedStoreAction(im, MVT::i8,  Legal);
 361       setIndexedStoreAction(im, MVT::i16, Legal);
 362       setIndexedStoreAction(im, MVT::i32, Legal);
 363     }
 364   }
 365
 366   // i64 operation support.
 367   if (Subtarget->isThumb1Only()) {
 368     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 369     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 370     setOperationAction(ISD::MULHS,   MVT::i32, Expand);
 371     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 372     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 373   } else {
 374     setOperationAction(ISD::MUL,     MVT::i64, Expand);
 375     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
 376     if (!Subtarget->hasV6Ops())
 377       setOperationAction(ISD::MULHS, MVT::i32, Expand);
 378   }
 379   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 380   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 381   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 382   setOperationAction(ISD::SRL,       MVT::i64, Custom);
 383   setOperationAction(ISD::SRA,       MVT::i64, Custom);
 384
 385   // ARM does not have ROTL.
 386   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
 387   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
 388   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
 389   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
 390     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 391
 392   // Only ARMv6 has BSWAP.
 393   if (!Subtarget->hasV6Ops())
 394     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 395
 396   // These are expanded into libcalls.
 397   if (!Subtarget->hasDivide()) {
 398     // v7M has a hardware divider
 399     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
 400     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
 401   }
 402   setOperationAction(ISD::SREM,  MVT::i32, Expand);
 403   setOperationAction(ISD::UREM,  MVT::i32, Expand);
 404   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 405   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 406
 407   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
 408   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
 409   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
 410   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 411   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
 412
 413   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 414
 415   // Use the default implementation.
 416   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 417   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
 418   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
 419   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
 420   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
 421   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
 422   setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
 423   // FIXME: Shouldn't need this, since no register is used, but the legalizer
 424   // doesn't yet know how to not do that for SjLj.
 425   setExceptionSelectorRegister(ARM::R0);
 426   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 427   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
 428   // the default expansion.
 429   if (Subtarget->hasDataBarrier() ||
 430       (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
 431     // membarrier needs custom lowering; the rest are legal and handled
 432     // normally.
 433     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
 434   } else {
 435     // Set them all for expansion, which will force libcalls.
 436     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 437     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
 438     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
 439     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
 440     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
 441     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
 442     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
 443     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
 444     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
 445     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
 446     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
 447     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
 448     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
 449     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
 450     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
 451     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
 452     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
 453     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
 454     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
 455     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
 456     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
 457     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
 458     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
 459     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
 460     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
 461     // Since the libcalls include locking, fold in the fences
 462     setShouldFoldAtomicFences(true);
 463   }
 464   // 64-bit versions are always libcalls (for now)
 465   setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
 466   setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
 467   setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
 468   setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
 469   setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
 470   setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
 471   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
 472   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
 473
 474   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
 475   if (!Subtarget->hasV6Ops()) {
 476     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
 477     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
 478   }
 479   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 480
 481   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 482     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
 483     // iff target supports vfp2.
 484     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
 485     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 486   }
 487
 488   // We want to custom lower some of our intrinsics.
 489   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 490   if (Subtarget->isTargetDarwin()) {
 491     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 492     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 493   }
 494
 495   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
 496   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
 497   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
 498   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
 499   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
 500   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
 501   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 502   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 503   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 504
 505   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
 506   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
 507   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
 508   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
 509   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
 510
 511   // We don't support sin/cos/fmod/copysign/pow
 512   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
 513   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
 514   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
 515   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
 516   setOperationAction(ISD::FREM,      MVT::f64, Expand);
 517   setOperationAction(ISD::FREM,      MVT::f32, Expand);
 518   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
 519     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
 520     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
 521   }
 522   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
 523   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 524
 525   // Various VFP goodness
 526   if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
 527     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
 528     if (Subtarget->hasVFP2()) {
 529       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 530       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 531       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 532       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 533     }
 534     // Special handling for half-precision FP.
 535     if (!Subtarget->hasFP16()) {
 536       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
 537       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
 538     }
 539   }
 540
 541   // We have target-specific dag combine patterns for the following nodes:
 542   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
 543   setTargetDAGCombine(ISD::ADD);
 544   setTargetDAGCombine(ISD::SUB);
 545   setTargetDAGCombine(ISD::MUL);
 546
 547   if (Subtarget->hasV6T2Ops())
 548     setTargetDAGCombine(ISD::OR);
 549
 550   setStackPointerRegisterToSaveRestore(ARM::SP);
 551
 552   if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
 553     setSchedulingPreference(Sched::RegPressure);
 554   else
 555     setSchedulingPreference(Sched::Hybrid);
 556
 557   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
 558
 559   // On ARM arguments smaller than 4 bytes are extended, so all arguments
 560   // are at least 4 bytes aligned.
 561   setMinStackArgumentAlignment(4);
 562
 563   if (EnableARMCodePlacement)
 564     benefitFromCodePlacementOpt = true;
 565 }
 566
 567 std::pair<const TargetRegisterClass*, uint8_t>
 568 ARMTargetLowering::findRepresentativeClass(EVT VT) const{
 569   const TargetRegisterClass *RRC = 0;
 570   uint8_t Cost = 1;
 571   switch (VT.getSimpleVT().SimpleTy) {
 572   default:
 573     return TargetLowering::findRepresentativeClass(VT);
 574   // Use DPR as representative register class for all floating point
 575   // and vector types. Since there are 32 SPR registers and 32 DPR registers so
 576   // the cost is 1 for both f32 and f64.
 577   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
 578   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
 579     RRC = ARM::DPRRegisterClass;
 580     break;
 581   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
 582   case MVT::v4f32: case MVT::v2f64:
 583     RRC = ARM::DPRRegisterClass;
 584     Cost = 2;
 585     break;
 586   case MVT::v4i64:
 587     RRC = ARM::DPRRegisterClass;
 588     Cost = 4;
 589     break;
 590   case MVT::v8i64:
 591     RRC = ARM::DPRRegisterClass;
 592     Cost = 8;
 593     break;
 594   }
 595   return std::make_pair(RRC, Cost);
 596 }
 597
 598 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 599   switch (Opcode) {
 600   default: return 0;
 601   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
 602   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
 603   case ARMISD::CALL:          return "ARMISD::CALL";
 604   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
 605   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
 606   case ARMISD::tCALL:         return "ARMISD::tCALL";
 607   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
 608   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
 609   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
 610   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
 611   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
 612   case ARMISD::CMP:           return "ARMISD::CMP";
 613   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
 614   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
 615   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
 616   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
 617   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
 618   case ARMISD::CMOV:          return "ARMISD::CMOV";
 619   case ARMISD::CNEG:          return "ARMISD::CNEG";
 620
 621   case ARMISD::RBIT:          return "ARMISD::RBIT";
 622
 623   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
 624   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
 625   case ARMISD::SITOF:         return "ARMISD::SITOF";
 626   case ARMISD::UITOF:         return "ARMISD::UITOF";
 627
 628   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
 629   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
 630   case ARMISD::RRX:           return "ARMISD::RRX";
 631
 632   case ARMISD::VMOVRRD:         return "ARMISD::VMOVRRD";
 633   case ARMISD::VMOVDRR:         return "ARMISD::VMOVDRR";
 634
 635   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
 636   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
 637
 638   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
 639
 640   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
 641
 642   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 643
 644   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
 645   case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
 646
 647   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
 648   case ARMISD::VCGE:          return "ARMISD::VCGE";
 649   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
 650   case ARMISD::VCGT:          return "ARMISD::VCGT";
 651   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
 652   case ARMISD::VTST:          return "ARMISD::VTST";
 653
 654   case ARMISD::VSHL:          return "ARMISD::VSHL";
 655   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
 656   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
 657   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
 658   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
 659   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
 660   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
 661   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
 662   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
 663   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
 664   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
 665   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
 666   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
 667   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
 668   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
 669   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
 670   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
 671   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
 672   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
 673   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
 674   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
 675   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
 676   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
 677   case ARMISD::VDUP:          return "ARMISD::VDUP";
 678   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
 679   case ARMISD::VEXT:          return "ARMISD::VEXT";
 680   case ARMISD::VREV64:        return "ARMISD::VREV64";
 681   case ARMISD::VREV32:        return "ARMISD::VREV32";
 682   case ARMISD::VREV16:        return "ARMISD::VREV16";
 683   case ARMISD::VZIP:          return "ARMISD::VZIP";
 684   case ARMISD::VUZP:          return "ARMISD::VUZP";
 685   case ARMISD::VTRN:          return "ARMISD::VTRN";
 686   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
 687   case ARMISD::FMAX:          return "ARMISD::FMAX";
 688   case ARMISD::FMIN:          return "ARMISD::FMIN";
 689   case ARMISD::BFI:           return "ARMISD::BFI";
 690   }
 691 }
 692
 693 /// getRegClassFor - Return the register class that should be used for the
 694 /// specified value type.
 695 TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
 696   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
 697   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
 698   // load / store 4 to 8 consecutive D registers.
 699   if (Subtarget->hasNEON()) {
 700     if (VT == MVT::v4i64)
 701       return ARM::QQPRRegisterClass;
 702     else if (VT == MVT::v8i64)
 703       return ARM::QQQQPRRegisterClass;
 704   }
 705   return TargetLowering::getRegClassFor(VT);
 706 }
 707
 708 // Create a fast isel object.
 709 FastISel *
 710 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
 711   return ARM::createFastISel(funcInfo);
 712 }
 713
 714 /// getFunctionAlignment - Return the Log2 alignment of this function.
 715 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
 716   return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
 717 }
 718
 719 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
 720 /// be used for loads / stores from the global.
 721 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
 722   return (Subtarget->isThumb1Only() ? 127 : 4095);
 723 }
 724
 725 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
 726   unsigned NumVals = N->getNumValues();
 727   if (!NumVals)
 728     return Sched::RegPressure;
 729
 730   for (unsigned i = 0; i != NumVals; ++i) {
 731     EVT VT = N->getValueType(i);
 732     if (VT.isFloatingPoint() || VT.isVector())
 733       return Sched::Latency;
 734   }
 735
 736   if (!N->isMachineOpcode())
 737     return Sched::RegPressure;
 738
 739   // Load are scheduled for latency even if there instruction itinerary
 740   // is not available.
 741   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
 742   const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
 743   if (TID.mayLoad())
 744     return Sched::Latency;
 745
 746   const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
 747   if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
 748     return Sched::Latency;
 749   return Sched::RegPressure;
 750 }
 751
 752 unsigned
 753 ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
 754                                        MachineFunction &MF) const {
 755   switch (RC->getID()) {
 756   default:
 757     return 0;
 758   case ARM::tGPRRegClassID:
 759     return RegInfo->hasFP(MF) ? 4 : 5;
 760   case ARM::GPRRegClassID: {
 761     unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
 762     return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
 763   }
 764   case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
 765   case ARM::DPRRegClassID:
 766     return 32 - 10;
 767   }
 768 }
 769
 770 //===----------------------------------------------------------------------===//
 771 // Lowering Code
 772 //===----------------------------------------------------------------------===//
 773
 774 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
 775 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 776   switch (CC) {
 777   default: llvm_unreachable("Unknown condition code!");
 778   case ISD::SETNE:  return ARMCC::NE;
 779   case ISD::SETEQ:  return ARMCC::EQ;
 780   case ISD::SETGT:  return ARMCC::GT;
 781   case ISD::SETGE:  return ARMCC::GE;
 782   case ISD::SETLT:  return ARMCC::LT;
 783   case ISD::SETLE:  return ARMCC::LE;
 784   case ISD::SETUGT: return ARMCC::HI;
 785   case ISD::SETUGE: return ARMCC::HS;
 786   case ISD::SETULT: return ARMCC::LO;
 787   case ISD::SETULE: return ARMCC::LS;
 788   }
 789 }
 790
 791 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
 792 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 793                         ARMCC::CondCodes &CondCode2) {
 794   CondCode2 = ARMCC::AL;
 795   switch (CC) {
 796   default: llvm_unreachable("Unknown FP condition!");
 797   case ISD::SETEQ:
 798   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
 799   case ISD::SETGT:
 800   case ISD::SETOGT: CondCode = ARMCC::GT; break;
 801   case ISD::SETGE:
 802   case ISD::SETOGE: CondCode = ARMCC::GE; break;
 803   case ISD::SETOLT: CondCode = ARMCC::MI; break;
 804   case ISD::SETOLE: CondCode = ARMCC::LS; break;
 805   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
 806   case ISD::SETO:   CondCode = ARMCC::VC; break;
 807   case ISD::SETUO:  CondCode = ARMCC::VS; break;
 808   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
 809   case ISD::SETUGT: CondCode = ARMCC::HI; break;
 810   case ISD::SETUGE: CondCode = ARMCC::PL; break;
 811   case ISD::SETLT:
 812   case ISD::SETULT: CondCode = ARMCC::LT; break;
 813   case ISD::SETLE:
 814   case ISD::SETULE: CondCode = ARMCC::LE; break;
 815   case ISD::SETNE:
 816   case ISD::SETUNE: CondCode = ARMCC::NE; break;
 817   }
 818 }
 819
 820 //===----------------------------------------------------------------------===//
 821 //                      Calling Convention Implementation
 822 //===----------------------------------------------------------------------===//
 823
 824 #include "ARMGenCallingConv.inc"
 825
 826 // APCS f64 is in register pairs, possibly split to stack
 827 static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 828                           CCValAssign::LocInfo &LocInfo,
 829                           CCState &State, bool CanFail) {
 830   static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
 831
 832   // Try to get the first register.
 833   if (unsigned Reg = State.AllocateReg(RegList, 4))
 834     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 835   else {
 836     // For the 2nd half of a v2f64, do not fail.
 837     if (CanFail)
 838       return false;
 839
 840     // Put the whole thing on the stack.
 841     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 842                                            State.AllocateStack(8, 4),
 843                                            LocVT, LocInfo));
 844     return true;
 845   }
 846
 847   // Try to get the second register.
 848   if (unsigned Reg = State.AllocateReg(RegList, 4))
 849     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 850   else
 851     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 852                                            State.AllocateStack(4, 4),
 853                                            LocVT, LocInfo));
 854   return true;
 855 }
 856
 857 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 858                                    CCValAssign::LocInfo &LocInfo,
 859                                    ISD::ArgFlagsTy &ArgFlags,
 860                                    CCState &State) {
 861   if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 862     return false;
 863   if (LocVT == MVT::v2f64 &&
 864       !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 865     return false;
 866   return true;  // we handled it
 867 }
 868
 869 // AAPCS f64 is in aligned register pairs
 870 static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 871                            CCValAssign::LocInfo &LocInfo,
 872                            CCState &State, bool CanFail) {
 873   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 874   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 875   static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
 876
 877   unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
 878   if (Reg == 0) {
 879     // For the 2nd half of a v2f64, do not just fail.
 880     if (CanFail)
 881       return false;
 882
 883     // Put the whole thing on the stack.
 884     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 885                                            State.AllocateStack(8, 8),
 886                                            LocVT, LocInfo));
 887     return true;
 888   }
 889
 890   unsigned i;
 891   for (i = 0; i < 2; ++i)
 892     if (HiRegList[i] == Reg)
 893       break;
 894
 895   unsigned T = State.AllocateReg(LoRegList[i]);
 896   (void)T;
 897   assert(T == LoRegList[i] && "Could not allocate register");
 898
 899   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 900   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 901                                          LocVT, LocInfo));
 902   return true;
 903 }
 904
 905 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 906                                     CCValAssign::LocInfo &LocInfo,
 907                                     ISD::ArgFlagsTy &ArgFlags,
 908                                     CCState &State) {
 909   if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 910     return false;
 911   if (LocVT == MVT::v2f64 &&
 912       !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 913     return false;
 914   return true;  // we handled it
 915 }
 916
 917 static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 918                          CCValAssign::LocInfo &LocInfo, CCState &State) {
 919   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 920   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 921
 922   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
 923   if (Reg == 0)
 924     return false; // we didn't handle it
 925
 926   unsigned i;
 927   for (i = 0; i < 2; ++i)
 928     if (HiRegList[i] == Reg)
 929       break;
 930
 931   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 932   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 933                                          LocVT, LocInfo));
 934   return true;
 935 }
 936
 937 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 938                                       CCValAssign::LocInfo &LocInfo,
 939                                       ISD::ArgFlagsTy &ArgFlags,
 940                                       CCState &State) {
 941   if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 942     return false;
 943   if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 944     return false;
 945   return true;  // we handled it
 946 }
 947
 948 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
 949                                        CCValAssign::LocInfo &LocInfo,
 950                                        ISD::ArgFlagsTy &ArgFlags,
 951                                        CCState &State) {
 952   return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
 953                                    State);
 954 }
 955
 956 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 957 /// given CallingConvention value.
 958 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
 959                                                  bool Return,
 960                                                  bool isVarArg) const {
 961   switch (CC) {
 962   default:
 963     llvm_unreachable("Unsupported calling convention");
 964   case CallingConv::C:
 965   case CallingConv::Fast:
 966     // Use target triple & subtarget features to do actual dispatch.
 967     if (Subtarget->isAAPCS_ABI()) {
 968       if (Subtarget->hasVFP2() &&
 969           FloatABIType == FloatABI::Hard && !isVarArg)
 970         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 971       else
 972         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 973     } else
 974         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 975   case CallingConv::ARM_AAPCS_VFP:
 976     return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 977   case CallingConv::ARM_AAPCS:
 978     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 979   case CallingConv::ARM_APCS:
 980     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 981   }
 982 }
 983
 984 /// LowerCallResult - Lower the result values of a call into the
 985 /// appropriate copies out of appropriate physical registers.
 986 SDValue
 987 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 988                                    CallingConv::ID CallConv, bool isVarArg,
 989                                    const SmallVectorImpl<ISD::InputArg> &Ins,
 990                                    DebugLoc dl, SelectionDAG &DAG,
 991                                    SmallVectorImpl<SDValue> &InVals) const {
 992
 993   // Assign locations to each value returned by this call.
 994   SmallVector<CCValAssign, 16> RVLocs;
 995   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
 996                  RVLocs, *DAG.getContext());
 997   CCInfo.AnalyzeCallResult(Ins,
 998                            CCAssignFnForNode(CallConv, /* Return*/ true,
 999                                              isVarArg));
1000
1001   // Copy all of the result registers out of their specified physreg.
1002   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1003     CCValAssign VA = RVLocs[i];
1004
1005     SDValue Val;
1006     if (VA.needsCustom()) {
1007       // Handle f64 or half of a v2f64.
1008       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1009                                       InFlag);
1010       Chain = Lo.getValue(1);
1011       InFlag = Lo.getValue(2);
1012       VA = RVLocs[++i]; // skip ahead to next loc
1013       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1014                                       InFlag);
1015       Chain = Hi.getValue(1);
1016       InFlag = Hi.getValue(2);
1017       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1018
1019       if (VA.getLocVT() == MVT::v2f64) {
1020         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1021         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1022                           DAG.getConstant(0, MVT::i32));
1023
1024         VA = RVLocs[++i]; // skip ahead to next loc
1025         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1026         Chain = Lo.getValue(1);
1027         InFlag = Lo.getValue(2);
1028         VA = RVLocs[++i]; // skip ahead to next loc
1029         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1030         Chain = Hi.getValue(1);
1031         InFlag = Hi.getValue(2);
1032         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1033         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1034                           DAG.getConstant(1, MVT::i32));
1035       }
1036     } else {
1037       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1038                                InFlag);
1039       Chain = Val.getValue(1);
1040       InFlag = Val.getValue(2);
1041     }
1042
1043     switch (VA.getLocInfo()) {
1044     default: llvm_unreachable("Unknown loc info!");
1045     case CCValAssign::Full: break;
1046     case CCValAssign::BCvt:
1047       Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
1048       break;
1049     }
1050
1051     InVals.push_back(Val);
1052   }
1053
1054   return Chain;
1055 }
1056
1057 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1058 /// by "Src" to address "Dst" of size "Size".  Alignment information is
1059 /// specified by the specific parameter attribute.  The copy will be passed as
1060 /// a byval function parameter.
1061 /// Sometimes what we are copying is the end of a larger object, the part that
1062 /// does not fit in registers.
1063 static SDValue
1064 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1065                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1066                           DebugLoc dl) {
1067   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1068   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
1069                        /*isVolatile=*/false, /*AlwaysInline=*/false,
1070                        NULL, 0, NULL, 0);
1071 }
1072
1073 /// LowerMemOpCallTo - Store the argument to the stack.
1074 SDValue
1075 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
1076                                     SDValue StackPtr, SDValue Arg,
1077                                     DebugLoc dl, SelectionDAG &DAG,
1078                                     const CCValAssign &VA,
1079                                     ISD::ArgFlagsTy Flags) const {
1080   unsigned LocMemOffset = VA.getLocMemOffset();
1081   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1082   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
1083   if (Flags.isByVal()) {
1084     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1085   }
1086   return DAG.getStore(Chain, dl, Arg, PtrOff,
1087                       PseudoSourceValue::getStack(), LocMemOffset,
1088                       false, false, 0);
1089 }
1090
1091 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
1092                                          SDValue Chain, SDValue &Arg,
1093                                          RegsToPassVector &RegsToPass,
1094                                          CCValAssign &VA, CCValAssign &NextVA,
1095                                          SDValue &StackPtr,
1096                                          SmallVector<SDValue, 8> &MemOpChains,
1097                                          ISD::ArgFlagsTy Flags) const {
1098
1099   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1100                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
1101   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
1102
1103   if (NextVA.isRegLoc())
1104     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
1105   else {
1106     assert(NextVA.isMemLoc());
1107     if (StackPtr.getNode() == 0)
1108       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1109
1110     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
1111                                            dl, DAG, NextVA,
1112                                            Flags));
1113   }
1114 }
1115
1116 /// LowerCall - Lowering a call into a callseq_start <-
1117 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
1118 /// nodes.
1119 SDValue
1120 ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1121                              CallingConv::ID CallConv, bool isVarArg,
1122                              bool &isTailCall,
1123                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1124                              const SmallVectorImpl<SDValue> &OutVals,
1125                              const SmallVectorImpl<ISD::InputArg> &Ins,
1126                              DebugLoc dl, SelectionDAG &DAG,
1127                              SmallVectorImpl<SDValue> &InVals) const {
1128   MachineFunction &MF = DAG.getMachineFunction();
1129   bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1130   bool IsSibCall = false;
1131   // Temporarily disable tail calls so things don't break.
1132   if (!EnableARMTailCalls)
1133     isTailCall = false;
1134   if (isTailCall) {
1135     // Check if it's really possible to do a tail call.
1136     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1137                     isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
1138                                                    Outs, OutVals, Ins, DAG);
1139     // We don't support GuaranteedTailCallOpt for ARM, only automatically
1140     // detected sibcalls.
1141     if (isTailCall) {
1142       ++NumTailCalls;
1143       IsSibCall = true;
1144     }
1145   }
1146
1147   // Analyze operands of the call, assigning locations to each operand.
1148   SmallVector<CCValAssign, 16> ArgLocs;
1149   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1150                  *DAG.getContext());
1151   CCInfo.AnalyzeCallOperands(Outs,
1152                              CCAssignFnForNode(CallConv, /* Return*/ false,
1153                                                isVarArg));
1154
1155   // Get a count of how many bytes are to be pushed on the stack.
1156   unsigned NumBytes = CCInfo.getNextStackOffset();
1157
1158   // For tail calls, memory operands are available in our caller's stack.
1159   if (IsSibCall)
1160     NumBytes = 0;
1161
1162   // Adjust the stack pointer for the new arguments...
1163   // These operations are automatically eliminated by the prolog/epilog pass
1164   if (!IsSibCall)
1165     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
1166
1167   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
1168
1169   RegsToPassVector RegsToPass;
1170   SmallVector<SDValue, 8> MemOpChains;
1171
1172   // Walk the register/memloc assignments, inserting copies/loads.  In the case
1173   // of tail call optimization, arguments are handled later.
1174   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1175        i != e;
1176        ++i, ++realArgIdx) {
1177     CCValAssign &VA = ArgLocs[i];
1178     SDValue Arg = OutVals[realArgIdx];
1179     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1180
1181     // Promote the value if needed.
1182     switch (VA.getLocInfo()) {
1183     default: llvm_unreachable("Unknown loc info!");
1184     case CCValAssign::Full: break;
1185     case CCValAssign::SExt:
1186       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1187       break;
1188     case CCValAssign::ZExt:
1189       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1190       break;
1191     case CCValAssign::AExt:
1192       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1193       break;
1194     case CCValAssign::BCvt:
1195       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1196       break;
1197     }
1198
1199     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1200     if (VA.needsCustom()) {
1201       if (VA.getLocVT() == MVT::v2f64) {
1202         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1203                                   DAG.getConstant(0, MVT::i32));
1204         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1205                                   DAG.getConstant(1, MVT::i32));
1206
1207         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1208                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1209
1210         VA = ArgLocs[++i]; // skip ahead to next loc
1211         if (VA.isRegLoc()) {
1212           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1213                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1214         } else {
1215           assert(VA.isMemLoc());
1216
1217           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1218                                                  dl, DAG, VA, Flags));
1219         }
1220       } else {
1221         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1222                          StackPtr, MemOpChains, Flags);
1223       }
1224     } else if (VA.isRegLoc()) {
1225       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1226     } else if (!IsSibCall) {
1227       assert(VA.isMemLoc());
1228
1229       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1230                                              dl, DAG, VA, Flags));
1231     }
1232   }
1233
1234   if (!MemOpChains.empty())
1235     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1236                         &MemOpChains[0], MemOpChains.size());
1237
1238   // Build a sequence of copy-to-reg nodes chained together with token chain
1239   // and flag operands which copy the outgoing args into the appropriate regs.
1240   SDValue InFlag;
1241   // Tail call byval lowering might overwrite argument registers so in case of
1242   // tail call optimization the copies to registers are lowered later.
1243   if (!isTailCall)
1244     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1245       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1246                                RegsToPass[i].second, InFlag);
1247       InFlag = Chain.getValue(1);
1248     }
1249
1250   // For tail calls lower the arguments to the 'real' stack slot.
1251   if (isTailCall) {
1252     // Force all the incoming stack arguments to be loaded from the stack
1253     // before any new outgoing arguments are stored to the stack, because the
1254     // outgoing stack slots may alias the incoming argument stack slots, and
1255     // the alias isn't otherwise explicit. This is slightly more conservative
1256     // than necessary, because it means that each store effectively depends
1257     // on every argument instead of just those arguments it would clobber.
1258
1259     // Do not flag preceeding copytoreg stuff together with the following stuff.
1260     InFlag = SDValue();
1261     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1262       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1263                                RegsToPass[i].second, InFlag);
1264       InFlag = Chain.getValue(1);
1265     }
1266     InFlag =SDValue();
1267   }
1268
1269   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1270   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1271   // node so that legalize doesn't hack it.
1272   bool isDirect = false;
1273   bool isARMFunc = false;
1274   bool isLocalARMFunc = false;
1275   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1276
1277   if (EnableARMLongCalls) {
1278     assert (getTargetMachine().getRelocationModel() == Reloc::Static
1279             && "long-calls with non-static relocation model!");
1280     // Handle a global address or an external symbol. If it's not one of
1281     // those, the target's already in a register, so we don't need to do
1282     // anything extra.
1283     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1284       const GlobalValue *GV = G->getGlobal();
1285       // Create a constant pool entry for the callee address
1286       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1287       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1288                                                            ARMPCLabelIndex,
1289                                                            ARMCP::CPValue, 0);
1290       // Get the address of the callee into a register
1291       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1292       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1293       Callee = DAG.getLoad(getPointerTy(), dl,
1294                            DAG.getEntryNode(), CPAddr,
1295                            PseudoSourceValue::getConstantPool(), 0,
1296                            false, false, 0);
1297     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
1298       const char *Sym = S->getSymbol();
1299
1300       // Create a constant pool entry for the callee address
1301       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1302       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1303                                                        Sym, ARMPCLabelIndex, 0);
1304       // Get the address of the callee into a register
1305       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1306       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1307       Callee = DAG.getLoad(getPointerTy(), dl,
1308                            DAG.getEntryNode(), CPAddr,
1309                            PseudoSourceValue::getConstantPool(), 0,
1310                            false, false, 0);
1311     }
1312   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1313     const GlobalValue *GV = G->getGlobal();
1314     isDirect = true;
1315     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
1316     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
1317                    getTargetMachine().getRelocationModel() != Reloc::Static;
1318     isARMFunc = !Subtarget->isThumb() || isStub;
1319     // ARM call to a local ARM function is predicable.
1320     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
1321     // tBX takes a register source operand.
1322     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1323       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1324       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
1325                                                            ARMPCLabelIndex,
1326                                                            ARMCP::CPValue, 4);
1327       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1328       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1329       Callee = DAG.getLoad(getPointerTy(), dl,
1330                            DAG.getEntryNode(), CPAddr,
1331                            PseudoSourceValue::getConstantPool(), 0,
1332                            false, false, 0);
1333       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1334       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1335                            getPointerTy(), Callee, PICLabel);
1336     } else
1337       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
1338   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1339     isDirect = true;
1340     bool isStub = Subtarget->isTargetDarwin() &&
1341                   getTargetMachine().getRelocationModel() != Reloc::Static;
1342     isARMFunc = !Subtarget->isThumb() || isStub;
1343     // tBX takes a register source operand.
1344     const char *Sym = S->getSymbol();
1345     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
1346       unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1347       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1348                                                        Sym, ARMPCLabelIndex, 4);
1349       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
1350       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1351       Callee = DAG.getLoad(getPointerTy(), dl,
1352                            DAG.getEntryNode(), CPAddr,
1353                            PseudoSourceValue::getConstantPool(), 0,
1354                            false, false, 0);
1355       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1356       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
1357                            getPointerTy(), Callee, PICLabel);
1358     } else
1359       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
1360   }
1361
1362   // FIXME: handle tail calls differently.
1363   unsigned CallOpc;
1364   if (Subtarget->isThumb()) {
1365     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
1366       CallOpc = ARMISD::CALL_NOLINK;
1367     else
1368       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
1369   } else {
1370     CallOpc = (isDirect || Subtarget->hasV5TOps())
1371       ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
1372       : ARMISD::CALL_NOLINK;
1373   }
1374
1375   std::vector<SDValue> Ops;
1376   Ops.push_back(Chain);
1377   Ops.push_back(Callee);
1378
1379   // Add argument registers to the end of the list so that they are known live
1380   // into the call.
1381   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1382     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1383                                   RegsToPass[i].second.getValueType()));
1384
1385   if (InFlag.getNode())
1386     Ops.push_back(InFlag);
1387
1388   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1389   if (isTailCall)
1390     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1391
1392   // Returns a chain and a flag for retval copy to use.
1393   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
1394   InFlag = Chain.getValue(1);
1395
1396   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1397                              DAG.getIntPtrConstant(0, true), InFlag);
1398   if (!Ins.empty())
1399     InFlag = Chain.getValue(1);
1400
1401   // Handle result values, copying them out of physregs into vregs that we
1402   // return.
1403   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1404                          dl, DAG, InVals);
1405 }
1406
1407 /// MatchingStackOffset - Return true if the given stack call argument is
1408 /// already available in the same position (relatively) of the caller's
1409 /// incoming argument stack.
1410 static
1411 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
1412                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
1413                          const ARMInstrInfo *TII) {
1414   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
1415   int FI = INT_MAX;
1416   if (Arg.getOpcode() == ISD::CopyFromReg) {
1417     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
1418     if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
1419       return false;
1420     MachineInstr *Def = MRI->getVRegDef(VR);
1421     if (!Def)
1422       return false;
1423     if (!Flags.isByVal()) {
1424       if (!TII->isLoadFromStackSlot(Def, FI))
1425         return false;
1426     } else {
1427       return false;
1428     }
1429   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
1430     if (Flags.isByVal())
1431       // ByVal argument is passed in as a pointer but it's now being
1432       // dereferenced. e.g.
1433       // define @foo(%struct.X* %A) {
1434       //   tail call @bar(%struct.X* byval %A)
1435       // }
1436       return false;
1437     SDValue Ptr = Ld->getBasePtr();
1438     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
1439     if (!FINode)
1440       return false;
1441     FI = FINode->getIndex();
1442   } else
1443     return false;
1444
1445   assert(FI != INT_MAX);
1446   if (!MFI->isFixedObjectIndex(FI))
1447     return false;
1448   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
1449 }
1450
1451 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
1452 /// for tail call optimization. Targets which want to do tail call
1453 /// optimization should implement this function.
1454 bool
1455 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
1456                                                      CallingConv::ID CalleeCC,
1457                                                      bool isVarArg,
1458                                                      bool isCalleeStructRet,
1459                                                      bool isCallerStructRet,
1460                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1461                                     const SmallVectorImpl<SDValue> &OutVals,
1462                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1463                                                      SelectionDAG& DAG) const {
1464   const Function *CallerF = DAG.getMachineFunction().getFunction();
1465   CallingConv::ID CallerCC = CallerF->getCallingConv();
1466   bool CCMatch = CallerCC == CalleeCC;
1467
1468   // Look for obvious safe cases to perform tail call optimization that do not
1469   // require ABI changes. This is what gcc calls sibcall.
1470
1471   // Do not sibcall optimize vararg calls unless the call site is not passing
1472   // any arguments.
1473   if (isVarArg && !Outs.empty())
1474     return false;
1475
1476   // Also avoid sibcall optimization if either caller or callee uses struct
1477   // return semantics.
1478   if (isCalleeStructRet || isCallerStructRet)
1479     return false;
1480
1481   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
1482   // emitEpilogue is not ready for them.
1483   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
1484   // LR.  This means if we need to reload LR, it takes an extra instructions,
1485   // which outweighs the value of the tail call; but here we don't know yet
1486   // whether LR is going to be used.  Probably the right approach is to
1487   // generate the tail call here and turn it back into CALL/RET in
1488   // emitEpilogue if LR is used.
1489   if (Subtarget->isThumb1Only())
1490     return false;
1491
1492   // For the moment, we can only do this to functions defined in this
1493   // compilation, or to indirect calls.  A Thumb B to an ARM function,
1494   // or vice versa, is not easily fixed up in the linker unlike BL.
1495   // (We could do this by loading the address of the callee into a register;
1496   // that is an extra instruction over the direct call and burns a register
1497   // as well, so is not likely to be a win.)
1498
1499   // It might be safe to remove this restriction on non-Darwin.
1500
1501   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
1502   // but we need to make sure there are enough registers; the only valid
1503   // registers are the 4 used for parameters.  We don't currently do this
1504   // case.
1505   if (isa<ExternalSymbolSDNode>(Callee))
1506       return false;
1507
1508   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1509     const GlobalValue *GV = G->getGlobal();
1510     if (GV->isDeclaration() || GV->isWeakForLinker())
1511       return false;
1512   }
1513
1514   // If the calling conventions do not match, then we'd better make sure the
1515   // results are returned in the same way as what the caller expects.
1516   if (!CCMatch) {
1517     SmallVector<CCValAssign, 16> RVLocs1;
1518     CCState CCInfo1(CalleeCC, false, getTargetMachine(),
1519                     RVLocs1, *DAG.getContext());
1520     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
1521
1522     SmallVector<CCValAssign, 16> RVLocs2;
1523     CCState CCInfo2(CallerCC, false, getTargetMachine(),
1524                     RVLocs2, *DAG.getContext());
1525     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
1526
1527     if (RVLocs1.size() != RVLocs2.size())
1528       return false;
1529     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
1530       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
1531         return false;
1532       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
1533         return false;
1534       if (RVLocs1[i].isRegLoc()) {
1535         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
1536           return false;
1537       } else {
1538         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
1539           return false;
1540       }
1541     }
1542   }
1543
1544   // If the callee takes no arguments then go on to check the results of the
1545   // call.
1546   if (!Outs.empty()) {
1547     // Check if stack adjustment is needed. For now, do not do this if any
1548     // argument is passed on the stack.
1549     SmallVector<CCValAssign, 16> ArgLocs;
1550     CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
1551                    ArgLocs, *DAG.getContext());
1552     CCInfo.AnalyzeCallOperands(Outs,
1553                                CCAssignFnForNode(CalleeCC, false, isVarArg));
1554     if (CCInfo.getNextStackOffset()) {
1555       MachineFunction &MF = DAG.getMachineFunction();
1556
1557       // Check if the arguments are already laid out in the right way as
1558       // the caller's fixed stack objects.
1559       MachineFrameInfo *MFI = MF.getFrameInfo();
1560       const MachineRegisterInfo *MRI = &MF.getRegInfo();
1561       const ARMInstrInfo *TII =
1562         ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
1563       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1564            i != e;
1565            ++i, ++realArgIdx) {
1566         CCValAssign &VA = ArgLocs[i];
1567         EVT RegVT = VA.getLocVT();
1568         SDValue Arg = OutVals[realArgIdx];
1569         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1570         if (VA.getLocInfo() == CCValAssign::Indirect)
1571           return false;
1572         if (VA.needsCustom()) {
1573           // f64 and vector types are split into multiple registers or
1574           // register/stack-slot combinations.  The types will not match
1575           // the registers; give up on memory f64 refs until we figure
1576           // out what to do about this.
1577           if (!VA.isRegLoc())
1578             return false;
1579           if (!ArgLocs[++i].isRegLoc())
1580             return false;
1581           if (RegVT == MVT::v2f64) {
1582             if (!ArgLocs[++i].isRegLoc())
1583               return false;
1584             if (!ArgLocs[++i].isRegLoc())
1585               return false;
1586           }
1587         } else if (!VA.isRegLoc()) {
1588           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
1589                                    MFI, MRI, TII))
1590             return false;
1591         }
1592       }
1593     }
1594   }
1595
1596   return true;
1597 }
1598
1599 SDValue
1600 ARMTargetLowering::LowerReturn(SDValue Chain,
1601                                CallingConv::ID CallConv, bool isVarArg,
1602                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1603                                const SmallVectorImpl<SDValue> &OutVals,
1604                                DebugLoc dl, SelectionDAG &DAG) const {
1605
1606   // CCValAssign - represent the assignment of the return value to a location.
1607   SmallVector<CCValAssign, 16> RVLocs;
1608
1609   // CCState - Info about the registers and stack slots.
1610   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1611                  *DAG.getContext());
1612
1613   // Analyze outgoing return values.
1614   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1615                                                isVarArg));
1616
1617   // If this is the first return lowered for this function, add
1618   // the regs to the liveout set for the function.
1619   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1620     for (unsigned i = 0; i != RVLocs.size(); ++i)
1621       if (RVLocs[i].isRegLoc())
1622         DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1623   }
1624
1625   SDValue Flag;
1626
1627   // Copy the result values into the output registers.
1628   for (unsigned i = 0, realRVLocIdx = 0;
1629        i != RVLocs.size();
1630        ++i, ++realRVLocIdx) {
1631     CCValAssign &VA = RVLocs[i];
1632     assert(VA.isRegLoc() && "Can only return in registers!");
1633
1634     SDValue Arg = OutVals[realRVLocIdx];
1635
1636     switch (VA.getLocInfo()) {
1637     default: llvm_unreachable("Unknown loc info!");
1638     case CCValAssign::Full: break;
1639     case CCValAssign::BCvt:
1640       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1641       break;
1642     }
1643
1644     if (VA.needsCustom()) {
1645       if (VA.getLocVT() == MVT::v2f64) {
1646         // Extract the first half and return it in two registers.
1647         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1648                                    DAG.getConstant(0, MVT::i32));
1649         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
1650                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
1651
1652         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1653         Flag = Chain.getValue(1);
1654         VA = RVLocs[++i]; // skip ahead to next loc
1655         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1656                                  HalfGPRs.getValue(1), Flag);
1657         Flag = Chain.getValue(1);
1658         VA = RVLocs[++i]; // skip ahead to next loc
1659
1660         // Extract the 2nd half and fall through to handle it as an f64 value.
1661         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1662                           DAG.getConstant(1, MVT::i32));
1663       }
1664       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1665       // available.
1666       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1667                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1668       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1669       Flag = Chain.getValue(1);
1670       VA = RVLocs[++i]; // skip ahead to next loc
1671       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1672                                Flag);
1673     } else
1674       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1675
1676     // Guarantee that all emitted copies are
1677     // stuck together, avoiding something bad.
1678     Flag = Chain.getValue(1);
1679   }
1680
1681   SDValue result;
1682   if (Flag.getNode())
1683     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1684   else // Return Void
1685     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1686
1687   return result;
1688 }
1689
1690 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1691 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1692 // one of the above mentioned nodes. It has to be wrapped because otherwise
1693 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1694 // be used to form addressing mode. These wrapped nodes will be selected
1695 // into MOVi.
1696 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1697   EVT PtrVT = Op.getValueType();
1698   // FIXME there is no actual debug info here
1699   DebugLoc dl = Op.getDebugLoc();
1700   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1701   SDValue Res;
1702   if (CP->isMachineConstantPoolEntry())
1703     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1704                                     CP->getAlignment());
1705   else
1706     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1707                                     CP->getAlignment());
1708   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1709 }
1710
1711 unsigned ARMTargetLowering::getJumpTableEncoding() const {
1712   return MachineJumpTableInfo::EK_Inline;
1713 }
1714
1715 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
1716                                              SelectionDAG &DAG) const {
1717   MachineFunction &MF = DAG.getMachineFunction();
1718   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1719   unsigned ARMPCLabelIndex = 0;
1720   DebugLoc DL = Op.getDebugLoc();
1721   EVT PtrVT = getPointerTy();
1722   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1723   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1724   SDValue CPAddr;
1725   if (RelocM == Reloc::Static) {
1726     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
1727   } else {
1728     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1729     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1730     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
1731                                                          ARMCP::CPBlockAddress,
1732                                                          PCAdj);
1733     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1734   }
1735   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
1736   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
1737                                PseudoSourceValue::getConstantPool(), 0,
1738                                false, false, 0);
1739   if (RelocM == Reloc::Static)
1740     return Result;
1741   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1742   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
1743 }
1744
1745 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
1746 SDValue
1747 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1748                                                  SelectionDAG &DAG) const {
1749   DebugLoc dl = GA->getDebugLoc();
1750   EVT PtrVT = getPointerTy();
1751   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1752   MachineFunction &MF = DAG.getMachineFunction();
1753   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1754   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1755   ARMConstantPoolValue *CPV =
1756     new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1757                              ARMCP::CPValue, PCAdj, "tlsgd", true);
1758   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1759   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1760   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
1761                          PseudoSourceValue::getConstantPool(), 0,
1762                          false, false, 0);
1763   SDValue Chain = Argument.getValue(1);
1764
1765   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1766   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1767
1768   // call __tls_get_addr.
1769   ArgListTy Args;
1770   ArgListEntry Entry;
1771   Entry.Node = Argument;
1772   Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
1773   Args.push_back(Entry);
1774   // FIXME: is there useful debug info available here?
1775   std::pair<SDValue, SDValue> CallResult =
1776     LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
1777                 false, false, false, false,
1778                 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1779                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1780   return CallResult.first;
1781 }
1782
1783 // Lower ISD::GlobalTLSAddress using the "initial exec" or
1784 // "local exec" model.
1785 SDValue
1786 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1787                                         SelectionDAG &DAG) const {
1788   const GlobalValue *GV = GA->getGlobal();
1789   DebugLoc dl = GA->getDebugLoc();
1790   SDValue Offset;
1791   SDValue Chain = DAG.getEntryNode();
1792   EVT PtrVT = getPointerTy();
1793   // Get the Thread Pointer
1794   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1795
1796   if (GV->isDeclaration()) {
1797     MachineFunction &MF = DAG.getMachineFunction();
1798     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1799     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1800     // Initial exec model.
1801     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1802     ARMConstantPoolValue *CPV =
1803       new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
1804                                ARMCP::CPValue, PCAdj, "gottpoff", true);
1805     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1806     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1807     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1808                          PseudoSourceValue::getConstantPool(), 0,
1809                          false, false, 0);
1810     Chain = Offset.getValue(1);
1811
1812     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1813     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1814
1815     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1816                          PseudoSourceValue::getConstantPool(), 0,
1817                          false, false, 0);
1818   } else {
1819     // local exec model
1820     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
1821     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1822     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1823     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
1824                          PseudoSourceValue::getConstantPool(), 0,
1825                          false, false, 0);
1826   }
1827
1828   // The address of the thread local variable is the add of the thread
1829   // pointer with the offset of the variable.
1830   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1831 }
1832
1833 SDValue
1834 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
1835   // TODO: implement the "local dynamic" model
1836   assert(Subtarget->isTargetELF() &&
1837          "TLS not implemented for non-ELF targets");
1838   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1839   // If the relocation model is PIC, use the "General Dynamic" TLS Model,
1840   // otherwise use the "Local Exec" TLS Model
1841   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1842     return LowerToTLSGeneralDynamicModel(GA, DAG);
1843   else
1844     return LowerToTLSExecModels(GA, DAG);
1845 }
1846
1847 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1848                                                  SelectionDAG &DAG) const {
1849   EVT PtrVT = getPointerTy();
1850   DebugLoc dl = Op.getDebugLoc();
1851   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1852   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1853   if (RelocM == Reloc::PIC_) {
1854     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1855     ARMConstantPoolValue *CPV =
1856       new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
1857     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1858     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1859     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1860                                  CPAddr,
1861                                  PseudoSourceValue::getConstantPool(), 0,
1862                                  false, false, 0);
1863     SDValue Chain = Result.getValue(1);
1864     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1865     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1866     if (!UseGOTOFF)
1867       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1868                            PseudoSourceValue::getGOT(), 0,
1869                            false, false, 0);
1870     return Result;
1871   } else {
1872     // If we have T2 ops, we can materialize the address directly via movt/movw
1873     // pair. This is always cheaper.
1874     if (Subtarget->useMovt()) {
1875       return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
1876                          DAG.getTargetGlobalAddress(GV, dl, PtrVT));
1877     } else {
1878       SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1879       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1880       return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1881                          PseudoSourceValue::getConstantPool(), 0,
1882                          false, false, 0);
1883     }
1884   }
1885 }
1886
1887 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1888                                                     SelectionDAG &DAG) const {
1889   MachineFunction &MF = DAG.getMachineFunction();
1890   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1891   unsigned ARMPCLabelIndex = 0;
1892   EVT PtrVT = getPointerTy();
1893   DebugLoc dl = Op.getDebugLoc();
1894   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1895   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1896   SDValue CPAddr;
1897   if (RelocM == Reloc::Static)
1898     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1899   else {
1900     ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1901     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
1902     ARMConstantPoolValue *CPV =
1903       new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
1904     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1905   }
1906   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1907
1908   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1909                                PseudoSourceValue::getConstantPool(), 0,
1910                                false, false, 0);
1911   SDValue Chain = Result.getValue(1);
1912
1913   if (RelocM == Reloc::PIC_) {
1914     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1915     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1916   }
1917
1918   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
1919     Result = DAG.getLoad(PtrVT, dl, Chain, Result,
1920                          PseudoSourceValue::getGOT(), 0,
1921                          false, false, 0);
1922
1923   return Result;
1924 }
1925
1926 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1927                                                     SelectionDAG &DAG) const {
1928   assert(Subtarget->isTargetELF() &&
1929          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1930   MachineFunction &MF = DAG.getMachineFunction();
1931   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1932   unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1933   EVT PtrVT = getPointerTy();
1934   DebugLoc dl = Op.getDebugLoc();
1935   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1936   ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1937                                                        "_GLOBAL_OFFSET_TABLE_",
1938                                                        ARMPCLabelIndex, PCAdj);
1939   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1940   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1941   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1942                                PseudoSourceValue::getConstantPool(), 0,
1943                                false, false, 0);
1944   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1945   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1946 }
1947
1948 SDValue
1949 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
1950   DebugLoc dl = Op.getDebugLoc();
1951   SDValue Val = DAG.getConstant(0, MVT::i32);
1952   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
1953                      Op.getOperand(1), Val);
1954 }
1955
1956 SDValue
1957 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
1958   DebugLoc dl = Op.getDebugLoc();
1959   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
1960                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
1961 }
1962
1963 SDValue
1964 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1965                                           const ARMSubtarget *Subtarget) const {
1966   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1967   DebugLoc dl = Op.getDebugLoc();
1968   switch (IntNo) {
1969   default: return SDValue();    // Don't custom lower most intrinsics.
1970   case Intrinsic::arm_thread_pointer: {
1971     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1972     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1973   }
1974   case Intrinsic::eh_sjlj_lsda: {
1975     MachineFunction &MF = DAG.getMachineFunction();
1976     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1977     unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1978     EVT PtrVT = getPointerTy();
1979     DebugLoc dl = Op.getDebugLoc();
1980     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1981     SDValue CPAddr;
1982     unsigned PCAdj = (RelocM != Reloc::PIC_)
1983       ? 0 : (Subtarget->isThumb() ? 4 : 8);
1984     ARMConstantPoolValue *CPV =
1985       new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1986                                ARMCP::CPLSDA, PCAdj);
1987     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1988     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1989     SDValue Result =
1990       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1991                   PseudoSourceValue::getConstantPool(), 0,
1992                   false, false, 0);
1993
1994     if (RelocM == Reloc::PIC_) {
1995       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1996       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1997     }
1998     return Result;
1999   }
2000   }
2001 }
2002
2003 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
2004                                const ARMSubtarget *Subtarget) {
2005   DebugLoc dl = Op.getDebugLoc();
2006   SDValue Op5 = Op.getOperand(5);
2007   unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
2008   // Some subtargets which have dmb and dsb instructions can handle barriers
2009   // directly. Some ARMv6 cpus can support them with the help of mcr
2010   // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
2011   // never get here.
2012   unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
2013   if (Subtarget->hasDataBarrier())
2014     return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
2015   else {
2016     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
2017            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
2018     return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
2019                        DAG.getConstant(0, MVT::i32));
2020   }
2021 }
2022
2023 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
2024   MachineFunction &MF = DAG.getMachineFunction();
2025   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
2026
2027   // vastart just stores the address of the VarArgsFrameIndex slot into the
2028   // memory location argument.
2029   DebugLoc dl = Op.getDebugLoc();
2030   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2031   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2032   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2033   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
2034                       false, false, 0);
2035 }
2036
2037 SDValue
2038 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
2039                                         SDValue &Root, SelectionDAG &DAG,
2040                                         DebugLoc dl) const {
2041   MachineFunction &MF = DAG.getMachineFunction();
2042   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2043
2044   TargetRegisterClass *RC;
2045   if (AFI->isThumb1OnlyFunction())
2046     RC = ARM::tGPRRegisterClass;
2047   else
2048     RC = ARM::GPRRegisterClass;
2049
2050   // Transform the arguments stored in physical registers into virtual ones.
2051   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2052   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2053
2054   SDValue ArgValue2;
2055   if (NextVA.isMemLoc()) {
2056     MachineFrameInfo *MFI = MF.getFrameInfo();
2057     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
2058
2059     // Create load node to retrieve arguments from the stack.
2060     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2061     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
2062                             PseudoSourceValue::getFixedStack(FI), 0,
2063                             false, false, 0);
2064   } else {
2065     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2066     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
2067   }
2068
2069   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
2070 }
2071
2072 SDValue
2073 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
2074                                         CallingConv::ID CallConv, bool isVarArg,
2075                                         const SmallVectorImpl<ISD::InputArg>
2076                                           &Ins,
2077                                         DebugLoc dl, SelectionDAG &DAG,
2078                                         SmallVectorImpl<SDValue> &InVals)
2079                                           const {
2080
2081   MachineFunction &MF = DAG.getMachineFunction();
2082   MachineFrameInfo *MFI = MF.getFrameInfo();
2083
2084   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2085
2086   // Assign locations to all of the incoming arguments.
2087   SmallVector<CCValAssign, 16> ArgLocs;
2088   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
2089                  *DAG.getContext());
2090   CCInfo.AnalyzeFormalArguments(Ins,
2091                                 CCAssignFnForNode(CallConv, /* Return*/ false,
2092                                                   isVarArg));
2093
2094   SmallVector<SDValue, 16> ArgValues;
2095
2096   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2097     CCValAssign &VA = ArgLocs[i];
2098
2099     // Arguments stored in registers.
2100     if (VA.isRegLoc()) {
2101       EVT RegVT = VA.getLocVT();
2102
2103       SDValue ArgValue;
2104       if (VA.needsCustom()) {
2105         // f64 and vector types are split up into multiple registers or
2106         // combinations of registers and stack slots.
2107         if (VA.getLocVT() == MVT::v2f64) {
2108           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
2109                                                    Chain, DAG, dl);
2110           VA = ArgLocs[++i]; // skip ahead to next loc
2111           SDValue ArgValue2;
2112           if (VA.isMemLoc()) {
2113             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
2114             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2115             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
2116                                     PseudoSourceValue::getFixedStack(FI), 0,
2117                                     false, false, 0);
2118           } else {
2119             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
2120                                              Chain, DAG, dl);
2121           }
2122           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2123           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2124                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
2125           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
2126                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
2127         } else
2128           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
2129
2130       } else {
2131         TargetRegisterClass *RC;
2132
2133         if (RegVT == MVT::f32)
2134           RC = ARM::SPRRegisterClass;
2135         else if (RegVT == MVT::f64)
2136           RC = ARM::DPRRegisterClass;
2137         else if (RegVT == MVT::v2f64)
2138           RC = ARM::QPRRegisterClass;
2139         else if (RegVT == MVT::i32)
2140           RC = (AFI->isThumb1OnlyFunction() ?
2141                 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
2142         else
2143           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2144
2145         // Transform the arguments in physical registers into virtual ones.
2146         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2147         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2148       }
2149
2150       // If this is an 8 or 16-bit value, it is really passed promoted
2151       // to 32 bits.  Insert an assert[sz]ext to capture this, then
2152       // truncate to the right size.
2153       switch (VA.getLocInfo()) {
2154       default: llvm_unreachable("Unknown loc info!");
2155       case CCValAssign::Full: break;
2156       case CCValAssign::BCvt:
2157         ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
2158         break;
2159       case CCValAssign::SExt:
2160         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2161                                DAG.getValueType(VA.getValVT()));
2162         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2163         break;
2164       case CCValAssign::ZExt:
2165         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2166                                DAG.getValueType(VA.getValVT()));
2167         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2168         break;
2169       }
2170
2171       InVals.push_back(ArgValue);
2172
2173     } else { // VA.isRegLoc()
2174
2175       // sanity check
2176       assert(VA.isMemLoc());
2177       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
2178
2179       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
2180       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
2181
2182       // Create load nodes to retrieve arguments from the stack.
2183       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2184       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2185                                    PseudoSourceValue::getFixedStack(FI), 0,
2186                                    false, false, 0));
2187     }
2188   }
2189
2190   // varargs
2191   if (isVarArg) {
2192     static const unsigned GPRArgRegs[] = {
2193       ARM::R0, ARM::R1, ARM::R2, ARM::R3
2194     };
2195
2196     unsigned NumGPRs = CCInfo.getFirstUnallocated
2197       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
2198
2199     unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
2200     unsigned VARegSize = (4 - NumGPRs) * 4;
2201     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
2202     unsigned ArgOffset = CCInfo.getNextStackOffset();
2203     if (VARegSaveSize) {
2204       // If this function is vararg, store any remaining integer argument regs
2205       // to their spots on the stack so that they may be loaded by deferencing
2206       // the result of va_next.
2207       AFI->setVarArgsRegSaveSize(VARegSaveSize);
2208       AFI->setVarArgsFrameIndex(
2209         MFI->CreateFixedObject(VARegSaveSize,
2210                                ArgOffset + VARegSaveSize - VARegSize,
2211                                true));
2212       SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
2213                                       getPointerTy());
2214
2215       SmallVector<SDValue, 4> MemOps;
2216       for (; NumGPRs < 4; ++NumGPRs) {
2217         TargetRegisterClass *RC;
2218         if (AFI->isThumb1OnlyFunction())
2219           RC = ARM::tGPRRegisterClass;
2220         else
2221           RC = ARM::GPRRegisterClass;
2222
2223         unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
2224         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2225         SDValue Store =
2226           DAG.getStore(Val.getValue(1), dl, Val, FIN,
2227                PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
2228                0, false, false, 0);
2229         MemOps.push_back(Store);
2230         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
2231                           DAG.getConstant(4, getPointerTy()));
2232       }
2233       if (!MemOps.empty())
2234         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2235                             &MemOps[0], MemOps.size());
2236     } else
2237       // This will point to the next argument passed via stack.
2238       AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
2239   }
2240
2241   return Chain;
2242 }
2243
2244 /// isFloatingPointZero - Return true if this is +0.0.
2245 static bool isFloatingPointZero(SDValue Op) {
2246   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
2247     return CFP->getValueAPF().isPosZero();
2248   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
2249     // Maybe this has already been legalized into the constant pool?
2250     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
2251       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
2252       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
2253         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
2254           return CFP->getValueAPF().isPosZero();
2255     }
2256   }
2257   return false;
2258 }
2259
2260 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
2261 /// the given operands.
2262 SDValue
2263 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2264                              SDValue &ARMcc, SelectionDAG &DAG,
2265                              DebugLoc dl) const {
2266   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2267     unsigned C = RHSC->getZExtValue();
2268     if (!isLegalICmpImmediate(C)) {
2269       // Constant does not fit, try adjusting it by one?
2270       switch (CC) {
2271       default: break;
2272       case ISD::SETLT:
2273       case ISD::SETGE:
2274         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
2275           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2276           RHS = DAG.getConstant(C-1, MVT::i32);
2277         }
2278         break;
2279       case ISD::SETULT:
2280       case ISD::SETUGE:
2281         if (C != 0 && isLegalICmpImmediate(C-1)) {
2282           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2283           RHS = DAG.getConstant(C-1, MVT::i32);
2284         }
2285         break;
2286       case ISD::SETLE:
2287       case ISD::SETGT:
2288         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
2289           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2290           RHS = DAG.getConstant(C+1, MVT::i32);
2291         }
2292         break;
2293       case ISD::SETULE:
2294       case ISD::SETUGT:
2295         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
2296           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2297           RHS = DAG.getConstant(C+1, MVT::i32);
2298         }
2299         break;
2300       }
2301     }
2302   }
2303
2304   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2305   ARMISD::NodeType CompareType;
2306   switch (CondCode) {
2307   default:
2308     CompareType = ARMISD::CMP;
2309     break;
2310   case ARMCC::EQ:
2311   case ARMCC::NE:
2312     // Uses only Z Flag
2313     CompareType = ARMISD::CMPZ;
2314     break;
2315   }
2316   ARMcc = DAG.getConstant(CondCode, MVT::i32);
2317   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
2318 }
2319
2320 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
2321 SDValue
2322 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
2323                              DebugLoc dl) const {
2324   SDValue Cmp;
2325   if (!isFloatingPointZero(RHS))
2326     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
2327   else
2328     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
2329   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
2330 }
2331
2332 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
2333   SDValue Cond = Op.getOperand(0);
2334   SDValue SelectTrue = Op.getOperand(1);
2335   SDValue SelectFalse = Op.getOperand(2);
2336   DebugLoc dl = Op.getDebugLoc();
2337
2338   // Convert:
2339   //
2340   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
2341   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
2342   //
2343   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
2344     const ConstantSDNode *CMOVTrue =
2345       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
2346     const ConstantSDNode *CMOVFalse =
2347       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
2348
2349     if (CMOVTrue && CMOVFalse) {
2350       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
2351       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
2352
2353       SDValue True;
2354       SDValue False;
2355       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
2356         True = SelectTrue;
2357         False = SelectFalse;
2358       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
2359         True = SelectFalse;
2360         False = SelectTrue;
2361       }
2362
2363       if (True.getNode() && False.getNode()) {
2364         EVT VT = Cond.getValueType();
2365         SDValue ARMcc = Cond.getOperand(2);
2366         SDValue CCR = Cond.getOperand(3);
2367         SDValue Cmp = Cond.getOperand(4);
2368         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
2369       }
2370     }
2371   }
2372
2373   return DAG.getSelectCC(dl, Cond,
2374                          DAG.getConstant(0, Cond.getValueType()),
2375                          SelectTrue, SelectFalse, ISD::SETNE);
2376 }
2377
2378 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
2379   EVT VT = Op.getValueType();
2380   SDValue LHS = Op.getOperand(0);
2381   SDValue RHS = Op.getOperand(1);
2382   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2383   SDValue TrueVal = Op.getOperand(2);
2384   SDValue FalseVal = Op.getOperand(3);
2385   DebugLoc dl = Op.getDebugLoc();
2386
2387   if (LHS.getValueType() == MVT::i32) {
2388     SDValue ARMcc;
2389     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2390     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2391     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
2392   }
2393
2394   ARMCC::CondCodes CondCode, CondCode2;
2395   FPCCToARMCC(CC, CondCode, CondCode2);
2396
2397   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2398   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2399   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2400   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
2401                                ARMcc, CCR, Cmp);
2402   if (CondCode2 != ARMCC::AL) {
2403     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
2404     // FIXME: Needs another CMP because flag can have but one use.
2405     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
2406     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
2407                          Result, TrueVal, ARMcc2, CCR, Cmp2);
2408   }
2409   return Result;
2410 }
2411
2412 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
2413 /// to morph to an integer compare sequence.
2414 static bool canChangeToInt(SDValue Op, bool &SeenZero,
2415                            const ARMSubtarget *Subtarget) {
2416   SDNode *N = Op.getNode();
2417   if (!N->hasOneUse())
2418     // Otherwise it requires moving the value from fp to integer registers.
2419     return false;
2420   if (!N->getNumValues())
2421     return false;
2422   EVT VT = Op.getValueType();
2423   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
2424     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
2425     // vmrs are very slow, e.g. cortex-a8.
2426     return false;
2427
2428   if (isFloatingPointZero(Op)) {
2429     SeenZero = true;
2430     return true;
2431   }
2432   return ISD::isNormalLoad(N);
2433 }
2434
2435 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
2436   if (isFloatingPointZero(Op))
2437     return DAG.getConstant(0, MVT::i32);
2438
2439   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
2440     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2441                        Ld->getChain(), Ld->getBasePtr(),
2442                        Ld->getSrcValue(), Ld->getSrcValueOffset(),
2443                        Ld->isVolatile(), Ld->isNonTemporal(),
2444                        Ld->getAlignment());
2445
2446   llvm_unreachable("Unknown VFP cmp argument!");
2447 }
2448
2449 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
2450                            SDValue &RetVal1, SDValue &RetVal2) {
2451   if (isFloatingPointZero(Op)) {
2452     RetVal1 = DAG.getConstant(0, MVT::i32);
2453     RetVal2 = DAG.getConstant(0, MVT::i32);
2454     return;
2455   }
2456
2457   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
2458     SDValue Ptr = Ld->getBasePtr();
2459     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2460                           Ld->getChain(), Ptr,
2461                           Ld->getSrcValue(), Ld->getSrcValueOffset(),
2462                           Ld->isVolatile(), Ld->isNonTemporal(),
2463                           Ld->getAlignment());
2464
2465     EVT PtrType = Ptr.getValueType();
2466     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
2467     SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
2468                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
2469     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
2470                           Ld->getChain(), NewPtr,
2471                           Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
2472                           Ld->isVolatile(), Ld->isNonTemporal(),
2473                           NewAlign);
2474     return;
2475   }
2476
2477   llvm_unreachable("Unknown VFP cmp argument!");
2478 }
2479
2480 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
2481 /// f32 and even f64 comparisons to integer ones.
2482 SDValue
2483 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
2484   SDValue Chain = Op.getOperand(0);
2485   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2486   SDValue LHS = Op.getOperand(2);
2487   SDValue RHS = Op.getOperand(3);
2488   SDValue Dest = Op.getOperand(4);
2489   DebugLoc dl = Op.getDebugLoc();
2490
2491   bool SeenZero = false;
2492   if (canChangeToInt(LHS, SeenZero, Subtarget) &&
2493       canChangeToInt(RHS, SeenZero, Subtarget) &&
2494       // If one of the operand is zero, it's safe to ignore the NaN case since
2495       // we only care about equality comparisons.
2496       (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
2497     // If unsafe fp math optimization is enabled and there are no othter uses of
2498     // the CMP operands, and the condition code is EQ oe NE, we can optimize it
2499     // to an integer comparison.
2500     if (CC == ISD::SETOEQ)
2501       CC = ISD::SETEQ;
2502     else if (CC == ISD::SETUNE)
2503       CC = ISD::SETNE;
2504
2505     SDValue ARMcc;
2506     if (LHS.getValueType() == MVT::f32) {
2507       LHS = bitcastf32Toi32(LHS, DAG);
2508       RHS = bitcastf32Toi32(RHS, DAG);
2509       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2510       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2511       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2512                          Chain, Dest, ARMcc, CCR, Cmp);
2513     }
2514
2515     SDValue LHS1, LHS2;
2516     SDValue RHS1, RHS2;
2517     expandf64Toi32(LHS, DAG, LHS1, LHS2);
2518     expandf64Toi32(RHS, DAG, RHS1, RHS2);
2519     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
2520     ARMcc = DAG.getConstant(CondCode, MVT::i32);
2521     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2522     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
2523     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
2524   }
2525
2526   return SDValue();
2527 }
2528
2529 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2530   SDValue Chain = Op.getOperand(0);
2531   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2532   SDValue LHS = Op.getOperand(2);
2533   SDValue RHS = Op.getOperand(3);
2534   SDValue Dest = Op.getOperand(4);
2535   DebugLoc dl = Op.getDebugLoc();
2536
2537   if (LHS.getValueType() == MVT::i32) {
2538     SDValue ARMcc;
2539     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
2540     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2541     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
2542                        Chain, Dest, ARMcc, CCR, Cmp);
2543   }
2544
2545   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
2546
2547   if (UnsafeFPMath &&
2548       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
2549        CC == ISD::SETNE || CC == ISD::SETUNE)) {
2550     SDValue Result = OptimizeVFPBrcond(Op, DAG);
2551     if (Result.getNode())
2552       return Result;
2553   }
2554
2555   ARMCC::CondCodes CondCode, CondCode2;
2556   FPCCToARMCC(CC, CondCode, CondCode2);
2557
2558   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
2559   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
2560   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2561   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2562   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
2563   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2564   if (CondCode2 != ARMCC::AL) {
2565     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
2566     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
2567     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2568   }
2569   return Res;
2570 }
2571
2572 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2573   SDValue Chain = Op.getOperand(0);
2574   SDValue Table = Op.getOperand(1);
2575   SDValue Index = Op.getOperand(2);
2576   DebugLoc dl = Op.getDebugLoc();
2577
2578   EVT PTy = getPointerTy();
2579   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2580   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2581   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2582   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2583   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2584   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2585   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2586   if (Subtarget->isThumb2()) {
2587     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2588     // which does another jump to the destination. This also makes it easier
2589     // to translate it to TBB / TBH later.
2590     // FIXME: This might not work if the function is extremely large.
2591     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2592                        Addr, Op.getOperand(2), JTI, UId);
2593   }
2594   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2595     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2596                        PseudoSourceValue::getJumpTable(), 0,
2597                        false, false, 0);
2598     Chain = Addr.getValue(1);
2599     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2600     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2601   } else {
2602     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2603                        PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2604     Chain = Addr.getValue(1);
2605     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2606   }
2607 }
2608
2609 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2610   DebugLoc dl = Op.getDebugLoc();
2611   unsigned Opc;
2612
2613   switch (Op.getOpcode()) {
2614   default:
2615     assert(0 && "Invalid opcode!");
2616   case ISD::FP_TO_SINT:
2617     Opc = ARMISD::FTOSI;
2618     break;
2619   case ISD::FP_TO_UINT:
2620     Opc = ARMISD::FTOUI;
2621     break;
2622   }
2623   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2624   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2625 }
2626
2627 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2628   EVT VT = Op.getValueType();
2629   DebugLoc dl = Op.getDebugLoc();
2630   unsigned Opc;
2631
2632   switch (Op.getOpcode()) {
2633   default:
2634     assert(0 && "Invalid opcode!");
2635   case ISD::SINT_TO_FP:
2636     Opc = ARMISD::SITOF;
2637     break;
2638   case ISD::UINT_TO_FP:
2639     Opc = ARMISD::UITOF;
2640     break;
2641   }
2642
2643   Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2644   return DAG.getNode(Opc, dl, VT, Op);
2645 }
2646
2647 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
2648   // Implement fcopysign with a fabs and a conditional fneg.
2649   SDValue Tmp0 = Op.getOperand(0);
2650   SDValue Tmp1 = Op.getOperand(1);
2651   DebugLoc dl = Op.getDebugLoc();
2652   EVT VT = Op.getValueType();
2653   EVT SrcVT = Tmp1.getValueType();
2654   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2655   SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
2656   SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
2657   SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
2658   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2659   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
2660 }
2661
2662 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
2663   MachineFunction &MF = DAG.getMachineFunction();
2664   MachineFrameInfo *MFI = MF.getFrameInfo();
2665   MFI->setReturnAddressIsTaken(true);
2666
2667   EVT VT = Op.getValueType();
2668   DebugLoc dl = Op.getDebugLoc();
2669   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2670   if (Depth) {
2671     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
2672     SDValue Offset = DAG.getConstant(4, MVT::i32);
2673     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
2674                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
2675                        NULL, 0, false, false, 0);
2676   }
2677
2678   // Return LR, which contains the return address. Mark it an implicit live-in.
2679   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
2680   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
2681 }
2682
2683 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2684   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2685   MFI->setFrameAddressIsTaken(true);
2686
2687   EVT VT = Op.getValueType();
2688   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
2689   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2690   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2691     ? ARM::R7 : ARM::R11;
2692   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2693   while (Depth--)
2694     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2695                             false, false, 0);
2696   return FrameAddr;
2697 }
2698
2699 /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2700 /// expand a bit convert where either the source or destination type is i64 to
2701 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
2702 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
2703 /// vectors), since the legalizer won't know what to do with that.
2704 static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2705   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2706   DebugLoc dl = N->getDebugLoc();
2707   SDValue Op = N->getOperand(0);
2708
2709   // This function is only supposed to be called for i64 types, either as the
2710   // source or destination of the bit convert.
2711   EVT SrcVT = Op.getValueType();
2712   EVT DstVT = N->getValueType(0);
2713   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2714          "ExpandBIT_CONVERT called for non-i64 type");
2715
2716   // Turn i64->f64 into VMOVDRR.
2717   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2718     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2719                              DAG.getConstant(0, MVT::i32));
2720     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2721                              DAG.getConstant(1, MVT::i32));
2722     return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
2723                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
2724   }
2725
2726   // Turn f64->i64 into VMOVRRD.
2727   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2728     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2729                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2730     // Merge the pieces into a single i64 value.
2731     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2732   }
2733
2734   return SDValue();
2735 }
2736
2737 /// getZeroVector - Returns a vector of specified type with all zero elements.
2738 /// Zero vectors are used to represent vector negation and in those cases
2739 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
2740 /// not support i64 elements, so sometimes the zero vectors will need to be
2741 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
2742 /// zero vector.
2743 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2744   assert(VT.isVector() && "Expected a vector type");
2745   // The canonical modified immediate encoding of a zero vector is....0!
2746   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
2747   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
2748   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
2749   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
2750 }
2751
2752 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
2753 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2754 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2755                                                 SelectionDAG &DAG) const {
2756   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2757   EVT VT = Op.getValueType();
2758   unsigned VTBits = VT.getSizeInBits();
2759   DebugLoc dl = Op.getDebugLoc();
2760   SDValue ShOpLo = Op.getOperand(0);
2761   SDValue ShOpHi = Op.getOperand(1);
2762   SDValue ShAmt  = Op.getOperand(2);
2763   SDValue ARMcc;
2764   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2765
2766   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2767
2768   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2769                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2770   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2771   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2772                                    DAG.getConstant(VTBits, MVT::i32));
2773   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2774   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2775   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2776
2777   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2778   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2779                           ARMcc, DAG, dl);
2780   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2781   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
2782                            CCR, Cmp);
2783
2784   SDValue Ops[2] = { Lo, Hi };
2785   return DAG.getMergeValues(Ops, 2, dl);
2786 }
2787
2788 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2789 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2790 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2791                                                SelectionDAG &DAG) const {
2792   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2793   EVT VT = Op.getValueType();
2794   unsigned VTBits = VT.getSizeInBits();
2795   DebugLoc dl = Op.getDebugLoc();
2796   SDValue ShOpLo = Op.getOperand(0);
2797   SDValue ShOpHi = Op.getOperand(1);
2798   SDValue ShAmt  = Op.getOperand(2);
2799   SDValue ARMcc;
2800
2801   assert(Op.getOpcode() == ISD::SHL_PARTS);
2802   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2803                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
2804   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2805   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2806                                    DAG.getConstant(VTBits, MVT::i32));
2807   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2808   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2809
2810   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2811   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2812   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2813                           ARMcc, DAG, dl);
2814   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2815   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
2816                            CCR, Cmp);
2817
2818   SDValue Ops[2] = { Lo, Hi };
2819   return DAG.getMergeValues(Ops, 2, dl);
2820 }
2821
2822 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2823                                             SelectionDAG &DAG) const {
2824   // The rounding mode is in bits 23:22 of the FPSCR.
2825   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2826   // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
2827   // so that the shift + and get folded into a bitfield extract.
2828   DebugLoc dl = Op.getDebugLoc();
2829   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
2830                               DAG.getConstant(Intrinsic::arm_get_fpscr,
2831                                               MVT::i32));
2832   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
2833                                   DAG.getConstant(1U << 22, MVT::i32));
2834   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2835                               DAG.getConstant(22, MVT::i32));
2836   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2837                      DAG.getConstant(3, MVT::i32));
2838 }
2839
2840 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2841                          const ARMSubtarget *ST) {
2842   EVT VT = N->getValueType(0);
2843   DebugLoc dl = N->getDebugLoc();
2844
2845   if (!ST->hasV6T2Ops())
2846     return SDValue();
2847
2848   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2849   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2850 }
2851
2852 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2853                           const ARMSubtarget *ST) {
2854   EVT VT = N->getValueType(0);
2855   DebugLoc dl = N->getDebugLoc();
2856
2857   // Lower vector shifts on NEON to use VSHL.
2858   if (VT.isVector()) {
2859     assert(ST->hasNEON() && "unexpected vector shift");
2860
2861     // Left shifts translate directly to the vshiftu intrinsic.
2862     if (N->getOpcode() == ISD::SHL)
2863       return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2864                          DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2865                          N->getOperand(0), N->getOperand(1));
2866
2867     assert((N->getOpcode() == ISD::SRA ||
2868             N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2869
2870     // NEON uses the same intrinsics for both left and right shifts.  For
2871     // right shifts, the shift amounts are negative, so negate the vector of
2872     // shift amounts.
2873     EVT ShiftVT = N->getOperand(1).getValueType();
2874     SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2875                                        getZeroVector(ShiftVT, DAG, dl),
2876                                        N->getOperand(1));
2877     Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2878                                Intrinsic::arm_neon_vshifts :
2879                                Intrinsic::arm_neon_vshiftu);
2880     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2881                        DAG.getConstant(vshiftInt, MVT::i32),
2882                        N->getOperand(0), NegatedCount);
2883   }
2884
2885   // We can get here for a node like i32 = ISD::SHL i32, i64
2886   if (VT != MVT::i64)
2887     return SDValue();
2888
2889   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2890          "Unknown shift to lower!");
2891
2892   // We only lower SRA, SRL of 1 here, all others use generic lowering.
2893   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2894       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2895     return SDValue();
2896
2897   // If we are in thumb mode, we don't have RRX.
2898   if (ST->isThumb1Only()) return SDValue();
2899
2900   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2901   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2902                            DAG.getConstant(0, MVT::i32));
2903   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2904                            DAG.getConstant(1, MVT::i32));
2905
2906   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2907   // captures the result into a carry flag.
2908   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2909   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2910
2911   // The low part is an ARMISD::RRX operand, which shifts the carry in.
2912   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2913
2914   // Merge the pieces into a single i64 value.
2915  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2916 }
2917
2918 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2919   SDValue TmpOp0, TmpOp1;
2920   bool Invert = false;
2921   bool Swap = false;
2922   unsigned Opc = 0;
2923
2924   SDValue Op0 = Op.getOperand(0);
2925   SDValue Op1 = Op.getOperand(1);
2926   SDValue CC = Op.getOperand(2);
2927   EVT VT = Op.getValueType();
2928   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2929   DebugLoc dl = Op.getDebugLoc();
2930
2931   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2932     switch (SetCCOpcode) {
2933     default: llvm_unreachable("Illegal FP comparison"); break;
2934     case ISD::SETUNE:
2935     case ISD::SETNE:  Invert = true; // Fallthrough
2936     case ISD::SETOEQ:
2937     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2938     case ISD::SETOLT:
2939     case ISD::SETLT: Swap = true; // Fallthrough
2940     case ISD::SETOGT:
2941     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2942     case ISD::SETOLE:
2943     case ISD::SETLE:  Swap = true; // Fallthrough
2944     case ISD::SETOGE:
2945     case ISD::SETGE: Opc = ARMISD::VCGE; break;
2946     case ISD::SETUGE: Swap = true; // Fallthrough
2947     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2948     case ISD::SETUGT: Swap = true; // Fallthrough
2949     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2950     case ISD::SETUEQ: Invert = true; // Fallthrough
2951     case ISD::SETONE:
2952       // Expand this to (OLT | OGT).
2953       TmpOp0 = Op0;
2954       TmpOp1 = Op1;
2955       Opc = ISD::OR;
2956       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2957       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2958       break;
2959     case ISD::SETUO: Invert = true; // Fallthrough
2960     case ISD::SETO:
2961       // Expand this to (OLT | OGE).
2962       TmpOp0 = Op0;
2963       TmpOp1 = Op1;
2964       Opc = ISD::OR;
2965       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2966       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2967       break;
2968     }
2969   } else {
2970     // Integer comparisons.
2971     switch (SetCCOpcode) {
2972     default: llvm_unreachable("Illegal integer comparison"); break;
2973     case ISD::SETNE:  Invert = true;
2974     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2975     case ISD::SETLT:  Swap = true;
2976     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2977     case ISD::SETLE:  Swap = true;
2978     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2979     case ISD::SETULT: Swap = true;
2980     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2981     case ISD::SETULE: Swap = true;
2982     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2983     }
2984
2985     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2986     if (Opc == ARMISD::VCEQ) {
2987
2988       SDValue AndOp;
2989       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2990         AndOp = Op0;
2991       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2992         AndOp = Op1;
2993
2994       // Ignore bitconvert.
2995       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2996         AndOp = AndOp.getOperand(0);
2997
2998       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2999         Opc = ARMISD::VTST;
3000         Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
3001         Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
3002         Invert = !Invert;
3003       }
3004     }
3005   }
3006
3007   if (Swap)
3008     std::swap(Op0, Op1);
3009
3010   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3011
3012   if (Invert)
3013     Result = DAG.getNOT(dl, Result, VT);
3014
3015   return Result;
3016 }
3017
3018 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
3019 /// valid vector constant for a NEON instruction with a "modified immediate"
3020 /// operand (e.g., VMOV).  If so, return the encoded value.
3021 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3022                                  unsigned SplatBitSize, SelectionDAG &DAG,
3023                                  EVT &VT, bool is128Bits, bool isVMOV) {
3024   unsigned OpCmode, Imm;
3025
3026   // SplatBitSize is set to the smallest size that splats the vector, so a
3027   // zero vector will always have SplatBitSize == 8.  However, NEON modified
3028   // immediate instructions others than VMOV do not support the 8-bit encoding
3029   // of a zero vector, and the default encoding of zero is supposed to be the
3030   // 32-bit version.
3031   if (SplatBits == 0)
3032     SplatBitSize = 32;
3033
3034   switch (SplatBitSize) {
3035   case 8:
3036     if (!isVMOV)
3037       return SDValue();
3038     // Any 1-byte value is OK.  Op=0, Cmode=1110.
3039     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3040     OpCmode = 0xe;
3041     Imm = SplatBits;
3042     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3043     break;
3044
3045   case 16:
3046     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3047     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3048     if ((SplatBits & ~0xff) == 0) {
3049       // Value = 0x00nn: Op=x, Cmode=100x.
3050       OpCmode = 0x8;
3051       Imm = SplatBits;
3052       break;
3053     }
3054     if ((SplatBits & ~0xff00) == 0) {
3055       // Value = 0xnn00: Op=x, Cmode=101x.
3056       OpCmode = 0xa;
3057       Imm = SplatBits >> 8;
3058       break;
3059     }
3060     return SDValue();
3061
3062   case 32:
3063     // NEON's 32-bit VMOV supports splat values where:
3064     // * only one byte is nonzero, or
3065     // * the least significant byte is 0xff and the second byte is nonzero, or
3066     // * the least significant 2 bytes are 0xff and the third is nonzero.
3067     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
3068     if ((SplatBits & ~0xff) == 0) {
3069       // Value = 0x000000nn: Op=x, Cmode=000x.
3070       OpCmode = 0;
3071       Imm = SplatBits;
3072       break;
3073     }
3074     if ((SplatBits & ~0xff00) == 0) {
3075       // Value = 0x0000nn00: Op=x, Cmode=001x.
3076       OpCmode = 0x2;
3077       Imm = SplatBits >> 8;
3078       break;
3079     }
3080     if ((SplatBits & ~0xff0000) == 0) {
3081       // Value = 0x00nn0000: Op=x, Cmode=010x.
3082       OpCmode = 0x4;
3083       Imm = SplatBits >> 16;
3084       break;
3085     }
3086     if ((SplatBits & ~0xff000000) == 0) {
3087       // Value = 0xnn000000: Op=x, Cmode=011x.
3088       OpCmode = 0x6;
3089       Imm = SplatBits >> 24;
3090       break;
3091     }
3092
3093     if ((SplatBits & ~0xffff) == 0 &&
3094         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
3095       // Value = 0x0000nnff: Op=x, Cmode=1100.
3096       OpCmode = 0xc;
3097       Imm = SplatBits >> 8;
3098       SplatBits |= 0xff;
3099       break;
3100     }
3101
3102     if ((SplatBits & ~0xffffff) == 0 &&
3103         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
3104       // Value = 0x00nnffff: Op=x, Cmode=1101.
3105       OpCmode = 0xd;
3106       Imm = SplatBits >> 16;
3107       SplatBits |= 0xffff;
3108       break;
3109     }
3110
3111     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
3112     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
3113     // VMOV.I32.  A (very) minor optimization would be to replicate the value
3114     // and fall through here to test for a valid 64-bit splat.  But, then the
3115     // caller would also need to check and handle the change in size.
3116     return SDValue();
3117
3118   case 64: {
3119     if (!isVMOV)
3120       return SDValue();
3121     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
3122     uint64_t BitMask = 0xff;
3123     uint64_t Val = 0;
3124     unsigned ImmMask = 1;
3125     Imm = 0;
3126     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
3127       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
3128         Val |= BitMask;
3129         Imm |= ImmMask;
3130       } else if ((SplatBits & BitMask) != 0) {
3131         return SDValue();
3132       }
3133       BitMask <<= 8;
3134       ImmMask <<= 1;
3135     }
3136     // Op=1, Cmode=1110.
3137     OpCmode = 0x1e;
3138     SplatBits = Val;
3139     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
3140     break;
3141   }
3142
3143   default:
3144     llvm_unreachable("unexpected size for isNEONModifiedImm");
3145     return SDValue();
3146   }
3147
3148   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
3149   return DAG.getTargetConstant(EncodedVal, MVT::i32);
3150 }
3151
3152 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
3153                        bool &ReverseVEXT, unsigned &Imm) {
3154   unsigned NumElts = VT.getVectorNumElements();
3155   ReverseVEXT = false;
3156
3157   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
3158   if (M[0] < 0)
3159     return false;
3160
3161   Imm = M[0];
3162
3163   // If this is a VEXT shuffle, the immediate value is the index of the first
3164   // element.  The other shuffle indices must be the successive elements after
3165   // the first one.
3166   unsigned ExpectedElt = Imm;
3167   for (unsigned i = 1; i < NumElts; ++i) {
3168     // Increment the expected index.  If it wraps around, it may still be
3169     // a VEXT but the source vectors must be swapped.
3170     ExpectedElt += 1;
3171     if (ExpectedElt == NumElts * 2) {
3172       ExpectedElt = 0;
3173       ReverseVEXT = true;
3174     }
3175
3176     if (M[i] < 0) continue; // ignore UNDEF indices
3177     if (ExpectedElt != static_cast<unsigned>(M[i]))
3178       return false;
3179   }
3180
3181   // Adjust the index value if the source operands will be swapped.
3182   if (ReverseVEXT)
3183     Imm -= NumElts;
3184
3185   return true;
3186 }
3187
3188 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
3189 /// instruction with the specified blocksize.  (The order of the elements
3190 /// within each block of the vector is reversed.)
3191 static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
3192                        unsigned BlockSize) {
3193   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
3194          "Only possible block sizes for VREV are: 16, 32, 64");
3195
3196   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3197   if (EltSz == 64)
3198     return false;
3199
3200   unsigned NumElts = VT.getVectorNumElements();
3201   unsigned BlockElts = M[0] + 1;
3202   // If the first shuffle index is UNDEF, be optimistic.
3203   if (M[0] < 0)
3204     BlockElts = BlockSize / EltSz;
3205
3206   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
3207     return false;
3208
3209   for (unsigned i = 0; i < NumElts; ++i) {
3210     if (M[i] < 0) continue; // ignore UNDEF indices
3211     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
3212       return false;
3213   }
3214
3215   return true;
3216 }
3217
3218 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
3219                        unsigned &WhichResult) {
3220   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3221   if (EltSz == 64)
3222     return false;
3223
3224   unsigned NumElts = VT.getVectorNumElements();
3225   WhichResult = (M[0] == 0 ? 0 : 1);
3226   for (unsigned i = 0; i < NumElts; i += 2) {
3227     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3228         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
3229       return false;
3230   }
3231   return true;
3232 }
3233
3234 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
3235 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3236 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
3237 static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3238                                 unsigned &WhichResult) {
3239   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3240   if (EltSz == 64)
3241     return false;
3242
3243   unsigned NumElts = VT.getVectorNumElements();
3244   WhichResult = (M[0] == 0 ? 0 : 1);
3245   for (unsigned i = 0; i < NumElts; i += 2) {
3246     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
3247         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
3248       return false;
3249   }
3250   return true;
3251 }
3252
3253 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
3254                        unsigned &WhichResult) {
3255   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3256   if (EltSz == 64)
3257     return false;
3258
3259   unsigned NumElts = VT.getVectorNumElements();
3260   WhichResult = (M[0] == 0 ? 0 : 1);
3261   for (unsigned i = 0; i != NumElts; ++i) {
3262     if (M[i] < 0) continue; // ignore UNDEF indices
3263     if ((unsigned) M[i] != 2 * i + WhichResult)
3264       return false;
3265   }
3266
3267   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3268   if (VT.is64BitVector() && EltSz == 32)
3269     return false;
3270
3271   return true;
3272 }
3273
3274 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
3275 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3276 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
3277 static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3278                                 unsigned &WhichResult) {
3279   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3280   if (EltSz == 64)
3281     return false;
3282
3283   unsigned Half = VT.getVectorNumElements() / 2;
3284   WhichResult = (M[0] == 0 ? 0 : 1);
3285   for (unsigned j = 0; j != 2; ++j) {
3286     unsigned Idx = WhichResult;
3287     for (unsigned i = 0; i != Half; ++i) {
3288       int MIdx = M[i + j * Half];
3289       if (MIdx >= 0 && (unsigned) MIdx != Idx)
3290         return false;
3291       Idx += 2;
3292     }
3293   }
3294
3295   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3296   if (VT.is64BitVector() && EltSz == 32)
3297     return false;
3298
3299   return true;
3300 }
3301
3302 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
3303                        unsigned &WhichResult) {
3304   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3305   if (EltSz == 64)
3306     return false;
3307
3308   unsigned NumElts = VT.getVectorNumElements();
3309   WhichResult = (M[0] == 0 ? 0 : 1);
3310   unsigned Idx = WhichResult * NumElts / 2;
3311   for (unsigned i = 0; i != NumElts; i += 2) {
3312     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3313         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
3314       return false;
3315     Idx += 1;
3316   }
3317
3318   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3319   if (VT.is64BitVector() && EltSz == 32)
3320     return false;
3321
3322   return true;
3323 }
3324
3325 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
3326 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
3327 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
3328 static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
3329                                 unsigned &WhichResult) {
3330   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
3331   if (EltSz == 64)
3332     return false;
3333
3334   unsigned NumElts = VT.getVectorNumElements();
3335   WhichResult = (M[0] == 0 ? 0 : 1);
3336   unsigned Idx = WhichResult * NumElts / 2;
3337   for (unsigned i = 0; i != NumElts; i += 2) {
3338     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
3339         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
3340       return false;
3341     Idx += 1;
3342   }
3343
3344   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
3345   if (VT.is64BitVector() && EltSz == 32)
3346     return false;
3347
3348   return true;
3349 }
3350
3351 // If N is an integer constant that can be moved into a register in one
3352 // instruction, return an SDValue of such a constant (will become a MOV
3353 // instruction).  Otherwise return null.
3354 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
3355                                      const ARMSubtarget *ST, DebugLoc dl) {
3356   uint64_t Val;
3357   if (!isa<ConstantSDNode>(N))
3358     return SDValue();
3359   Val = cast<ConstantSDNode>(N)->getZExtValue();
3360
3361   if (ST->isThumb1Only()) {
3362     if (Val <= 255 || ~Val <= 255)
3363       return DAG.getConstant(Val, MVT::i32);
3364   } else {
3365     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
3366       return DAG.getConstant(Val, MVT::i32);
3367   }
3368   return SDValue();
3369 }
3370
3371 // If this is a case we can't handle, return null and let the default
3372 // expansion code take care of it.
3373 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3374                                  const ARMSubtarget *ST) {
3375   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
3376   DebugLoc dl = Op.getDebugLoc();
3377   EVT VT = Op.getValueType();
3378
3379   APInt SplatBits, SplatUndef;
3380   unsigned SplatBitSize;
3381   bool HasAnyUndefs;
3382   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
3383     if (SplatBitSize <= 64) {
3384       // Check if an immediate VMOV works.
3385       EVT VmovVT;
3386       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
3387                                       SplatUndef.getZExtValue(), SplatBitSize,
3388                                       DAG, VmovVT, VT.is128BitVector(), true);
3389       if (Val.getNode()) {
3390         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
3391         return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3392       }
3393
3394       // Try an immediate VMVN.
3395       uint64_t NegatedImm = (SplatBits.getZExtValue() ^
3396                              ((1LL << SplatBitSize) - 1));
3397       Val = isNEONModifiedImm(NegatedImm,
3398                                       SplatUndef.getZExtValue(), SplatBitSize,
3399                                       DAG, VmovVT, VT.is128BitVector(), false);
3400       if (Val.getNode()) {
3401         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
3402         return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
3403       }
3404     }
3405   }
3406
3407   // Scan through the operands to see if only one value is used.
3408   unsigned NumElts = VT.getVectorNumElements();
3409   bool isOnlyLowElement = true;
3410   bool usesOnlyOneValue = true;
3411   bool isConstant = true;
3412   SDValue Value;
3413   for (unsigned i = 0; i < NumElts; ++i) {
3414     SDValue V = Op.getOperand(i);
3415     if (V.getOpcode() == ISD::UNDEF)
3416       continue;
3417     if (i > 0)
3418       isOnlyLowElement = false;
3419     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
3420       isConstant = false;
3421
3422     if (!Value.getNode())
3423       Value = V;
3424     else if (V != Value)
3425       usesOnlyOneValue = false;
3426   }
3427
3428   if (!Value.getNode())
3429     return DAG.getUNDEF(VT);
3430
3431   if (isOnlyLowElement)
3432     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
3433
3434   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3435
3436   if (EnableARMVDUPsplat) {
3437     // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
3438     // i32 and try again.
3439     if (usesOnlyOneValue && EltSize <= 32) {
3440       if (!isConstant)
3441         return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3442       if (VT.getVectorElementType().isFloatingPoint()) {
3443         SmallVector<SDValue, 8> Ops;
3444         for (unsigned i = 0; i < NumElts; ++i)
3445           Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
3446                                     Op.getOperand(i)));
3447         SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
3448                                   NumElts);
3449         return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
3450                            LowerBUILD_VECTOR(Val, DAG, ST));
3451       }
3452       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
3453       if (Val.getNode())
3454         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
3455     }
3456   }
3457
3458   // If all elements are constants and the case above didn't get hit, fall back
3459   // to the default expansion, which will generate a load from the constant
3460   // pool.
3461   if (isConstant)
3462     return SDValue();
3463
3464   if (!EnableARMVDUPsplat) {
3465     // Use VDUP for non-constant splats.
3466     if (usesOnlyOneValue && EltSize <= 32)
3467       return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
3468   }
3469
3470   // Vectors with 32- or 64-bit elements can be built by directly assigning
3471   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
3472   // will be legalized.
3473   if (EltSize >= 32) {
3474     // Do the expansion with floating-point types, since that is what the VFP
3475     // registers are defined to use, and since i64 is not legal.
3476     EVT EltVT = EVT::getFloatingPointVT(EltSize);
3477     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3478     SmallVector<SDValue, 8> Ops;
3479     for (unsigned i = 0; i < NumElts; ++i)
3480       Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
3481     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3482     return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3483   }
3484
3485   return SDValue();
3486 }
3487
3488 /// isShuffleMaskLegal - Targets can use this to indicate that they only
3489 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3490 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3491 /// are assumed to be legal.
3492 bool
3493 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
3494                                       EVT VT) const {
3495   if (VT.getVectorNumElements() == 4 &&
3496       (VT.is128BitVector() || VT.is64BitVector())) {
3497     unsigned PFIndexes[4];
3498     for (unsigned i = 0; i != 4; ++i) {
3499       if (M[i] < 0)
3500         PFIndexes[i] = 8;
3501       else
3502         PFIndexes[i] = M[i];
3503     }
3504
3505     // Compute the index in the perfect shuffle table.
3506     unsigned PFTableIndex =
3507       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3508     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3509     unsigned Cost = (PFEntry >> 30);
3510
3511     if (Cost <= 4)
3512       return true;
3513   }
3514
3515   bool ReverseVEXT;
3516   unsigned Imm, WhichResult;
3517
3518   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3519   return (EltSize >= 32 ||
3520           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
3521           isVREVMask(M, VT, 64) ||
3522           isVREVMask(M, VT, 32) ||
3523           isVREVMask(M, VT, 16) ||
3524           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
3525           isVTRNMask(M, VT, WhichResult) ||
3526           isVUZPMask(M, VT, WhichResult) ||
3527           isVZIPMask(M, VT, WhichResult) ||
3528           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
3529           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
3530           isVZIP_v_undef_Mask(M, VT, WhichResult));
3531 }
3532
3533 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3534 /// the specified operations to build the shuffle.
3535 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3536                                       SDValue RHS, SelectionDAG &DAG,
3537                                       DebugLoc dl) {
3538   unsigned OpNum = (PFEntry >> 26) & 0x0F;
3539   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3540   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
3541
3542   enum {
3543     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3544     OP_VREV,
3545     OP_VDUP0,
3546     OP_VDUP1,
3547     OP_VDUP2,
3548     OP_VDUP3,
3549     OP_VEXT1,
3550     OP_VEXT2,
3551     OP_VEXT3,
3552     OP_VUZPL, // VUZP, left result
3553     OP_VUZPR, // VUZP, right result
3554     OP_VZIPL, // VZIP, left result
3555     OP_VZIPR, // VZIP, right result
3556     OP_VTRNL, // VTRN, left result
3557     OP_VTRNR  // VTRN, right result
3558   };
3559
3560   if (OpNum == OP_COPY) {
3561     if (LHSID == (1*9+2)*9+3) return LHS;
3562     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3563     return RHS;
3564   }
3565
3566   SDValue OpLHS, OpRHS;
3567   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3568   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3569   EVT VT = OpLHS.getValueType();
3570
3571   switch (OpNum) {
3572   default: llvm_unreachable("Unknown shuffle opcode!");
3573   case OP_VREV:
3574     return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
3575   case OP_VDUP0:
3576   case OP_VDUP1:
3577   case OP_VDUP2:
3578   case OP_VDUP3:
3579     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
3580                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
3581   case OP_VEXT1:
3582   case OP_VEXT2:
3583   case OP_VEXT3:
3584     return DAG.getNode(ARMISD::VEXT, dl, VT,
3585                        OpLHS, OpRHS,
3586                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3587   case OP_VUZPL:
3588   case OP_VUZPR:
3589     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3590                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3591   case OP_VZIPL:
3592   case OP_VZIPR:
3593     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3594                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3595   case OP_VTRNL:
3596   case OP_VTRNR:
3597     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3598                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3599   }
3600 }
3601
3602 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3603   SDValue V1 = Op.getOperand(0);
3604   SDValue V2 = Op.getOperand(1);
3605   DebugLoc dl = Op.getDebugLoc();
3606   EVT VT = Op.getValueType();
3607   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3608   SmallVector<int, 8> ShuffleMask;
3609
3610   // Convert shuffles that are directly supported on NEON to target-specific
3611   // DAG nodes, instead of keeping them as shuffles and matching them again
3612   // during code selection.  This is more efficient and avoids the possibility
3613   // of inconsistencies between legalization and selection.
3614   // FIXME: floating-point vectors should be canonicalized to integer vectors
3615   // of the same time so that they get CSEd properly.
3616   SVN->getMask(ShuffleMask);
3617
3618   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3619   if (EltSize <= 32) {
3620     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3621       int Lane = SVN->getSplatIndex();
3622       // If this is undef splat, generate it via "just" vdup, if possible.
3623       if (Lane == -1) Lane = 0;
3624
3625       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3626         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3627       }
3628       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3629                          DAG.getConstant(Lane, MVT::i32));
3630     }
3631
3632     bool ReverseVEXT;
3633     unsigned Imm;
3634     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3635       if (ReverseVEXT)
3636         std::swap(V1, V2);
3637       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3638                          DAG.getConstant(Imm, MVT::i32));
3639     }
3640
3641     if (isVREVMask(ShuffleMask, VT, 64))
3642       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3643     if (isVREVMask(ShuffleMask, VT, 32))
3644       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3645     if (isVREVMask(ShuffleMask, VT, 16))
3646       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3647
3648     // Check for Neon shuffles that modify both input vectors in place.
3649     // If both results are used, i.e., if there are two shuffles with the same
3650     // source operands and with masks corresponding to both results of one of
3651     // these operations, DAG memoization will ensure that a single node is
3652     // used for both shuffles.
3653     unsigned WhichResult;
3654     if (isVTRNMask(ShuffleMask, VT, WhichResult))
3655       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3656                          V1, V2).getValue(WhichResult);
3657     if (isVUZPMask(ShuffleMask, VT, WhichResult))
3658       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3659                          V1, V2).getValue(WhichResult);
3660     if (isVZIPMask(ShuffleMask, VT, WhichResult))
3661       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3662                          V1, V2).getValue(WhichResult);
3663
3664     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3665       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3666                          V1, V1).getValue(WhichResult);
3667     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3668       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3669                          V1, V1).getValue(WhichResult);
3670     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3671       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3672                          V1, V1).getValue(WhichResult);
3673   }
3674
3675   // If the shuffle is not directly supported and it has 4 elements, use
3676   // the PerfectShuffle-generated table to synthesize it from other shuffles.
3677   unsigned NumElts = VT.getVectorNumElements();
3678   if (NumElts == 4) {
3679     unsigned PFIndexes[4];
3680     for (unsigned i = 0; i != 4; ++i) {
3681       if (ShuffleMask[i] < 0)
3682         PFIndexes[i] = 8;
3683       else
3684         PFIndexes[i] = ShuffleMask[i];
3685     }
3686
3687     // Compute the index in the perfect shuffle table.
3688     unsigned PFTableIndex =
3689       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3690     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3691     unsigned Cost = (PFEntry >> 30);
3692
3693     if (Cost <= 4)
3694       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3695   }
3696
3697   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
3698   if (EltSize >= 32) {
3699     // Do the expansion with floating-point types, since that is what the VFP
3700     // registers are defined to use, and since i64 is not legal.
3701     EVT EltVT = EVT::getFloatingPointVT(EltSize);
3702     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
3703     V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
3704     V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
3705     SmallVector<SDValue, 8> Ops;
3706     for (unsigned i = 0; i < NumElts; ++i) {
3707       if (ShuffleMask[i] < 0)
3708         Ops.push_back(DAG.getUNDEF(EltVT));
3709       else
3710         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
3711                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
3712                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
3713                                                   MVT::i32)));
3714     }
3715     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
3716     return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
3717   }
3718
3719   return SDValue();
3720 }
3721
3722 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3723   EVT VT = Op.getValueType();
3724   DebugLoc dl = Op.getDebugLoc();
3725   SDValue Vec = Op.getOperand(0);
3726   SDValue Lane = Op.getOperand(1);
3727   assert(VT == MVT::i32 &&
3728          Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3729          "unexpected type for custom-lowering vector extract");
3730   return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3731 }
3732
3733 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3734   // The only time a CONCAT_VECTORS operation can have legal types is when
3735   // two 64-bit vectors are concatenated to a 128-bit vector.
3736   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3737          "unexpected CONCAT_VECTORS");
3738   DebugLoc dl = Op.getDebugLoc();
3739   SDValue Val = DAG.getUNDEF(MVT::v2f64);
3740   SDValue Op0 = Op.getOperand(0);
3741   SDValue Op1 = Op.getOperand(1);
3742   if (Op0.getOpcode() != ISD::UNDEF)
3743     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3744                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3745                       DAG.getIntPtrConstant(0));
3746   if (Op1.getOpcode() != ISD::UNDEF)
3747     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3748                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3749                       DAG.getIntPtrConstant(1));
3750   return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3751 }
3752
3753 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3754   switch (Op.getOpcode()) {
3755   default: llvm_unreachable("Don't know how to custom lower this!");
3756   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
3757   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
3758   case ISD::GlobalAddress:
3759     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3760       LowerGlobalAddressELF(Op, DAG);
3761   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3762   case ISD::SELECT:        return LowerSELECT(Op, DAG);
3763   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
3764   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
3765   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
3766   case ISD::VASTART:       return LowerVASTART(Op, DAG);
3767   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
3768   case ISD::SINT_TO_FP:
3769   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
3770   case ISD::FP_TO_SINT:
3771   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
3772   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
3773   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
3774   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
3775   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3776   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
3777   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
3778   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3779                                                                Subtarget);
3780   case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
3781   case ISD::SHL:
3782   case ISD::SRL:
3783   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
3784   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
3785   case ISD::SRL_PARTS:
3786   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
3787   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3788   case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
3789   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
3790   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3791   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3792   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3793   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
3794   }
3795   return SDValue();
3796 }
3797
3798 /// ReplaceNodeResults - Replace the results of node with an illegal result
3799 /// type with new values built out of custom code.
3800 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3801                                            SmallVectorImpl<SDValue>&Results,
3802                                            SelectionDAG &DAG) const {
3803   SDValue Res;
3804   switch (N->getOpcode()) {
3805   default:
3806     llvm_unreachable("Don't know how to custom expand this!");
3807     break;
3808   case ISD::BIT_CONVERT:
3809     Res = ExpandBIT_CONVERT(N, DAG);
3810     break;
3811   case ISD::SRL:
3812   case ISD::SRA:
3813     Res = LowerShift(N, DAG, Subtarget);
3814     break;
3815   }
3816   if (Res.getNode())
3817     Results.push_back(Res);
3818 }
3819
3820 //===----------------------------------------------------------------------===//
3821 //                           ARM Scheduler Hooks
3822 //===----------------------------------------------------------------------===//
3823
3824 MachineBasicBlock *
3825 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3826                                      MachineBasicBlock *BB,
3827                                      unsigned Size) const {
3828   unsigned dest    = MI->getOperand(0).getReg();
3829   unsigned ptr     = MI->getOperand(1).getReg();
3830   unsigned oldval  = MI->getOperand(2).getReg();
3831   unsigned newval  = MI->getOperand(3).getReg();
3832   unsigned scratch = BB->getParent()->getRegInfo()
3833     .createVirtualRegister(ARM::GPRRegisterClass);
3834   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3835   DebugLoc dl = MI->getDebugLoc();
3836   bool isThumb2 = Subtarget->isThumb2();
3837
3838   unsigned ldrOpc, strOpc;
3839   switch (Size) {
3840   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3841   case 1:
3842     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3843     strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
3844     break;
3845   case 2:
3846     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3847     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3848     break;
3849   case 4:
3850     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3851     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3852     break;
3853   }
3854
3855   MachineFunction *MF = BB->getParent();
3856   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3857   MachineFunction::iterator It = BB;
3858   ++It; // insert the new blocks after the current block
3859
3860   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3861   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3862   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3863   MF->insert(It, loop1MBB);
3864   MF->insert(It, loop2MBB);
3865   MF->insert(It, exitMBB);
3866
3867   // Transfer the remainder of BB and its successor edges to exitMBB.
3868   exitMBB->splice(exitMBB->begin(), BB,
3869                   llvm::next(MachineBasicBlock::iterator(MI)),
3870                   BB->end());
3871   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
3872
3873   //  thisMBB:
3874   //   ...
3875   //   fallthrough --> loop1MBB
3876   BB->addSuccessor(loop1MBB);
3877
3878   // loop1MBB:
3879   //   ldrex dest, [ptr]
3880   //   cmp dest, oldval
3881   //   bne exitMBB
3882   BB = loop1MBB;
3883   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3884   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3885                  .addReg(dest).addReg(oldval));
3886   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3887     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3888   BB->addSuccessor(loop2MBB);
3889   BB->addSuccessor(exitMBB);
3890
3891   // loop2MBB:
3892   //   strex scratch, newval, [ptr]
3893   //   cmp scratch, #0
3894   //   bne loop1MBB
3895   BB = loop2MBB;
3896   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3897                  .addReg(ptr));
3898   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3899                  .addReg(scratch).addImm(0));
3900   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3901     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3902   BB->addSuccessor(loop1MBB);
3903   BB->addSuccessor(exitMBB);
3904
3905   //  exitMBB:
3906   //   ...
3907   BB = exitMBB;
3908
3909   MI->eraseFromParent();   // The instruction is gone now.
3910
3911   return BB;
3912 }
3913
3914 MachineBasicBlock *
3915 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3916                                     unsigned Size, unsigned BinOpcode) const {
3917   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3918   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3919
3920   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3921   MachineFunction *MF = BB->getParent();
3922   MachineFunction::iterator It = BB;
3923   ++It;
3924
3925   unsigned dest = MI->getOperand(0).getReg();
3926   unsigned ptr = MI->getOperand(1).getReg();
3927   unsigned incr = MI->getOperand(2).getReg();
3928   DebugLoc dl = MI->getDebugLoc();
3929
3930   bool isThumb2 = Subtarget->isThumb2();
3931   unsigned ldrOpc, strOpc;
3932   switch (Size) {
3933   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3934   case 1:
3935     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3936     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3937     break;
3938   case 2:
3939     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3940     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3941     break;
3942   case 4:
3943     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3944     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3945     break;
3946   }
3947
3948   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3949   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3950   MF->insert(It, loopMBB);
3951   MF->insert(It, exitMBB);
3952
3953   // Transfer the remainder of BB and its successor edges to exitMBB.
3954   exitMBB->splice(exitMBB->begin(), BB,
3955                   llvm::next(MachineBasicBlock::iterator(MI)),
3956                   BB->end());
3957   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
3958
3959   MachineRegisterInfo &RegInfo = MF->getRegInfo();
3960   unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3961   unsigned scratch2 = (!BinOpcode) ? incr :
3962     RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3963
3964   //  thisMBB:
3965   //   ...
3966   //   fallthrough --> loopMBB
3967   BB->addSuccessor(loopMBB);
3968
3969   //  loopMBB:
3970   //   ldrex dest, ptr
3971   //   <binop> scratch2, dest, incr
3972   //   strex scratch, scratch2, ptr
3973   //   cmp scratch, #0
3974   //   bne- loopMBB
3975   //   fallthrough --> exitMBB
3976   BB = loopMBB;
3977   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3978   if (BinOpcode) {
3979     // operand order needs to go the other way for NAND
3980     if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
3981       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3982                      addReg(incr).addReg(dest)).addReg(0);
3983     else
3984       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3985                      addReg(dest).addReg(incr)).addReg(0);
3986   }
3987
3988   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
3989                  .addReg(ptr));
3990   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3991                  .addReg(scratch).addImm(0));
3992   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3993     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3994
3995   BB->addSuccessor(loopMBB);
3996   BB->addSuccessor(exitMBB);
3997
3998   //  exitMBB:
3999   //   ...
4000   BB = exitMBB;
4001
4002   MI->eraseFromParent();   // The instruction is gone now.
4003
4004   return BB;
4005 }
4006
4007 static
4008 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
4009   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
4010        E = MBB->succ_end(); I != E; ++I)
4011     if (*I != Succ)
4012       return *I;
4013   llvm_unreachable("Expecting a BB with two successors!");
4014 }
4015
4016 MachineBasicBlock *
4017 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4018                                                MachineBasicBlock *BB) const {
4019   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4020   DebugLoc dl = MI->getDebugLoc();
4021   bool isThumb2 = Subtarget->isThumb2();
4022   switch (MI->getOpcode()) {
4023   default:
4024     MI->dump();
4025     llvm_unreachable("Unexpected instr type to insert");
4026
4027   case ARM::ATOMIC_LOAD_ADD_I8:
4028      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4029   case ARM::ATOMIC_LOAD_ADD_I16:
4030      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4031   case ARM::ATOMIC_LOAD_ADD_I32:
4032      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
4033
4034   case ARM::ATOMIC_LOAD_AND_I8:
4035      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4036   case ARM::ATOMIC_LOAD_AND_I16:
4037      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4038   case ARM::ATOMIC_LOAD_AND_I32:
4039      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
4040
4041   case ARM::ATOMIC_LOAD_OR_I8:
4042      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4043   case ARM::ATOMIC_LOAD_OR_I16:
4044      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4045   case ARM::ATOMIC_LOAD_OR_I32:
4046      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
4047
4048   case ARM::ATOMIC_LOAD_XOR_I8:
4049      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4050   case ARM::ATOMIC_LOAD_XOR_I16:
4051      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4052   case ARM::ATOMIC_LOAD_XOR_I32:
4053      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
4054
4055   case ARM::ATOMIC_LOAD_NAND_I8:
4056      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4057   case ARM::ATOMIC_LOAD_NAND_I16:
4058      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4059   case ARM::ATOMIC_LOAD_NAND_I32:
4060      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
4061
4062   case ARM::ATOMIC_LOAD_SUB_I8:
4063      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4064   case ARM::ATOMIC_LOAD_SUB_I16:
4065      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4066   case ARM::ATOMIC_LOAD_SUB_I32:
4067      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
4068
4069   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
4070   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
4071   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
4072
4073   case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
4074   case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
4075   case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
4076
4077   case ARM::tMOVCCr_pseudo: {
4078     // To "insert" a SELECT_CC instruction, we actually have to insert the
4079     // diamond control-flow pattern.  The incoming instruction knows the
4080     // destination vreg to set, the condition code register to branch on, the
4081     // true/false values to select between, and a branch opcode to use.
4082     const BasicBlock *LLVM_BB = BB->getBasicBlock();
4083     MachineFunction::iterator It = BB;
4084     ++It;
4085
4086     //  thisMBB:
4087     //  ...
4088     //   TrueVal = ...
4089     //   cmpTY ccX, r1, r2
4090     //   bCC copy1MBB
4091     //   fallthrough --> copy0MBB
4092     MachineBasicBlock *thisMBB  = BB;
4093     MachineFunction *F = BB->getParent();
4094     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4095     MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
4096     F->insert(It, copy0MBB);
4097     F->insert(It, sinkMBB);
4098
4099     // Transfer the remainder of BB and its successor edges to sinkMBB.
4100     sinkMBB->splice(sinkMBB->begin(), BB,
4101                     llvm::next(MachineBasicBlock::iterator(MI)),
4102                     BB->end());
4103     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
4104
4105     BB->addSuccessor(copy0MBB);
4106     BB->addSuccessor(sinkMBB);
4107
4108     BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
4109       .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
4110
4111     //  copy0MBB:
4112     //   %FalseValue = ...
4113     //   # fallthrough to sinkMBB
4114     BB = copy0MBB;
4115
4116     // Update machine-CFG edges
4117     BB->addSuccessor(sinkMBB);
4118
4119     //  sinkMBB:
4120     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4121     //  ...
4122     BB = sinkMBB;
4123     BuildMI(*BB, BB->begin(), dl,
4124             TII->get(ARM::PHI), MI->getOperand(0).getReg())
4125       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4126       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4127
4128     MI->eraseFromParent();   // The pseudo instruction is gone now.
4129     return BB;
4130   }
4131
4132   case ARM::BCCi64:
4133   case ARM::BCCZi64: {
4134     // Compare both parts that make up the double comparison separately for
4135     // equality.
4136     bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
4137
4138     unsigned LHS1 = MI->getOperand(1).getReg();
4139     unsigned LHS2 = MI->getOperand(2).getReg();
4140     if (RHSisZero) {
4141       AddDefaultPred(BuildMI(BB, dl,
4142                              TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4143                      .addReg(LHS1).addImm(0));
4144       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4145         .addReg(LHS2).addImm(0)
4146         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4147     } else {
4148       unsigned RHS1 = MI->getOperand(3).getReg();
4149       unsigned RHS2 = MI->getOperand(4).getReg();
4150       AddDefaultPred(BuildMI(BB, dl,
4151                              TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4152                      .addReg(LHS1).addReg(RHS1));
4153       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
4154         .addReg(LHS2).addReg(RHS2)
4155         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
4156     }
4157
4158     MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
4159     MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
4160     if (MI->getOperand(0).getImm() == ARMCC::NE)
4161       std::swap(destMBB, exitMBB);
4162
4163     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4164       .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
4165     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
4166       .addMBB(exitMBB);
4167
4168     MI->eraseFromParent();   // The pseudo instruction is gone now.
4169     return BB;
4170   }
4171   }
4172 }
4173
4174 //===----------------------------------------------------------------------===//
4175 //                           ARM Optimization Hooks
4176 //===----------------------------------------------------------------------===//
4177
4178 static
4179 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
4180                             TargetLowering::DAGCombinerInfo &DCI) {
4181   SelectionDAG &DAG = DCI.DAG;
4182   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4183   EVT VT = N->getValueType(0);
4184   unsigned Opc = N->getOpcode();
4185   bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
4186   SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
4187   SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
4188   ISD::CondCode CC = ISD::SETCC_INVALID;
4189
4190   if (isSlctCC) {
4191     CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
4192   } else {
4193     SDValue CCOp = Slct.getOperand(0);
4194     if (CCOp.getOpcode() == ISD::SETCC)
4195       CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
4196   }
4197
4198   bool DoXform = false;
4199   bool InvCC = false;
4200   assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
4201           "Bad input!");
4202
4203   if (LHS.getOpcode() == ISD::Constant &&
4204       cast<ConstantSDNode>(LHS)->isNullValue()) {
4205     DoXform = true;
4206   } else if (CC != ISD::SETCC_INVALID &&
4207              RHS.getOpcode() == ISD::Constant &&
4208              cast<ConstantSDNode>(RHS)->isNullValue()) {
4209     std::swap(LHS, RHS);
4210     SDValue Op0 = Slct.getOperand(0);
4211     EVT OpVT = isSlctCC ? Op0.getValueType() :
4212                           Op0.getOperand(0).getValueType();
4213     bool isInt = OpVT.isInteger();
4214     CC = ISD::getSetCCInverse(CC, isInt);
4215
4216     if (!TLI.isCondCodeLegal(CC, OpVT))
4217       return SDValue();         // Inverse operator isn't legal.
4218
4219     DoXform = true;
4220     InvCC = true;
4221   }
4222
4223   if (DoXform) {
4224     SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
4225     if (isSlctCC)
4226       return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
4227                              Slct.getOperand(0), Slct.getOperand(1), CC);
4228     SDValue CCOp = Slct.getOperand(0);
4229     if (InvCC)
4230       CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
4231                           CCOp.getOperand(0), CCOp.getOperand(1), CC);
4232     return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
4233                        CCOp, OtherOp, Result);
4234   }
4235   return SDValue();
4236 }
4237
4238 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
4239 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
4240 /// called with the default operands, and if that fails, with commuted
4241 /// operands.
4242 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4243                                          TargetLowering::DAGCombinerInfo &DCI) {
4244   SelectionDAG &DAG = DCI.DAG;
4245
4246   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
4247   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
4248     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
4249     if (Result.getNode()) return Result;
4250   }
4251
4252   // fold (add (arm_neon_vabd a, b) c) -> (arm_neon_vaba c, a, b)
4253   EVT VT = N->getValueType(0);
4254   if (N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && VT.isInteger()) {
4255     unsigned IntNo = cast<ConstantSDNode>(N0.getOperand(0))->getZExtValue();
4256     if (IntNo == Intrinsic::arm_neon_vabds)
4257       return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT,
4258                          DAG.getConstant(Intrinsic::arm_neon_vabas, MVT::i32),
4259                          N1, N0.getOperand(1), N0.getOperand(2));
4260     if (IntNo == Intrinsic::arm_neon_vabdu)
4261       return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT,
4262                          DAG.getConstant(Intrinsic::arm_neon_vabau, MVT::i32),
4263                          N1, N0.getOperand(1), N0.getOperand(2));
4264   }
4265
4266   return SDValue();
4267 }
4268
4269 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
4270 ///
4271 static SDValue PerformADDCombine(SDNode *N,
4272                                  TargetLowering::DAGCombinerInfo &DCI) {
4273   SDValue N0 = N->getOperand(0);
4274   SDValue N1 = N->getOperand(1);
4275
4276   // First try with the default operand order.
4277   SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
4278   if (Result.getNode())
4279     return Result;
4280
4281   // If that didn't work, try again with the operands commuted.
4282   return PerformADDCombineWithOperands(N, N1, N0, DCI);
4283 }
4284
4285 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
4286 ///
4287 static SDValue PerformSUBCombine(SDNode *N,
4288                                  TargetLowering::DAGCombinerInfo &DCI) {
4289   SDValue N0 = N->getOperand(0);
4290   SDValue N1 = N->getOperand(1);
4291
4292   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
4293   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
4294     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
4295     if (Result.getNode()) return Result;
4296   }
4297
4298   return SDValue();
4299 }
4300
4301 static SDValue PerformMULCombine(SDNode *N,
4302                                  TargetLowering::DAGCombinerInfo &DCI,
4303                                  const ARMSubtarget *Subtarget) {
4304   SelectionDAG &DAG = DCI.DAG;
4305
4306   if (Subtarget->isThumb1Only())
4307     return SDValue();
4308
4309   if (DAG.getMachineFunction().
4310       getFunction()->hasFnAttr(Attribute::OptimizeForSize))
4311     return SDValue();
4312
4313   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
4314     return SDValue();
4315
4316   EVT VT = N->getValueType(0);
4317   if (VT != MVT::i32)
4318     return SDValue();
4319
4320   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
4321   if (!C)
4322     return SDValue();
4323
4324   uint64_t MulAmt = C->getZExtValue();
4325   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
4326   ShiftAmt = ShiftAmt & (32 - 1);
4327   SDValue V = N->getOperand(0);
4328   DebugLoc DL = N->getDebugLoc();
4329
4330   SDValue Res;
4331   MulAmt >>= ShiftAmt;
4332   if (isPowerOf2_32(MulAmt - 1)) {
4333     // (mul x, 2^N + 1) => (add (shl x, N), x)
4334     Res = DAG.getNode(ISD::ADD, DL, VT,
4335                       V, DAG.getNode(ISD::SHL, DL, VT,
4336                                      V, DAG.getConstant(Log2_32(MulAmt-1),
4337                                                         MVT::i32)));
4338   } else if (isPowerOf2_32(MulAmt + 1)) {
4339     // (mul x, 2^N - 1) => (sub (shl x, N), x)
4340     Res = DAG.getNode(ISD::SUB, DL, VT,
4341                       DAG.getNode(ISD::SHL, DL, VT,
4342                                   V, DAG.getConstant(Log2_32(MulAmt+1),
4343                                                      MVT::i32)),
4344                                                      V);
4345   } else
4346     return SDValue();
4347
4348   if (ShiftAmt != 0)
4349     Res = DAG.getNode(ISD::SHL, DL, VT, Res,
4350                       DAG.getConstant(ShiftAmt, MVT::i32));
4351
4352   // Do not add new nodes to DAG combiner worklist.
4353   DCI.CombineTo(N, Res, false);
4354   return SDValue();
4355 }
4356
4357 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
4358 static SDValue PerformORCombine(SDNode *N,
4359                                 TargetLowering::DAGCombinerInfo &DCI,
4360                                 const ARMSubtarget *Subtarget) {
4361   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
4362   // reasonable.
4363
4364   // BFI is only available on V6T2+
4365   if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
4366     return SDValue();
4367
4368   SelectionDAG &DAG = DCI.DAG;
4369   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4370   DebugLoc DL = N->getDebugLoc();
4371   // 1) or (and A, mask), val => ARMbfi A, val, mask
4372   //      iff (val & mask) == val
4373   //
4374   // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4375   //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
4376   //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4377   //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
4378   //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
4379   //  (i.e., copy a bitfield value into another bitfield of the same width)
4380   if (N0.getOpcode() != ISD::AND)
4381     return SDValue();
4382
4383   EVT VT = N->getValueType(0);
4384   if (VT != MVT::i32)
4385     return SDValue();
4386
4387
4388   // The value and the mask need to be constants so we can verify this is
4389   // actually a bitfield set. If the mask is 0xffff, we can do better
4390   // via a movt instruction, so don't use BFI in that case.
4391   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4392   if (!C)
4393     return SDValue();
4394   unsigned Mask = C->getZExtValue();
4395   if (Mask == 0xffff)
4396     return SDValue();
4397   SDValue Res;
4398   // Case (1): or (and A, mask), val => ARMbfi A, val, mask
4399   if ((C = dyn_cast<ConstantSDNode>(N1))) {
4400     unsigned Val = C->getZExtValue();
4401     if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
4402       return SDValue();
4403     Val >>= CountTrailingZeros_32(~Mask);
4404
4405     Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
4406                       DAG.getConstant(Val, MVT::i32),
4407                       DAG.getConstant(Mask, MVT::i32));
4408
4409     // Do not add new nodes to DAG combiner worklist.
4410     DCI.CombineTo(N, Res, false);
4411   } else if (N1.getOpcode() == ISD::AND) {
4412     // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
4413     C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4414     if (!C)
4415       return SDValue();
4416     unsigned Mask2 = C->getZExtValue();
4417
4418     if (ARM::isBitFieldInvertedMask(Mask) &&
4419         ARM::isBitFieldInvertedMask(~Mask2) &&
4420         (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
4421       // The pack halfword instruction works better for masks that fit it,
4422       // so use that when it's available.
4423       if (Subtarget->hasT2ExtractPack() &&
4424           (Mask == 0xffff || Mask == 0xffff0000))
4425         return SDValue();
4426       // 2a
4427       unsigned lsb = CountTrailingZeros_32(Mask2);
4428       Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
4429                         DAG.getConstant(lsb, MVT::i32));
4430       Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
4431                         DAG.getConstant(Mask, MVT::i32));
4432       // Do not add new nodes to DAG combiner worklist.
4433       DCI.CombineTo(N, Res, false);
4434     } else if (ARM::isBitFieldInvertedMask(~Mask) &&
4435                ARM::isBitFieldInvertedMask(Mask2) &&
4436                (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
4437       // The pack halfword instruction works better for masks that fit it,
4438       // so use that when it's available.
4439       if (Subtarget->hasT2ExtractPack() &&
4440           (Mask2 == 0xffff || Mask2 == 0xffff0000))
4441         return SDValue();
4442       // 2b
4443       unsigned lsb = CountTrailingZeros_32(Mask);
4444       Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4445                         DAG.getConstant(lsb, MVT::i32));
4446       Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
4447                                 DAG.getConstant(Mask2, MVT::i32));
4448       // Do not add new nodes to DAG combiner worklist.
4449       DCI.CombineTo(N, Res, false);
4450     }
4451   }
4452
4453   return SDValue();
4454 }
4455
4456 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
4457 /// ARMISD::VMOVRRD.
4458 static SDValue PerformVMOVRRDCombine(SDNode *N,
4459                                    TargetLowering::DAGCombinerInfo &DCI) {
4460   // fmrrd(fmdrr x, y) -> x,y
4461   SDValue InDouble = N->getOperand(0);
4462   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
4463     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
4464   return SDValue();
4465 }
4466
4467 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
4468 /// ARMISD::VDUPLANE.
4469 static SDValue PerformVDUPLANECombine(SDNode *N,
4470                                       TargetLowering::DAGCombinerInfo &DCI) {
4471   // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
4472   // redundant.
4473   SDValue Op = N->getOperand(0);
4474   EVT VT = N->getValueType(0);
4475
4476   // Ignore bit_converts.
4477   while (Op.getOpcode() == ISD::BIT_CONVERT)
4478     Op = Op.getOperand(0);
4479   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
4480     return SDValue();
4481
4482   // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
4483   unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
4484   // The canonical VMOV for a zero vector uses a 32-bit element size.
4485   unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4486   unsigned EltBits;
4487   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
4488     EltSize = 8;
4489   if (EltSize > VT.getVectorElementType().getSizeInBits())
4490     return SDValue();
4491
4492   SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
4493   return DCI.CombineTo(N, Res, false);
4494 }
4495
4496 /// getVShiftImm - Check if this is a valid build_vector for the immediate
4497 /// operand of a vector shift operation, where all the elements of the
4498 /// build_vector must have the same constant integer value.
4499 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
4500   // Ignore bit_converts.
4501   while (Op.getOpcode() == ISD::BIT_CONVERT)
4502     Op = Op.getOperand(0);
4503   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4504   APInt SplatBits, SplatUndef;
4505   unsigned SplatBitSize;
4506   bool HasAnyUndefs;
4507   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
4508                                       HasAnyUndefs, ElementBits) ||
4509       SplatBitSize > ElementBits)
4510     return false;
4511   Cnt = SplatBits.getSExtValue();
4512   return true;
4513 }
4514
4515 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
4516 /// operand of a vector shift left operation.  That value must be in the range:
4517 ///   0 <= Value < ElementBits for a left shift; or
4518 ///   0 <= Value <= ElementBits for a long left shift.
4519 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
4520   assert(VT.isVector() && "vector shift count is not a vector type");
4521   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4522   if (! getVShiftImm(Op, ElementBits, Cnt))
4523     return false;
4524   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
4525 }
4526
4527 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
4528 /// operand of a vector shift right operation.  For a shift opcode, the value
4529 /// is positive, but for an intrinsic the value count must be negative. The
4530 /// absolute value must be in the range:
4531 ///   1 <= |Value| <= ElementBits for a right shift; or
4532 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
4533 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
4534                          int64_t &Cnt) {
4535   assert(VT.isVector() && "vector shift count is not a vector type");
4536   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
4537   if (! getVShiftImm(Op, ElementBits, Cnt))
4538     return false;
4539   if (isIntrinsic)
4540     Cnt = -Cnt;
4541   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
4542 }
4543
4544 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
4545 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
4546   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
4547   switch (IntNo) {
4548   default:
4549     // Don't do anything for most intrinsics.
4550     break;
4551
4552   // Vector shifts: check for immediate versions and lower them.
4553   // Note: This is done during DAG combining instead of DAG legalizing because
4554   // the build_vectors for 64-bit vector element shift counts are generally
4555   // not legal, and it is hard to see their values after they get legalized to
4556   // loads from a constant pool.
4557   case Intrinsic::arm_neon_vshifts:
4558   case Intrinsic::arm_neon_vshiftu:
4559   case Intrinsic::arm_neon_vshiftls:
4560   case Intrinsic::arm_neon_vshiftlu:
4561   case Intrinsic::arm_neon_vshiftn:
4562   case Intrinsic::arm_neon_vrshifts:
4563   case Intrinsic::arm_neon_vrshiftu:
4564   case Intrinsic::arm_neon_vrshiftn:
4565   case Intrinsic::arm_neon_vqshifts:
4566   case Intrinsic::arm_neon_vqshiftu:
4567   case Intrinsic::arm_neon_vqshiftsu:
4568   case Intrinsic::arm_neon_vqshiftns:
4569   case Intrinsic::arm_neon_vqshiftnu:
4570   case Intrinsic::arm_neon_vqshiftnsu:
4571   case Intrinsic::arm_neon_vqrshiftns:
4572   case Intrinsic::arm_neon_vqrshiftnu:
4573   case Intrinsic::arm_neon_vqrshiftnsu: {
4574     EVT VT = N->getOperand(1).getValueType();
4575     int64_t Cnt;
4576     unsigned VShiftOpc = 0;
4577
4578     switch (IntNo) {
4579     case Intrinsic::arm_neon_vshifts:
4580     case Intrinsic::arm_neon_vshiftu:
4581       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
4582         VShiftOpc = ARMISD::VSHL;
4583         break;
4584       }
4585       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
4586         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
4587                      ARMISD::VSHRs : ARMISD::VSHRu);
4588         break;
4589       }
4590       return SDValue();
4591
4592     case Intrinsic::arm_neon_vshiftls:
4593     case Intrinsic::arm_neon_vshiftlu:
4594       if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
4595         break;
4596       llvm_unreachable("invalid shift count for vshll intrinsic");
4597
4598     case Intrinsic::arm_neon_vrshifts:
4599     case Intrinsic::arm_neon_vrshiftu:
4600       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
4601         break;
4602       return SDValue();
4603
4604     case Intrinsic::arm_neon_vqshifts:
4605     case Intrinsic::arm_neon_vqshiftu:
4606       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4607         break;
4608       return SDValue();
4609
4610     case Intrinsic::arm_neon_vqshiftsu:
4611       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
4612         break;
4613       llvm_unreachable("invalid shift count for vqshlu intrinsic");
4614
4615     case Intrinsic::arm_neon_vshiftn:
4616     case Intrinsic::arm_neon_vrshiftn:
4617     case Intrinsic::arm_neon_vqshiftns:
4618     case Intrinsic::arm_neon_vqshiftnu:
4619     case Intrinsic::arm_neon_vqshiftnsu:
4620     case Intrinsic::arm_neon_vqrshiftns:
4621     case Intrinsic::arm_neon_vqrshiftnu:
4622     case Intrinsic::arm_neon_vqrshiftnsu:
4623       // Narrowing shifts require an immediate right shift.
4624       if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
4625         break;
4626       llvm_unreachable("invalid shift count for narrowing vector shift "
4627                        "intrinsic");
4628
4629     default:
4630       llvm_unreachable("unhandled vector shift");
4631     }
4632
4633     switch (IntNo) {
4634     case Intrinsic::arm_neon_vshifts:
4635     case Intrinsic::arm_neon_vshiftu:
4636       // Opcode already set above.
4637       break;
4638     case Intrinsic::arm_neon_vshiftls:
4639     case Intrinsic::arm_neon_vshiftlu:
4640       if (Cnt == VT.getVectorElementType().getSizeInBits())
4641         VShiftOpc = ARMISD::VSHLLi;
4642       else
4643         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
4644                      ARMISD::VSHLLs : ARMISD::VSHLLu);
4645       break;
4646     case Intrinsic::arm_neon_vshiftn:
4647       VShiftOpc = ARMISD::VSHRN; break;
4648     case Intrinsic::arm_neon_vrshifts:
4649       VShiftOpc = ARMISD::VRSHRs; break;
4650     case Intrinsic::arm_neon_vrshiftu:
4651       VShiftOpc = ARMISD::VRSHRu; break;
4652     case Intrinsic::arm_neon_vrshiftn:
4653       VShiftOpc = ARMISD::VRSHRN; break;
4654     case Intrinsic::arm_neon_vqshifts:
4655       VShiftOpc = ARMISD::VQSHLs; break;
4656     case Intrinsic::arm_neon_vqshiftu:
4657       VShiftOpc = ARMISD::VQSHLu; break;
4658     case Intrinsic::arm_neon_vqshiftsu:
4659       VShiftOpc = ARMISD::VQSHLsu; break;
4660     case Intrinsic::arm_neon_vqshiftns:
4661       VShiftOpc = ARMISD::VQSHRNs; break;
4662     case Intrinsic::arm_neon_vqshiftnu:
4663       VShiftOpc = ARMISD::VQSHRNu; break;
4664     case Intrinsic::arm_neon_vqshiftnsu:
4665       VShiftOpc = ARMISD::VQSHRNsu; break;
4666     case Intrinsic::arm_neon_vqrshiftns:
4667       VShiftOpc = ARMISD::VQRSHRNs; break;
4668     case Intrinsic::arm_neon_vqrshiftnu:
4669       VShiftOpc = ARMISD::VQRSHRNu; break;
4670     case Intrinsic::arm_neon_vqrshiftnsu:
4671       VShiftOpc = ARMISD::VQRSHRNsu; break;
4672     }
4673
4674     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4675                        N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
4676   }
4677
4678   case Intrinsic::arm_neon_vshiftins: {
4679     EVT VT = N->getOperand(1).getValueType();
4680     int64_t Cnt;
4681     unsigned VShiftOpc = 0;
4682
4683     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
4684       VShiftOpc = ARMISD::VSLI;
4685     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
4686       VShiftOpc = ARMISD::VSRI;
4687     else {
4688       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
4689     }
4690
4691     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
4692                        N->getOperand(1), N->getOperand(2),
4693                        DAG.getConstant(Cnt, MVT::i32));
4694   }
4695
4696   case Intrinsic::arm_neon_vqrshifts:
4697   case Intrinsic::arm_neon_vqrshiftu:
4698     // No immediate versions of these to check for.
4699     break;
4700   }
4701
4702   return SDValue();
4703 }
4704
4705 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
4706 /// lowers them.  As with the vector shift intrinsics, this is done during DAG
4707 /// combining instead of DAG legalizing because the build_vectors for 64-bit
4708 /// vector element shift counts are generally not legal, and it is hard to see
4709 /// their values after they get legalized to loads from a constant pool.
4710 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
4711                                    const ARMSubtarget *ST) {
4712   EVT VT = N->getValueType(0);
4713
4714   // Nothing to be done for scalar shifts.
4715   if (! VT.isVector())
4716     return SDValue();
4717
4718   assert(ST->hasNEON() && "unexpected vector shift");
4719   int64_t Cnt;
4720
4721   switch (N->getOpcode()) {
4722   default: llvm_unreachable("unexpected shift opcode");
4723
4724   case ISD::SHL:
4725     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
4726       return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
4727                          DAG.getConstant(Cnt, MVT::i32));
4728     break;
4729
4730   case ISD::SRA:
4731   case ISD::SRL:
4732     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
4733       unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
4734                             ARMISD::VSHRs : ARMISD::VSHRu);
4735       return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
4736                          DAG.getConstant(Cnt, MVT::i32));
4737     }
4738   }
4739   return SDValue();
4740 }
4741
4742 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
4743 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
4744 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
4745                                     const ARMSubtarget *ST) {
4746   SDValue N0 = N->getOperand(0);
4747
4748   // Check for sign- and zero-extensions of vector extract operations of 8-
4749   // and 16-bit vector elements.  NEON supports these directly.  They are
4750   // handled during DAG combining because type legalization will promote them
4751   // to 32-bit types and it is messy to recognize the operations after that.
4752   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4753     SDValue Vec = N0.getOperand(0);
4754     SDValue Lane = N0.getOperand(1);
4755     EVT VT = N->getValueType(0);
4756     EVT EltVT = N0.getValueType();
4757     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4758
4759     if (VT == MVT::i32 &&
4760         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
4761         TLI.isTypeLegal(Vec.getValueType())) {
4762
4763       unsigned Opc = 0;
4764       switch (N->getOpcode()) {
4765       default: llvm_unreachable("unexpected opcode");
4766       case ISD::SIGN_EXTEND:
4767         Opc = ARMISD::VGETLANEs;
4768         break;
4769       case ISD::ZERO_EXTEND:
4770       case ISD::ANY_EXTEND:
4771         Opc = ARMISD::VGETLANEu;
4772         break;
4773       }
4774       return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
4775     }
4776   }
4777
4778   return SDValue();
4779 }
4780
4781 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
4782 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
4783 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
4784                                        const ARMSubtarget *ST) {
4785   // If the target supports NEON, try to use vmax/vmin instructions for f32
4786   // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
4787   // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
4788   // a NaN; only do the transformation when it matches that behavior.
4789
4790   // For now only do this when using NEON for FP operations; if using VFP, it
4791   // is not obvious that the benefit outweighs the cost of switching to the
4792   // NEON pipeline.
4793   if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
4794       N->getValueType(0) != MVT::f32)
4795     return SDValue();
4796
4797   SDValue CondLHS = N->getOperand(0);
4798   SDValue CondRHS = N->getOperand(1);
4799   SDValue LHS = N->getOperand(2);
4800   SDValue RHS = N->getOperand(3);
4801   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4802
4803   unsigned Opcode = 0;
4804   bool IsReversed;
4805   if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
4806     IsReversed = false; // x CC y ? x : y
4807   } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
4808     IsReversed = true ; // x CC y ? y : x
4809   } else {
4810     return SDValue();
4811   }
4812
4813   bool IsUnordered;
4814   switch (CC) {
4815   default: break;
4816   case ISD::SETOLT:
4817   case ISD::SETOLE:
4818   case ISD::SETLT:
4819   case ISD::SETLE:
4820   case ISD::SETULT:
4821   case ISD::SETULE:
4822     // If LHS is NaN, an ordered comparison will be false and the result will
4823     // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
4824     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4825     IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4826     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4827       break;
4828     // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4829     // will return -0, so vmin can only be used for unsafe math or if one of
4830     // the operands is known to be nonzero.
4831     if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4832         !UnsafeFPMath &&
4833         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4834       break;
4835     Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4836     break;
4837
4838   case ISD::SETOGT:
4839   case ISD::SETOGE:
4840   case ISD::SETGT:
4841   case ISD::SETGE:
4842   case ISD::SETUGT:
4843   case ISD::SETUGE:
4844     // If LHS is NaN, an ordered comparison will be false and the result will
4845     // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
4846     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
4847     IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4848     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4849       break;
4850     // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4851     // will return +0, so vmax can only be used for unsafe math or if one of
4852     // the operands is known to be nonzero.
4853     if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4854         !UnsafeFPMath &&
4855         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4856       break;
4857     Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4858     break;
4859   }
4860
4861   if (!Opcode)
4862     return SDValue();
4863   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4864 }
4865
4866 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4867                                              DAGCombinerInfo &DCI) const {
4868   switch (N->getOpcode()) {
4869   default: break;
4870   case ISD::ADD:        return PerformADDCombine(N, DCI);
4871   case ISD::SUB:        return PerformSUBCombine(N, DCI);
4872   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
4873   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
4874   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4875   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
4876   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4877   case ISD::SHL:
4878   case ISD::SRA:
4879   case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
4880   case ISD::SIGN_EXTEND:
4881   case ISD::ZERO_EXTEND:
4882   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4883   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4884   }
4885   return SDValue();
4886 }
4887
4888 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4889   if (!Subtarget->hasV6Ops())
4890     // Pre-v6 does not support unaligned mem access.
4891     return false;
4892
4893   // v6+ may or may not support unaligned mem access depending on the system
4894   // configuration.
4895   // FIXME: This is pretty conservative. Should we provide cmdline option to
4896   // control the behaviour?
4897   if (!Subtarget->isTargetDarwin())
4898     return false;
4899
4900   switch (VT.getSimpleVT().SimpleTy) {
4901   default:
4902     return false;
4903   case MVT::i8:
4904   case MVT::i16:
4905   case MVT::i32:
4906     return true;
4907   // FIXME: VLD1 etc with standard alignment is legal.
4908   }
4909 }
4910
4911 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4912   if (V < 0)
4913     return false;
4914
4915   unsigned Scale = 1;
4916   switch (VT.getSimpleVT().SimpleTy) {
4917   default: return false;
4918   case MVT::i1:
4919   case MVT::i8:
4920     // Scale == 1;
4921     break;
4922   case MVT::i16:
4923     // Scale == 2;
4924     Scale = 2;
4925     break;
4926   case MVT::i32:
4927     // Scale == 4;
4928     Scale = 4;
4929     break;
4930   }
4931
4932   if ((V & (Scale - 1)) != 0)
4933     return false;
4934   V /= Scale;
4935   return V == (V & ((1LL << 5) - 1));
4936 }
4937
4938 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4939                                       const ARMSubtarget *Subtarget) {
4940   bool isNeg = false;
4941   if (V < 0) {
4942     isNeg = true;
4943     V = - V;
4944   }
4945
4946   switch (VT.getSimpleVT().SimpleTy) {
4947   default: return false;
4948   case MVT::i1:
4949   case MVT::i8:
4950   case MVT::i16:
4951   case MVT::i32:
4952     // + imm12 or - imm8
4953     if (isNeg)
4954       return V == (V & ((1LL << 8) - 1));
4955     return V == (V & ((1LL << 12) - 1));
4956   case MVT::f32:
4957   case MVT::f64:
4958     // Same as ARM mode. FIXME: NEON?
4959     if (!Subtarget->hasVFP2())
4960       return false;
4961     if ((V & 3) != 0)
4962       return false;
4963     V >>= 2;
4964     return V == (V & ((1LL << 8) - 1));
4965   }
4966 }
4967
4968 /// isLegalAddressImmediate - Return true if the integer value can be used
4969 /// as the offset of the target addressing mode for load / store of the
4970 /// given type.
4971 static bool isLegalAddressImmediate(int64_t V, EVT VT,
4972                                     const ARMSubtarget *Subtarget) {
4973   if (V == 0)
4974     return true;
4975
4976   if (!VT.isSimple())
4977     return false;
4978
4979   if (Subtarget->isThumb1Only())
4980     return isLegalT1AddressImmediate(V, VT);
4981   else if (Subtarget->isThumb2())
4982     return isLegalT2AddressImmediate(V, VT, Subtarget);
4983
4984   // ARM mode.
4985   if (V < 0)
4986     V = - V;
4987   switch (VT.getSimpleVT().SimpleTy) {
4988   default: return false;
4989   case MVT::i1:
4990   case MVT::i8:
4991   case MVT::i32:
4992     // +- imm12
4993     return V == (V & ((1LL << 12) - 1));
4994   case MVT::i16:
4995     // +- imm8
4996     return V == (V & ((1LL << 8) - 1));
4997   case MVT::f32:
4998   case MVT::f64:
4999     if (!Subtarget->hasVFP2()) // FIXME: NEON?
5000       return false;
5001     if ((V & 3) != 0)
5002       return false;
5003     V >>= 2;
5004     return V == (V & ((1LL << 8) - 1));
5005   }
5006 }
5007
5008 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
5009                                                       EVT VT) const {
5010   int Scale = AM.Scale;
5011   if (Scale < 0)
5012     return false;
5013
5014   switch (VT.getSimpleVT().SimpleTy) {
5015   default: return false;
5016   case MVT::i1:
5017   case MVT::i8:
5018   case MVT::i16:
5019   case MVT::i32:
5020     if (Scale == 1)
5021       return true;
5022     // r + r << imm
5023     Scale = Scale & ~1;
5024     return Scale == 2 || Scale == 4 || Scale == 8;
5025   case MVT::i64:
5026     // r + r
5027     if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5028       return true;
5029     return false;
5030   case MVT::isVoid:
5031     // Note, we allow "void" uses (basically, uses that aren't loads or
5032     // stores), because arm allows folding a scale into many arithmetic
5033     // operations.  This should be made more precise and revisited later.
5034
5035     // Allow r << imm, but the imm has to be a multiple of two.
5036     if (Scale & 1) return false;
5037     return isPowerOf2_32(Scale);
5038   }
5039 }
5040
5041 /// isLegalAddressingMode - Return true if the addressing mode represented
5042 /// by AM is legal for this target, for a load/store of the specified type.
5043 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5044                                               const Type *Ty) const {
5045   EVT VT = getValueType(Ty, true);
5046   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
5047     return false;
5048
5049   // Can never fold addr of global into load/store.
5050   if (AM.BaseGV)
5051     return false;
5052
5053   switch (AM.Scale) {
5054   case 0:  // no scale reg, must be "r+i" or "r", or "i".
5055     break;
5056   case 1:
5057     if (Subtarget->isThumb1Only())
5058       return false;
5059     // FALL THROUGH.
5060   default:
5061     // ARM doesn't support any R+R*scale+imm addr modes.
5062     if (AM.BaseOffs)
5063       return false;
5064
5065     if (!VT.isSimple())
5066       return false;
5067
5068     if (Subtarget->isThumb2())
5069       return isLegalT2ScaledAddressingMode(AM, VT);
5070
5071     int Scale = AM.Scale;
5072     switch (VT.getSimpleVT().SimpleTy) {
5073     default: return false;
5074     case MVT::i1:
5075     case MVT::i8:
5076     case MVT::i32:
5077       if (Scale < 0) Scale = -Scale;
5078       if (Scale == 1)
5079         return true;
5080       // r + r << imm
5081       return isPowerOf2_32(Scale & ~1);
5082     case MVT::i16:
5083     case MVT::i64:
5084       // r + r
5085       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
5086         return true;
5087       return false;
5088
5089     case MVT::isVoid:
5090       // Note, we allow "void" uses (basically, uses that aren't loads or
5091       // stores), because arm allows folding a scale into many arithmetic
5092       // operations.  This should be made more precise and revisited later.
5093
5094       // Allow r << imm, but the imm has to be a multiple of two.
5095       if (Scale & 1) return false;
5096       return isPowerOf2_32(Scale);
5097     }
5098     break;
5099   }
5100   return true;
5101 }
5102
5103 /// isLegalICmpImmediate - Return true if the specified immediate is legal
5104 /// icmp immediate, that is the target has icmp instructions which can compare
5105 /// a register against the immediate without having to materialize the
5106 /// immediate into a register.
5107 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
5108   if (!Subtarget->isThumb())
5109     return ARM_AM::getSOImmVal(Imm) != -1;
5110   if (Subtarget->isThumb2())
5111     return ARM_AM::getT2SOImmVal(Imm) != -1;
5112   return Imm >= 0 && Imm <= 255;
5113 }
5114
5115 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
5116                                       bool isSEXTLoad, SDValue &Base,
5117                                       SDValue &Offset, bool &isInc,
5118                                       SelectionDAG &DAG) {
5119   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5120     return false;
5121
5122   if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
5123     // AddressingMode 3
5124     Base = Ptr->getOperand(0);
5125     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5126       int RHSC = (int)RHS->getZExtValue();
5127       if (RHSC < 0 && RHSC > -256) {
5128         assert(Ptr->getOpcode() == ISD::ADD);
5129         isInc = false;
5130         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5131         return true;
5132       }
5133     }
5134     isInc = (Ptr->getOpcode() == ISD::ADD);
5135     Offset = Ptr->getOperand(1);
5136     return true;
5137   } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
5138     // AddressingMode 2
5139     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5140       int RHSC = (int)RHS->getZExtValue();
5141       if (RHSC < 0 && RHSC > -0x1000) {
5142         assert(Ptr->getOpcode() == ISD::ADD);
5143         isInc = false;
5144         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5145         Base = Ptr->getOperand(0);
5146         return true;
5147       }
5148     }
5149
5150     if (Ptr->getOpcode() == ISD::ADD) {
5151       isInc = true;
5152       ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
5153       if (ShOpcVal != ARM_AM::no_shift) {
5154         Base = Ptr->getOperand(1);
5155         Offset = Ptr->getOperand(0);
5156       } else {
5157         Base = Ptr->getOperand(0);
5158         Offset = Ptr->getOperand(1);
5159       }
5160       return true;
5161     }
5162
5163     isInc = (Ptr->getOpcode() == ISD::ADD);
5164     Base = Ptr->getOperand(0);
5165     Offset = Ptr->getOperand(1);
5166     return true;
5167   }
5168
5169   // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
5170   return false;
5171 }
5172
5173 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
5174                                      bool isSEXTLoad, SDValue &Base,
5175                                      SDValue &Offset, bool &isInc,
5176                                      SelectionDAG &DAG) {
5177   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
5178     return false;
5179
5180   Base = Ptr->getOperand(0);
5181   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
5182     int RHSC = (int)RHS->getZExtValue();
5183     if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
5184       assert(Ptr->getOpcode() == ISD::ADD);
5185       isInc = false;
5186       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
5187       return true;
5188     } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
5189       isInc = Ptr->getOpcode() == ISD::ADD;
5190       Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
5191       return true;
5192     }
5193   }
5194
5195   return false;
5196 }
5197
5198 /// getPreIndexedAddressParts - returns true by value, base pointer and
5199 /// offset pointer and addressing mode by reference if the node's address
5200 /// can be legally represented as pre-indexed load / store address.
5201 bool
5202 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
5203                                              SDValue &Offset,
5204                                              ISD::MemIndexedMode &AM,
5205                                              SelectionDAG &DAG) const {
5206   if (Subtarget->isThumb1Only())
5207     return false;
5208
5209   EVT VT;
5210   SDValue Ptr;
5211   bool isSEXTLoad = false;
5212   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5213     Ptr = LD->getBasePtr();
5214     VT  = LD->getMemoryVT();
5215     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5216   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5217     Ptr = ST->getBasePtr();
5218     VT  = ST->getMemoryVT();
5219   } else
5220     return false;
5221
5222   bool isInc;
5223   bool isLegal = false;
5224   if (Subtarget->isThumb2())
5225     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5226                                        Offset, isInc, DAG);
5227   else
5228     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
5229                                         Offset, isInc, DAG);
5230   if (!isLegal)
5231     return false;
5232
5233   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
5234   return true;
5235 }
5236
5237 /// getPostIndexedAddressParts - returns true by value, base pointer and
5238 /// offset pointer and addressing mode by reference if this node can be
5239 /// combined with a load / store to form a post-indexed load / store.
5240 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
5241                                                    SDValue &Base,
5242                                                    SDValue &Offset,
5243                                                    ISD::MemIndexedMode &AM,
5244                                                    SelectionDAG &DAG) const {
5245   if (Subtarget->isThumb1Only())
5246     return false;
5247
5248   EVT VT;
5249   SDValue Ptr;
5250   bool isSEXTLoad = false;
5251   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
5252     VT  = LD->getMemoryVT();
5253     Ptr = LD->getBasePtr();
5254     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
5255   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
5256     VT  = ST->getMemoryVT();
5257     Ptr = ST->getBasePtr();
5258   } else
5259     return false;
5260
5261   bool isInc;
5262   bool isLegal = false;
5263   if (Subtarget->isThumb2())
5264     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5265                                        isInc, DAG);
5266   else
5267     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
5268                                         isInc, DAG);
5269   if (!isLegal)
5270     return false;
5271
5272   if (Ptr != Base) {
5273     // Swap base ptr and offset to catch more post-index load / store when
5274     // it's legal. In Thumb2 mode, offset must be an immediate.
5275     if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
5276         !Subtarget->isThumb2())
5277       std::swap(Base, Offset);
5278
5279     // Post-indexed load / store update the base pointer.
5280     if (Ptr != Base)
5281       return false;
5282   }
5283
5284   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
5285   return true;
5286 }
5287
5288 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5289                                                        const APInt &Mask,
5290                                                        APInt &KnownZero,
5291                                                        APInt &KnownOne,
5292                                                        const SelectionDAG &DAG,
5293                                                        unsigned Depth) const {
5294   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
5295   switch (Op.getOpcode()) {
5296   default: break;
5297   case ARMISD::CMOV: {
5298     // Bits are known zero/one if known on the LHS and RHS.
5299     DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
5300     if (KnownZero == 0 && KnownOne == 0) return;
5301
5302     APInt KnownZeroRHS, KnownOneRHS;
5303     DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
5304                           KnownZeroRHS, KnownOneRHS, Depth+1);
5305     KnownZero &= KnownZeroRHS;
5306     KnownOne  &= KnownOneRHS;
5307     return;
5308   }
5309   }
5310 }
5311
5312 //===----------------------------------------------------------------------===//
5313 //                           ARM Inline Assembly Support
5314 //===----------------------------------------------------------------------===//
5315
5316 /// getConstraintType - Given a constraint letter, return the type of
5317 /// constraint it is for this target.
5318 ARMTargetLowering::ConstraintType
5319 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
5320   if (Constraint.size() == 1) {
5321     switch (Constraint[0]) {
5322     default:  break;
5323     case 'l': return C_RegisterClass;
5324     case 'w': return C_RegisterClass;
5325     }
5326   }
5327   return TargetLowering::getConstraintType(Constraint);
5328 }
5329
5330 std::pair<unsigned, const TargetRegisterClass*>
5331 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5332                                                 EVT VT) const {
5333   if (Constraint.size() == 1) {
5334     // GCC ARM Constraint Letters
5335     switch (Constraint[0]) {
5336     case 'l':
5337       if (Subtarget->isThumb())
5338         return std::make_pair(0U, ARM::tGPRRegisterClass);
5339       else
5340         return std::make_pair(0U, ARM::GPRRegisterClass);
5341     case 'r':
5342       return std::make_pair(0U, ARM::GPRRegisterClass);
5343     case 'w':
5344       if (VT == MVT::f32)
5345         return std::make_pair(0U, ARM::SPRRegisterClass);
5346       if (VT.getSizeInBits() == 64)
5347         return std::make_pair(0U, ARM::DPRRegisterClass);
5348       if (VT.getSizeInBits() == 128)
5349         return std::make_pair(0U, ARM::QPRRegisterClass);
5350       break;
5351     }
5352   }
5353   if (StringRef("{cc}").equals_lower(Constraint))
5354     return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
5355
5356   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5357 }
5358
5359 std::vector<unsigned> ARMTargetLowering::
5360 getRegClassForInlineAsmConstraint(const std::string &Constraint,
5361                                   EVT VT) const {
5362   if (Constraint.size() != 1)
5363     return std::vector<unsigned>();
5364
5365   switch (Constraint[0]) {      // GCC ARM Constraint Letters
5366   default: break;
5367   case 'l':
5368     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5369                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5370                                  0);
5371   case 'r':
5372     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
5373                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
5374                                  ARM::R8, ARM::R9, ARM::R10, ARM::R11,
5375                                  ARM::R12, ARM::LR, 0);
5376   case 'w':
5377     if (VT == MVT::f32)
5378       return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
5379                                    ARM::S4, ARM::S5, ARM::S6, ARM::S7,
5380                                    ARM::S8, ARM::S9, ARM::S10, ARM::S11,
5381                                    ARM::S12,ARM::S13,ARM::S14,ARM::S15,
5382                                    ARM::S16,ARM::S17,ARM::S18,ARM::S19,
5383                                    ARM::S20,ARM::S21,ARM::S22,ARM::S23,
5384                                    ARM::S24,ARM::S25,ARM::S26,ARM::S27,
5385                                    ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
5386     if (VT.getSizeInBits() == 64)
5387       return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
5388                                    ARM::D4, ARM::D5, ARM::D6, ARM::D7,
5389                                    ARM::D8, ARM::D9, ARM::D10,ARM::D11,
5390                                    ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
5391     if (VT.getSizeInBits() == 128)
5392       return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
5393                                    ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
5394       break;
5395   }
5396
5397   return std::vector<unsigned>();
5398 }
5399
5400 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5401 /// vector.  If it is invalid, don't add anything to Ops.
5402 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5403                                                      char Constraint,
5404                                                      std::vector<SDValue>&Ops,
5405                                                      SelectionDAG &DAG) const {
5406   SDValue Result(0, 0);
5407
5408   switch (Constraint) {
5409   default: break;
5410   case 'I': case 'J': case 'K': case 'L':
5411   case 'M': case 'N': case 'O':
5412     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5413     if (!C)
5414       return;
5415
5416     int64_t CVal64 = C->getSExtValue();
5417     int CVal = (int) CVal64;
5418     // None of these constraints allow values larger than 32 bits.  Check
5419     // that the value fits in an int.
5420     if (CVal != CVal64)
5421       return;
5422
5423     switch (Constraint) {
5424       case 'I':
5425         if (Subtarget->isThumb1Only()) {
5426           // This must be a constant between 0 and 255, for ADD
5427           // immediates.
5428           if (CVal >= 0 && CVal <= 255)
5429             break;
5430         } else if (Subtarget->isThumb2()) {
5431           // A constant that can be used as an immediate value in a
5432           // data-processing instruction.
5433           if (ARM_AM::getT2SOImmVal(CVal) != -1)
5434             break;
5435         } else {
5436           // A constant that can be used as an immediate value in a
5437           // data-processing instruction.
5438           if (ARM_AM::getSOImmVal(CVal) != -1)
5439             break;
5440         }
5441         return;
5442
5443       case 'J':
5444         if (Subtarget->isThumb()) {  // FIXME thumb2
5445           // This must be a constant between -255 and -1, for negated ADD
5446           // immediates. This can be used in GCC with an "n" modifier that
5447           // prints the negated value, for use with SUB instructions. It is
5448           // not useful otherwise but is implemented for compatibility.
5449           if (CVal >= -255 && CVal <= -1)
5450             break;
5451         } else {
5452           // This must be a constant between -4095 and 4095. It is not clear
5453           // what this constraint is intended for. Implemented for
5454           // compatibility with GCC.
5455           if (CVal >= -4095 && CVal <= 4095)
5456             break;
5457         }
5458         return;
5459
5460       case 'K':
5461         if (Subtarget->isThumb1Only()) {
5462           // A 32-bit value where only one byte has a nonzero value. Exclude
5463           // zero to match GCC. This constraint is used by GCC internally for
5464           // constants that can be loaded with a move/shift combination.
5465           // It is not useful otherwise but is implemented for compatibility.
5466           if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
5467             break;
5468         } else if (Subtarget->isThumb2()) {
5469           // A constant whose bitwise inverse can be used as an immediate
5470           // value in a data-processing instruction. This can be used in GCC
5471           // with a "B" modifier that prints the inverted value, for use with
5472           // BIC and MVN instructions. It is not useful otherwise but is
5473           // implemented for compatibility.
5474           if (ARM_AM::getT2SOImmVal(~CVal) != -1)
5475             break;
5476         } else {
5477           // A constant whose bitwise inverse can be used as an immediate
5478           // value in a data-processing instruction. This can be used in GCC
5479           // with a "B" modifier that prints the inverted value, for use with
5480           // BIC and MVN instructions. It is not useful otherwise but is
5481           // implemented for compatibility.
5482           if (ARM_AM::getSOImmVal(~CVal) != -1)
5483             break;
5484         }
5485         return;
5486
5487       case 'L':
5488         if (Subtarget->isThumb1Only()) {
5489           // This must be a constant between -7 and 7,
5490           // for 3-operand ADD/SUB immediate instructions.
5491           if (CVal >= -7 && CVal < 7)
5492             break;
5493         } else if (Subtarget->isThumb2()) {
5494           // A constant whose negation can be used as an immediate value in a
5495           // data-processing instruction. This can be used in GCC with an "n"
5496           // modifier that prints the negated value, for use with SUB
5497           // instructions. It is not useful otherwise but is implemented for
5498           // compatibility.
5499           if (ARM_AM::getT2SOImmVal(-CVal) != -1)
5500             break;
5501         } else {
5502           // A constant whose negation can be used as an immediate value in a
5503           // data-processing instruction. This can be used in GCC with an "n"
5504           // modifier that prints the negated value, for use with SUB
5505           // instructions. It is not useful otherwise but is implemented for
5506           // compatibility.
5507           if (ARM_AM::getSOImmVal(-CVal) != -1)
5508             break;
5509         }
5510         return;
5511
5512       case 'M':
5513         if (Subtarget->isThumb()) { // FIXME thumb2
5514           // This must be a multiple of 4 between 0 and 1020, for
5515           // ADD sp + immediate.
5516           if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
5517             break;
5518         } else {
5519           // A power of two or a constant between 0 and 32.  This is used in
5520           // GCC for the shift amount on shifted register operands, but it is
5521           // useful in general for any shift amounts.
5522           if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
5523             break;
5524         }
5525         return;
5526
5527       case 'N':
5528         if (Subtarget->isThumb()) {  // FIXME thumb2
5529           // This must be a constant between 0 and 31, for shift amounts.
5530           if (CVal >= 0 && CVal <= 31)
5531             break;
5532         }
5533         return;
5534
5535       case 'O':
5536         if (Subtarget->isThumb()) {  // FIXME thumb2
5537           // This must be a multiple of 4 between -508 and 508, for
5538           // ADD/SUB sp = sp + immediate.
5539           if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
5540             break;
5541         }
5542         return;
5543     }
5544     Result = DAG.getTargetConstant(CVal, Op.getValueType());
5545     break;
5546   }
5547
5548   if (Result.getNode()) {
5549     Ops.push_back(Result);
5550     return;
5551   }
5552   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5553 }
5554
5555 bool
5556 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5557   // The ARM target isn't yet aware of offsets.
5558   return false;
5559 }
5560
5561 int ARM::getVFPf32Imm(const APFloat &FPImm) {
5562   APInt Imm = FPImm.bitcastToAPInt();
5563   uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
5564   int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
5565   int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
5566
5567   // We can handle 4 bits of mantissa.
5568   // mantissa = (16+UInt(e:f:g:h))/16.
5569   if (Mantissa & 0x7ffff)
5570     return -1;
5571   Mantissa >>= 19;
5572   if ((Mantissa & 0xf) != Mantissa)
5573     return -1;
5574
5575   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5576   if (Exp < -3 || Exp > 4)
5577     return -1;
5578   Exp = ((Exp+3) & 0x7) ^ 4;
5579
5580   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5581 }
5582
5583 int ARM::getVFPf64Imm(const APFloat &FPImm) {
5584   APInt Imm = FPImm.bitcastToAPInt();
5585   uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
5586   int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
5587   uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
5588
5589   // We can handle 4 bits of mantissa.
5590   // mantissa = (16+UInt(e:f:g:h))/16.
5591   if (Mantissa & 0xffffffffffffLL)
5592     return -1;
5593   Mantissa >>= 48;
5594   if ((Mantissa & 0xf) != Mantissa)
5595     return -1;
5596
5597   // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
5598   if (Exp < -3 || Exp > 4)
5599     return -1;
5600   Exp = ((Exp+3) & 0x7) ^ 4;
5601
5602   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
5603 }
5604
5605 bool ARM::isBitFieldInvertedMask(unsigned v) {
5606   if (v == 0xffffffff)
5607     return 0;
5608   // there can be 1's on either or both "outsides", all the "inside"
5609   // bits must be 0's
5610   unsigned int lsb = 0, msb = 31;
5611   while (v & (1 << msb)) --msb;
5612   while (v & (1 << lsb)) ++lsb;
5613   for (unsigned int i = lsb; i <= msb; ++i) {
5614     if (v & (1 << i))
5615       return 0;
5616   }
5617   return 1;
5618 }
5619
5620 /// isFPImmLegal - Returns true if the target can instruction select the
5621 /// specified FP immediate natively. If false, the legalizer will
5622 /// materialize the FP immediate as a load from a constant pool.
5623 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5624   if (!Subtarget->hasVFP3())
5625     return false;
5626   if (VT == MVT::f32)
5627     return ARM::getVFPf32Imm(Imm) != -1;
5628   if (VT == MVT::f64)
5629     return ARM::getVFPf64Imm(Imm) != -1;
5630   return false;
5631 }