lib/Target/ARM/ARMISelLowering.cpp

   1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the interfaces that ARM uses to lower LLVM code into a
  11 // selection DAG.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "ARM.h"
  16 #include "ARMAddressingModes.h"
  17 #include "ARMConstantPoolValue.h"
  18 #include "ARMISelLowering.h"
  19 #include "ARMMachineFunctionInfo.h"
  20 #include "ARMRegisterInfo.h"
  21 #include "ARMSubtarget.h"
  22 #include "ARMTargetMachine.h"
  23 #include "ARMTargetObjectFile.h"
  24 #include "llvm/CallingConv.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Instruction.h"
  28 #include "llvm/Intrinsics.h"
  29 #include "llvm/GlobalValue.h"
  30 #include "llvm/CodeGen/CallingConvLower.h"
  31 #include "llvm/CodeGen/MachineBasicBlock.h"
  32 #include "llvm/CodeGen/MachineFrameInfo.h"
  33 #include "llvm/CodeGen/MachineFunction.h"
  34 #include "llvm/CodeGen/MachineInstrBuilder.h"
  35 #include "llvm/CodeGen/MachineRegisterInfo.h"
  36 #include "llvm/CodeGen/PseudoSourceValue.h"
  37 #include "llvm/CodeGen/SelectionDAG.h"
  38 #include "llvm/Target/TargetOptions.h"
  39 #include "llvm/ADT/VectorExtras.h"
  40 #include "llvm/Support/ErrorHandling.h"
  41 #include "llvm/Support/MathExtras.h"
  42 using namespace llvm;
   43
      // Forward declarations of the custom f64 / v2f64 assignment hooks for
      // arguments (CC_*) and return values (RetCC_*). Their definitions appear
      // further down in this file, after the ARMGenCallingConv.inc include.
      // NOTE(review): presumably the generated calling-convention code refers
      // to these by name, which is why they are declared up front - confirm.
      // Each hook returns true when it has assigned locations for the value.
   44 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   45                                    CCValAssign::LocInfo &LocInfo,
   46                                    ISD::ArgFlagsTy &ArgFlags,
   47                                    CCState &State);
   48 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   49                                     CCValAssign::LocInfo &LocInfo,
   50                                     ISD::ArgFlagsTy &ArgFlags,
   51                                     CCState &State);
   52 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   53                                       CCValAssign::LocInfo &LocInfo,
   54                                       ISD::ArgFlagsTy &ArgFlags,
   55                                       CCState &State);
   56 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   57                                        CCValAssign::LocInfo &LocInfo,
   58                                        ISD::ArgFlagsTy &ArgFlags,
   59                                        CCState &State);
  60
  61 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
  62                                        MVT PromotedBitwiseVT) {
  63   if (VT != PromotedLdStVT) {
  64     setOperationAction(ISD::LOAD, VT, Promote);
  65     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
  66
  67     setOperationAction(ISD::STORE, VT, Promote);
  68     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  69   }
  70
  71   MVT ElemTy = VT.getVectorElementType();
  72   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
  73     setOperationAction(ISD::VSETCC, VT, Custom);
  74   if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
  75     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  76   setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  77   setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  78   setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
  79   setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
  80   if (VT.isInteger()) {
  81     setOperationAction(ISD::SHL, VT, Custom);
  82     setOperationAction(ISD::SRA, VT, Custom);
  83     setOperationAction(ISD::SRL, VT, Custom);
  84   }
  85
  86   // Promote all bit-wise operations.
  87   if (VT.isInteger() && VT != PromotedBitwiseVT) {
  88     setOperationAction(ISD::AND, VT, Promote);
  89     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
  90     setOperationAction(ISD::OR,  VT, Promote);
  91     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
  92     setOperationAction(ISD::XOR, VT, Promote);
  93     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  94   }
  95 }
  96
  97 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  98   addRegisterClass(VT, ARM::DPRRegisterClass);
  99   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
 100 }
 101
 102 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
 103   addRegisterClass(VT, ARM::QPRRegisterClass);
 104   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 105 }
 106
 107 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
 108   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
 109     return new TargetLoweringObjectFileMachO();
 110   return new ARMElfTargetObjectFile();
 111 }
  112
      /// ARMTargetLowering constructor - Configure lowering for the ARM subtarget
      /// of TM. In order, it: overrides libcall names for Darwin Thumb targets
      /// with VFP2, registers the legal register classes (GPR/tGPR, VFP, NEON),
      /// records the legalization action for each operation/type pair, selects
      /// the nodes that get target DAG combines, and sets the if-conversion
      /// block size limits.
  113 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  114     : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
  115   Subtarget = &TM.getSubtarget<ARMSubtarget>();
  116
  117   if (Subtarget->isTargetDarwin()) {
  118     // Uses VFP for Thumb libfuncs if available.
  119     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
  120       // Single-precision floating-point arithmetic.
  121       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
  122       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
  123       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
  124       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
  125
  126       // Double-precision floating-point arithmetic.
  127       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
  128       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
  129       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
  130       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
  131
  132       // Single-precision comparisons.
  133       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
  134       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
  135       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
  136       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
  137       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
  138       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
  139       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
  140       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
  141
            // Every comparison libcall is paired with SETNE, except O_F32: it
            // reuses the unordered routine (__unordsf2vfp) and is paired with
            // the opposite condition, SETEQ.
  142       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
  143       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
  144       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
  145       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
  146       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
  147       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
  148       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
  149       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
  150
  151       // Double-precision comparisons.
  152       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
  153       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
  154       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
  155       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
  156       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
  157       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
  158       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
  159       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
  160
            // Same pairing as the single-precision comparisons above: O_F64
            // reuses __unorddf2vfp with SETEQ instead of SETNE.
  161       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
  162       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
  163       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
  164       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
  165       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
  166       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
  167       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
  168       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
  169
  170       // Floating-point to integer conversions.
  171       // i64 conversions are done via library routines even when generating VFP
  172       // instructions, so use the same ones.
  173       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
  174       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
  175       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
  176       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
  177
  178       // Conversions between floating types.
  179       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
  180       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
  181
  182       // Integer to floating-point conversions.
  183       // i64 conversions are done via library routines even when generating VFP
  184       // instructions, so use the same ones.
  185       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
  186       // e.g., __floatunsidf vs. __floatunssidfvfp.
  187       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
  188       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
  189       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
  190       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
  191     }
  192   }
  193
  194   // These libcalls are not available in 32-bit.
  195   setLibcallName(RTLIB::SHL_I128, 0);
  196   setLibcallName(RTLIB::SRL_I128, 0);
  197   setLibcallName(RTLIB::SRA_I128, 0);
  198
        // i32 lives in tGPR on Thumb1-only targets and in GPR everywhere else.
  199   if (Subtarget->isThumb1Only())
  200     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  201   else
  202     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
        // f32/f64 get register classes only when hardware FP is usable: not
        // soft-float, VFP2 present, and not Thumb1-only.
  203   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
  204     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
  205     addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
  206
  207     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  208   }
  209
        // NEON: 64-bit vector types go in D registers, 128-bit ones in Q
        // registers; the listed nodes additionally get target DAG combines.
  210   if (Subtarget->hasNEON()) {
  211     addDRTypeForNEON(MVT::v2f32);
  212     addDRTypeForNEON(MVT::v8i8);
  213     addDRTypeForNEON(MVT::v4i16);
  214     addDRTypeForNEON(MVT::v2i32);
  215     addDRTypeForNEON(MVT::v1i64);
  216
  217     addQRTypeForNEON(MVT::v4f32);
  218     addQRTypeForNEON(MVT::v2f64);
  219     addQRTypeForNEON(MVT::v16i8);
  220     addQRTypeForNEON(MVT::v8i16);
  221     addQRTypeForNEON(MVT::v4i32);
  222     addQRTypeForNEON(MVT::v2i64);
  223
  224     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  225     setTargetDAGCombine(ISD::SHL);
  226     setTargetDAGCombine(ISD::SRL);
  227     setTargetDAGCombine(ISD::SRA);
  228     setTargetDAGCombine(ISD::SIGN_EXTEND);
  229     setTargetDAGCombine(ISD::ZERO_EXTEND);
  230     setTargetDAGCombine(ISD::ANY_EXTEND);
  231   }
  232
        // NOTE(review): presumably this must run after every addRegisterClass
        // call above so that all register classes are taken into account -
        // confirm against TargetLowering.
  233   computeRegisterProperties();
  234
  235   // ARM does not have f32 extending load.
  236   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  237
  238   // ARM does not have i1 sign extending load.
  239   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  240
  241   // ARM supports all 4 flavors of integer indexed load / store.
        // (Not enabled for Thumb1-only targets.)
  242   if (!Subtarget->isThumb1Only()) {
  243     for (unsigned im = (unsigned)ISD::PRE_INC;
  244          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
  245       setIndexedLoadAction(im,  MVT::i1,  Legal);
  246       setIndexedLoadAction(im,  MVT::i8,  Legal);
  247       setIndexedLoadAction(im,  MVT::i16, Legal);
  248       setIndexedLoadAction(im,  MVT::i32, Legal);
  249       setIndexedStoreAction(im, MVT::i1,  Legal);
  250       setIndexedStoreAction(im, MVT::i8,  Legal);
  251       setIndexedStoreAction(im, MVT::i16, Legal);
  252       setIndexedStoreAction(im, MVT::i32, Legal);
  253     }
  254   }
  255
  256   // i64 operation support.
        // Thumb1 expands every multiply-high / wide-multiply form; other modes
        // expand MULHU always and MULHS only before ARMv6.
  257   if (Subtarget->isThumb1Only()) {
  258     setOperationAction(ISD::MUL,     MVT::i64, Expand);
  259     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  260     setOperationAction(ISD::MULHS,   MVT::i32, Expand);
  261     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  262     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  263   } else {
  264     setOperationAction(ISD::MUL,     MVT::i64, Expand);
  265     setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  266     if (!Subtarget->hasV6Ops())
  267       setOperationAction(ISD::MULHS, MVT::i32, Expand);
  268   }
  269   setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
  270   setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
  271   setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
  272   setOperationAction(ISD::SRL,       MVT::i64, Custom);
  273   setOperationAction(ISD::SRA,       MVT::i64, Custom);
  274
  275   // ARM does not have ROTL; CTTZ and CTPOP are expanded as well.
  276   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  277   setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
  278   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  279   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
  280     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
  281
  282   // Only ARMv6 has BSWAP.
  283   if (!Subtarget->hasV6Ops())
  284     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  285
  286   // These are expanded into libcalls.
  287   setOperationAction(ISD::SDIV,  MVT::i32, Expand);
  288   setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  289   setOperationAction(ISD::SREM,  MVT::i32, Expand);
  290   setOperationAction(ISD::UREM,  MVT::i32, Expand);
  291   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  292   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  293
  294   // Support label based line numbers.
  295   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  296   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  297
  298   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
  299   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
  300   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  301   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  302
  303   // VASTART is custom lowered; the rest use the default expansion.
  304   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  305   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  306   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  307   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  308   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  309   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  310   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  311   setOperationAction(ISD::MEMBARRIER,         MVT::Other, Expand);
  312
        // In-register sign extension from i8/i16 is expanded only when the
        // target has neither ARMv6 ops nor Thumb2; from i1 it is always
        // expanded.
  313   if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
  314     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  315     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  316   }
  317   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  318
  319   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
  320     // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
  321     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
  322
  323   // We want to custom lower some of our intrinsics.
  324   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  325   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  326   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  327
        // SETCC and SELECT are expanded; the combined SELECT_CC form is custom
        // lowered for i32, f32 and f64.
  328   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  329   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  330   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  331   setOperationAction(ISD::SELECT,    MVT::i32, Expand);
  332   setOperationAction(ISD::SELECT,    MVT::f32, Expand);
  333   setOperationAction(ISD::SELECT,    MVT::f64, Expand);
  334   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  335   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  336   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
  337
        // Likewise BRCOND is expanded while BR_CC and BR_JT are custom lowered.
  338   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  339   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  340   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  341   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  342   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
  343
  344   // We don't support sin/cos/fmod/copysign/pow
        // (FCOPYSIGN is custom lowered instead when hardware FP is usable.)
  345   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  346   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  347   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  348   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  349   setOperationAction(ISD::FREM,      MVT::f64, Expand);
  350   setOperationAction(ISD::FREM,      MVT::f32, Expand);
  351   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
  352     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
  353     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  354   }
  355   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  356   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
  357
  358   // int <-> fp are custom expanded into bit_convert + ARMISD ops.
  359   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
  360     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  361     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  362     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  363     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  364   }
  365
  366   // We have target-specific dag combine patterns for the following nodes:
  367   // ARMISD::FMRRD  - No need to call setTargetDAGCombine
  368   setTargetDAGCombine(ISD::ADD);
  369   setTargetDAGCombine(ISD::SUB);
  370
  371   setStackPointerRegisterToSaveRestore(ARM::SP);
  372   setSchedulingPreference(SchedulingForRegPressure);
        // Default if-conversion limits: 0 for Thumb, 10 / 2 otherwise. The
        // non-Thumb values may be replaced below using the Bcc latency.
  373   setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
  374   setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
  375
  376   if (!Subtarget->isThumb()) {
  377     // Use branch latency information to determine if-conversion limits.
  378     // FIXME: If-converter should use instruction latency of the branch being
  379     // eliminated to compute the threshold. For ARMv6, the branch "latency"
  380     // varies depending on whether it's dynamically or statically predicted
  381     // and on whether the destination is in the prefetch buffer.
  382     const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  383     const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
  384     unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
  385     if (Latency > 1) {
            // The dup limit is only overridden for Latency > 2; at exactly 2
            // it keeps the default of 2 set above.
  386       setIfCvtBlockSizeLimit(Latency-1);
  387       if (Latency > 2)
  388         setIfCvtDupBlockSizeLimit(Latency-2);
  389     } else {
            // Latency of 0 or 1: re-apply the same non-Thumb defaults as above.
  390       setIfCvtBlockSizeLimit(10);
  391       setIfCvtDupBlockSizeLimit(2);
  392     }
  393   }
  394
  395   maxStoresPerMemcpy = 1;   // FIXME: temporary - rewrite interface to use type
  396   // Do not enable CodePlacementOpt for now: it currently runs after the
  397   // ARMConstantIslandPass and messes up branch relaxation and placement
  398   // of constant islands.
  399   // benefitFromCodePlacementOpt = true;
  400 }
 401
 402 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 403   switch (Opcode) {
 404   default: return 0;
 405   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
 406   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
 407   case ARMISD::CALL:          return "ARMISD::CALL";
 408   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
 409   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
 410   case ARMISD::tCALL:         return "ARMISD::tCALL";
 411   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
 412   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
 413   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
 414   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
 415   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
 416   case ARMISD::CMP:           return "ARMISD::CMP";
 417   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
 418   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
 419   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
 420   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
 421   case ARMISD::CMOV:          return "ARMISD::CMOV";
 422   case ARMISD::CNEG:          return "ARMISD::CNEG";
 423
 424   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
 425   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
 426   case ARMISD::SITOF:         return "ARMISD::SITOF";
 427   case ARMISD::UITOF:         return "ARMISD::UITOF";
 428
 429   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
 430   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
 431   case ARMISD::RRX:           return "ARMISD::RRX";
 432
 433   case ARMISD::FMRRD:         return "ARMISD::FMRRD";
 434   case ARMISD::FMDRR:         return "ARMISD::FMDRR";
 435
 436   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
 437
 438   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
 439   case ARMISD::VCGE:          return "ARMISD::VCGE";
 440   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
 441   case ARMISD::VCGT:          return "ARMISD::VCGT";
 442   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
 443   case ARMISD::VTST:          return "ARMISD::VTST";
 444
 445   case ARMISD::VSHL:          return "ARMISD::VSHL";
 446   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
 447   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
 448   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
 449   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
 450   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
 451   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
 452   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
 453   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
 454   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
 455   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
 456   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
 457   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
 458   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
 459   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
 460   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
 461   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
 462   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
 463   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
 464   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
 465   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
 466   case ARMISD::VDUPLANEQ:     return "ARMISD::VDUPLANEQ";
 467   case ARMISD::VLD2D:         return "ARMISD::VLD2D";
 468   case ARMISD::VLD3D:         return "ARMISD::VLD3D";
 469   case ARMISD::VLD4D:         return "ARMISD::VLD4D";
 470   case ARMISD::VST2D:         return "ARMISD::VST2D";
 471   case ARMISD::VST3D:         return "ARMISD::VST3D";
 472   case ARMISD::VST4D:         return "ARMISD::VST4D";
 473   }
 474 }
 475
 476 /// getFunctionAlignment - Return the Log2 alignment of this function.
 477 unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
 478   return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
 479 }
 480
 481 //===----------------------------------------------------------------------===//
 482 // Lowering Code
 483 //===----------------------------------------------------------------------===//
 484
 485 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
 486 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
 487   switch (CC) {
 488   default: llvm_unreachable("Unknown condition code!");
 489   case ISD::SETNE:  return ARMCC::NE;
 490   case ISD::SETEQ:  return ARMCC::EQ;
 491   case ISD::SETGT:  return ARMCC::GT;
 492   case ISD::SETGE:  return ARMCC::GE;
 493   case ISD::SETLT:  return ARMCC::LT;
 494   case ISD::SETLE:  return ARMCC::LE;
 495   case ISD::SETUGT: return ARMCC::HI;
 496   case ISD::SETUGE: return ARMCC::HS;
 497   case ISD::SETULT: return ARMCC::LO;
 498   case ISD::SETULE: return ARMCC::LS;
 499   }
 500 }
 501
 502 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
 503 /// returns true if the operands should be inverted to form the proper
 504 /// comparison.
 505 static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 506                         ARMCC::CondCodes &CondCode2) {
 507   bool Invert = false;
 508   CondCode2 = ARMCC::AL;
 509   switch (CC) {
 510   default: llvm_unreachable("Unknown FP condition!");
 511   case ISD::SETEQ:
 512   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
 513   case ISD::SETGT:
 514   case ISD::SETOGT: CondCode = ARMCC::GT; break;
 515   case ISD::SETGE:
 516   case ISD::SETOGE: CondCode = ARMCC::GE; break;
 517   case ISD::SETOLT: CondCode = ARMCC::MI; break;
 518   case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
 519   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
 520   case ISD::SETO:   CondCode = ARMCC::VC; break;
 521   case ISD::SETUO:  CondCode = ARMCC::VS; break;
 522   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
 523   case ISD::SETUGT: CondCode = ARMCC::HI; break;
 524   case ISD::SETUGE: CondCode = ARMCC::PL; break;
 525   case ISD::SETLT:
 526   case ISD::SETULT: CondCode = ARMCC::LT; break;
 527   case ISD::SETLE:
 528   case ISD::SETULE: CondCode = ARMCC::LE; break;
 529   case ISD::SETNE:
 530   case ISD::SETUNE: CondCode = ARMCC::NE; break;
 531   }
 532   return Invert;
 533 }
 534
 535 //===----------------------------------------------------------------------===//
 536 //                      Calling Convention Implementation
 537 //===----------------------------------------------------------------------===//
 538
 539 #include "ARMGenCallingConv.inc"
 540
 541 // APCS f64 is in register pairs, possibly split to stack
 542 static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 543                           CCValAssign::LocInfo &LocInfo,
 544                           CCState &State, bool CanFail) {
 545   static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
 546
 547   // Try to get the first register.
 548   if (unsigned Reg = State.AllocateReg(RegList, 4))
 549     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 550   else {
 551     // For the 2nd half of a v2f64, do not fail.
 552     if (CanFail)
 553       return false;
 554
 555     // Put the whole thing on the stack.
 556     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 557                                            State.AllocateStack(8, 4),
 558                                            LocVT, LocInfo));
 559     return true;
 560   }
 561
 562   // Try to get the second register.
 563   if (unsigned Reg = State.AllocateReg(RegList, 4))
 564     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 565   else
 566     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 567                                            State.AllocateStack(4, 4),
 568                                            LocVT, LocInfo));
 569   return true;
 570 }
 571
 572 static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 573                                    CCValAssign::LocInfo &LocInfo,
 574                                    ISD::ArgFlagsTy &ArgFlags,
 575                                    CCState &State) {
 576   if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 577     return false;
 578   if (LocVT == MVT::v2f64 &&
 579       !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 580     return false;
 581   return true;  // we handled it
 582 }
 583
 584 // AAPCS f64 is in aligned register pairs
 585 static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 586                            CCValAssign::LocInfo &LocInfo,
 587                            CCState &State, bool CanFail) {
 588   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 589   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 590
 591   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
 592   if (Reg == 0) {
 593     // For the 2nd half of a v2f64, do not just fail.
 594     if (CanFail)
 595       return false;
 596
 597     // Put the whole thing on the stack.
 598     State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
 599                                            State.AllocateStack(8, 8),
 600                                            LocVT, LocInfo));
 601     return true;
 602   }
 603
 604   unsigned i;
 605   for (i = 0; i < 2; ++i)
 606     if (HiRegList[i] == Reg)
 607       break;
 608
 609   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 610   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 611                                          LocVT, LocInfo));
 612   return true;
 613 }
 614
 615 static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 616                                     CCValAssign::LocInfo &LocInfo,
 617                                     ISD::ArgFlagsTy &ArgFlags,
 618                                     CCState &State) {
 619   if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
 620     return false;
 621   if (LocVT == MVT::v2f64 &&
 622       !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
 623     return false;
 624   return true;  // we handled it
 625 }
 626
 627 static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 628                          CCValAssign::LocInfo &LocInfo, CCState &State) {
 629   static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
 630   static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
 631
 632   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
 633   if (Reg == 0)
 634     return false; // we didn't handle it
 635
 636   unsigned i;
 637   for (i = 0; i < 2; ++i)
 638     if (HiRegList[i] == Reg)
 639       break;
 640
 641   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
 642   State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
 643                                          LocVT, LocInfo));
 644   return true;
 645 }
 646
 647 static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 648                                       CCValAssign::LocInfo &LocInfo,
 649                                       ISD::ArgFlagsTy &ArgFlags,
 650                                       CCState &State) {
 651   if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 652     return false;
 653   if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
 654     return false;
 655   return true;  // we handled it
 656 }
 657
 658 static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 659                                        CCValAssign::LocInfo &LocInfo,
 660                                        ISD::ArgFlagsTy &ArgFlags,
 661                                        CCState &State) {
 662   return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
 663                                    State);
 664 }
 665
 666 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 667 /// given CallingConvention value.
 668 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
 669                                                  bool Return,
 670                                                  bool isVarArg) const {
 671   switch (CC) {
 672   default:
 673     llvm_unreachable("Unsupported calling convention");
 674   case CallingConv::C:
 675   case CallingConv::Fast:
 676     // Use target triple & subtarget features to do actual dispatch.
 677     if (Subtarget->isAAPCS_ABI()) {
 678       if (Subtarget->hasVFP2() &&
 679           FloatABIType == FloatABI::Hard && !isVarArg)
 680         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 681       else
 682         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 683     } else
 684         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 685   case CallingConv::ARM_AAPCS_VFP:
 686     return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
 687   case CallingConv::ARM_AAPCS:
 688     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
 689   case CallingConv::ARM_APCS:
 690     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
 691   }
 692 }
 693
 694 /// LowerCallResult - Lower the result values of a call into the
 695 /// appropriate copies out of appropriate physical registers.
 696 SDValue
 697 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 698                                    unsigned CallConv, bool isVarArg,
 699                                    const SmallVectorImpl<ISD::InputArg> &Ins,
 700                                    DebugLoc dl, SelectionDAG &DAG,
 701                                    SmallVectorImpl<SDValue> &InVals) {
 702
 703   // Assign locations to each value returned by this call.
 704   SmallVector<CCValAssign, 16> RVLocs;
 705   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
 706                  RVLocs, *DAG.getContext());
 707   CCInfo.AnalyzeCallResult(Ins,
 708                            CCAssignFnForNode(CallConv, /* Return*/ true,
 709                                              isVarArg));
 710
 711   // Copy all of the result registers out of their specified physreg.
 712   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 713     CCValAssign VA = RVLocs[i];
 714
 715     SDValue Val;
 716     if (VA.needsCustom()) {
 717       // Handle f64 or half of a v2f64.
 718       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
 719                                       InFlag);
 720       Chain = Lo.getValue(1);
 721       InFlag = Lo.getValue(2);
 722       VA = RVLocs[++i]; // skip ahead to next loc
 723       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
 724                                       InFlag);
 725       Chain = Hi.getValue(1);
 726       InFlag = Hi.getValue(2);
 727       Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
 728
 729       if (VA.getLocVT() == MVT::v2f64) {
 730         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
 731         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
 732                           DAG.getConstant(0, MVT::i32));
 733
 734         VA = RVLocs[++i]; // skip ahead to next loc
 735         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
 736         Chain = Lo.getValue(1);
 737         InFlag = Lo.getValue(2);
 738         VA = RVLocs[++i]; // skip ahead to next loc
 739         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
 740         Chain = Hi.getValue(1);
 741         InFlag = Hi.getValue(2);
 742         Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
 743         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
 744                           DAG.getConstant(1, MVT::i32));
 745       }
 746     } else {
 747       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
 748                                InFlag);
 749       Chain = Val.getValue(1);
 750       InFlag = Val.getValue(2);
 751     }
 752
 753     switch (VA.getLocInfo()) {
 754     default: llvm_unreachable("Unknown loc info!");
 755     case CCValAssign::Full: break;
 756     case CCValAssign::BCvt:
 757       Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
 758       break;
 759     }
 760
 761     InVals.push_back(Val);
 762   }
 763
 764   return Chain;
 765 }
 766
 767 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 768 /// by "Src" to address "Dst" of size "Size".  Alignment information is
 769 /// specified by the specific parameter attribute.  The copy will be passed as
 770 /// a byval function parameter.
 771 /// Sometimes what we are copying is the end of a larger object, the part that
 772 /// does not fit in registers.
 773 static SDValue
 774 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
 775                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
 776                           DebugLoc dl) {
 777   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
 778   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
 779                        /*AlwaysInline=*/false, NULL, 0, NULL, 0);
 780 }
 781
 782 /// LowerMemOpCallTo - Store the argument to the stack.
 783 SDValue
 784 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
 785                                     SDValue StackPtr, SDValue Arg,
 786                                     DebugLoc dl, SelectionDAG &DAG,
 787                                     const CCValAssign &VA,
 788                                     ISD::ArgFlagsTy Flags) {
 789   unsigned LocMemOffset = VA.getLocMemOffset();
 790   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
 791   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
 792   if (Flags.isByVal()) {
 793     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
 794   }
 795   return DAG.getStore(Chain, dl, Arg, PtrOff,
 796                       PseudoSourceValue::getStack(), LocMemOffset);
 797 }
 798
 799 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
 800                                          SDValue Chain, SDValue &Arg,
 801                                          RegsToPassVector &RegsToPass,
 802                                          CCValAssign &VA, CCValAssign &NextVA,
 803                                          SDValue &StackPtr,
 804                                          SmallVector<SDValue, 8> &MemOpChains,
 805                                          ISD::ArgFlagsTy Flags) {
 806
 807   SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
 808                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
 809   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
 810
 811   if (NextVA.isRegLoc())
 812     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
 813   else {
 814     assert(NextVA.isMemLoc());
 815     if (StackPtr.getNode() == 0)
 816       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 817
 818     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
 819                                            dl, DAG, NextVA,
 820                                            Flags));
 821   }
 822 }
 823
 824 /// LowerCall - Lowering a call into a callseq_start <-
 825 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
 826 /// nodes.
 827 SDValue
 828 ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 829                              unsigned CallConv, bool isVarArg,
 830                              bool isTailCall,
 831                              const SmallVectorImpl<ISD::OutputArg> &Outs,
 832                              const SmallVectorImpl<ISD::InputArg> &Ins,
 833                              DebugLoc dl, SelectionDAG &DAG,
 834                              SmallVectorImpl<SDValue> &InVals) {
 835
 836   // Analyze operands of the call, assigning locations to each operand.
 837   SmallVector<CCValAssign, 16> ArgLocs;
 838   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
 839                  *DAG.getContext());
 840   CCInfo.AnalyzeCallOperands(Outs,
 841                              CCAssignFnForNode(CallConv, /* Return*/ false,
 842                                                isVarArg));
 843
 844   // Get a count of how many bytes are to be pushed on the stack.
 845   unsigned NumBytes = CCInfo.getNextStackOffset();
 846
 847   // Adjust the stack pointer for the new arguments...
 848   // These operations are automatically eliminated by the prolog/epilog pass
 849   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 850
 851   SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
 852
 853   RegsToPassVector RegsToPass;
 854   SmallVector<SDValue, 8> MemOpChains;
 855
 856   // Walk the register/memloc assignments, inserting copies/loads.  In the case
 857   // of tail call optimization, arguments are handled later.
 858   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
 859        i != e;
 860        ++i, ++realArgIdx) {
 861     CCValAssign &VA = ArgLocs[i];
 862     SDValue Arg = Outs[realArgIdx].Val;
 863     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
 864
 865     // Promote the value if needed.
 866     switch (VA.getLocInfo()) {
 867     default: llvm_unreachable("Unknown loc info!");
 868     case CCValAssign::Full: break;
 869     case CCValAssign::SExt:
 870       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
 871       break;
 872     case CCValAssign::ZExt:
 873       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
 874       break;
 875     case CCValAssign::AExt:
 876       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
 877       break;
 878     case CCValAssign::BCvt:
 879       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
 880       break;
 881     }
 882
 883     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
 884     if (VA.needsCustom()) {
 885       if (VA.getLocVT() == MVT::v2f64) {
 886         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 887                                   DAG.getConstant(0, MVT::i32));
 888         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
 889                                   DAG.getConstant(1, MVT::i32));
 890
 891         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
 892                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
 893
 894         VA = ArgLocs[++i]; // skip ahead to next loc
 895         if (VA.isRegLoc()) {
 896           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
 897                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
 898         } else {
 899           assert(VA.isMemLoc());
 900           if (StackPtr.getNode() == 0)
 901             StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 902
 903           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
 904                                                  dl, DAG, VA, Flags));
 905         }
 906       } else {
 907         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
 908                          StackPtr, MemOpChains, Flags);
 909       }
 910     } else if (VA.isRegLoc()) {
 911       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
 912     } else {
 913       assert(VA.isMemLoc());
 914       if (StackPtr.getNode() == 0)
 915         StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 916
 917       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
 918                                              dl, DAG, VA, Flags));
 919     }
 920   }
 921
 922   if (!MemOpChains.empty())
 923     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
 924                         &MemOpChains[0], MemOpChains.size());
 925
 926   // Build a sequence of copy-to-reg nodes chained together with token chain
 927   // and flag operands which copy the outgoing args into the appropriate regs.
 928   SDValue InFlag;
 929   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
 930     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
 931                              RegsToPass[i].second, InFlag);
 932     InFlag = Chain.getValue(1);
 933   }
 934
 935   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
 936   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
 937   // node so that legalize doesn't hack it.
 938   bool isDirect = false;
 939   bool isARMFunc = false;
 940   bool isLocalARMFunc = false;
 941   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
 942     GlobalValue *GV = G->getGlobal();
 943     isDirect = true;
 944     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
 945     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
 946                    getTargetMachine().getRelocationModel() != Reloc::Static;
 947     isARMFunc = !Subtarget->isThumb() || isStub;
 948     // ARM call to a local ARM function is predicable.
 949     isLocalARMFunc = !Subtarget->isThumb() && !isExt;
 950     // tBX takes a register source operand.
 951     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
 952       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
 953                                                            ARMCP::CPStub, 4);
 954       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
 955       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
 956       Callee = DAG.getLoad(getPointerTy(), dl,
 957                            DAG.getEntryNode(), CPAddr, NULL, 0);
 958       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
 959       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
 960                            getPointerTy(), Callee, PICLabel);
 961    } else
 962       Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
 963   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
 964     isDirect = true;
 965     bool isStub = Subtarget->isTargetDarwin() &&
 966                   getTargetMachine().getRelocationModel() != Reloc::Static;
 967     isARMFunc = !Subtarget->isThumb() || isStub;
 968     // tBX takes a register source operand.
 969     const char *Sym = S->getSymbol();
 970     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
 971       ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
 972                                                            ARMCP::CPStub, 4);
 973       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
 974       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
 975       Callee = DAG.getLoad(getPointerTy(), dl,
 976                            DAG.getEntryNode(), CPAddr, NULL, 0);
 977       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
 978       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
 979                            getPointerTy(), Callee, PICLabel);
 980     } else
 981       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
 982   }
 983
 984   // FIXME: handle tail calls differently.
 985   unsigned CallOpc;
 986   if (Subtarget->isThumb()) {
 987     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
 988       CallOpc = ARMISD::CALL_NOLINK;
 989     else
 990       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
 991   } else {
 992     CallOpc = (isDirect || Subtarget->hasV5TOps())
 993       ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
 994       : ARMISD::CALL_NOLINK;
 995   }
 996   if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
 997     // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
 998     Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
 999     InFlag = Chain.getValue(1);
1000   }
1001
1002   std::vector<SDValue> Ops;
1003   Ops.push_back(Chain);
1004   Ops.push_back(Callee);
1005
1006   // Add argument registers to the end of the list so that they are known live
1007   // into the call.
1008   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1009     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1010                                   RegsToPass[i].second.getValueType()));
1011
1012   if (InFlag.getNode())
1013     Ops.push_back(InFlag);
1014   // Returns a chain and a flag for retval copy to use.
1015   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1016                       &Ops[0], Ops.size());
1017   InFlag = Chain.getValue(1);
1018
1019   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
1020                              DAG.getIntPtrConstant(0, true), InFlag);
1021   if (!Ins.empty())
1022     InFlag = Chain.getValue(1);
1023
1024   // Handle result values, copying them out of physregs into vregs that we
1025   // return.
1026   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
1027                          dl, DAG, InVals);
1028 }
1029
1030 SDValue
1031 ARMTargetLowering::LowerReturn(SDValue Chain,
1032                                unsigned CallConv, bool isVarArg,
1033                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1034                                DebugLoc dl, SelectionDAG &DAG) {
1035
1036   // CCValAssign - represent the assignment of the return value to a location.
1037   SmallVector<CCValAssign, 16> RVLocs;
1038
1039   // CCState - Info about the registers and stack slots.
1040   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
1041                  *DAG.getContext());
1042
1043   // Analyze outgoing return values.
1044   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
1045                                                isVarArg));
1046
1047   // If this is the first return lowered for this function, add
1048   // the regs to the liveout set for the function.
1049   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1050     for (unsigned i = 0; i != RVLocs.size(); ++i)
1051       if (RVLocs[i].isRegLoc())
1052         DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1053   }
1054
1055   SDValue Flag;
1056
1057   // Copy the result values into the output registers.
1058   for (unsigned i = 0, realRVLocIdx = 0;
1059        i != RVLocs.size();
1060        ++i, ++realRVLocIdx) {
1061     CCValAssign &VA = RVLocs[i];
1062     assert(VA.isRegLoc() && "Can only return in registers!");
1063
1064     SDValue Arg = Outs[realRVLocIdx].Val;
1065
1066     switch (VA.getLocInfo()) {
1067     default: llvm_unreachable("Unknown loc info!");
1068     case CCValAssign::Full: break;
1069     case CCValAssign::BCvt:
1070       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
1071       break;
1072     }
1073
1074     if (VA.needsCustom()) {
1075       if (VA.getLocVT() == MVT::v2f64) {
1076         // Extract the first half and return it in two registers.
1077         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1078                                    DAG.getConstant(0, MVT::i32));
1079         SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
1080                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
1081
1082         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
1083         Flag = Chain.getValue(1);
1084         VA = RVLocs[++i]; // skip ahead to next loc
1085         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1086                                  HalfGPRs.getValue(1), Flag);
1087         Flag = Chain.getValue(1);
1088         VA = RVLocs[++i]; // skip ahead to next loc
1089
1090         // Extract the 2nd half and fall through to handle it as an f64 value.
1091         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1092                           DAG.getConstant(1, MVT::i32));
1093       }
1094       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
1095       // available.
1096       SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
1097                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
1098       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
1099       Flag = Chain.getValue(1);
1100       VA = RVLocs[++i]; // skip ahead to next loc
1101       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
1102                                Flag);
1103     } else
1104       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
1105
1106     // Guarantee that all emitted copies are
1107     // stuck together, avoiding something bad.
1108     Flag = Chain.getValue(1);
1109   }
1110
1111   SDValue result;
1112   if (Flag.getNode())
1113     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1114   else // Return Void
1115     result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
1116
1117   return result;
1118 }
1119
1120 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
1121 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
1122 // one of the above mentioned nodes. It has to be wrapped because otherwise
1123 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
1124 // be used to form addressing mode. These wrapped nodes will be selected
1125 // into MOVi.
1126 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
1127   MVT PtrVT = Op.getValueType();
1128   // FIXME there is no actual debug info here
1129   DebugLoc dl = Op.getDebugLoc();
1130   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1131   SDValue Res;
1132   if (CP->isMachineConstantPoolEntry())
1133     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1134                                     CP->getAlignment());
1135   else
1136     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1137                                     CP->getAlignment());
1138   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
1139 }
1140
1141 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
1142 SDValue
1143 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1144                                                  SelectionDAG &DAG) {
1145   DebugLoc dl = GA->getDebugLoc();
1146   MVT PtrVT = getPointerTy();
1147   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1148   ARMConstantPoolValue *CPV =
1149     new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
1150                              PCAdj, "tlsgd", true);
1151   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1152   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
1153   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
1154   SDValue Chain = Argument.getValue(1);
1155
1156   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
1157   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
1158
1159   // call __tls_get_addr.
1160   ArgListTy Args;
1161   ArgListEntry Entry;
1162   Entry.Node = Argument;
1163   Entry.Ty = (const Type *) Type::Int32Ty;
1164   Args.push_back(Entry);
1165   // FIXME: is there useful debug info available here?
1166   std::pair<SDValue, SDValue> CallResult =
1167     LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false,
1168                 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
1169                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
1170   return CallResult.first;
1171 }
1172
1173 // Lower ISD::GlobalTLSAddress using the "initial exec" or
1174 // "local exec" model.
1175 SDValue
1176 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
1177                                         SelectionDAG &DAG) {
1178   GlobalValue *GV = GA->getGlobal();
1179   DebugLoc dl = GA->getDebugLoc();
1180   SDValue Offset;
1181   SDValue Chain = DAG.getEntryNode();
1182   MVT PtrVT = getPointerTy();
1183   // Get the Thread Pointer
1184   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1185
1186   if (GV->isDeclaration()) {
1187     // initial exec model
1188     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
1189     ARMConstantPoolValue *CPV =
1190       new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
1191                                PCAdj, "gottpoff", true);
1192     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1193     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1194     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
1195     Chain = Offset.getValue(1);
1196
1197     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
1198     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
1199
1200     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
1201   } else {
1202     // local exec model
1203     ARMConstantPoolValue *CPV =
1204       new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
1205     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1206     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
1207     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
1208   }
1209
1210   // The address of the thread local variable is the add of the thread
1211   // pointer with the offset of the variable.
1212   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
1213 }
1214
1215 SDValue
1216 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
1217   // TODO: implement the "local dynamic" model
1218   assert(Subtarget->isTargetELF() &&
1219          "TLS not implemented for non-ELF targets");
1220   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1221   // If the relocation model is PIC, use the "General Dynamic" TLS Model,
1222   // otherwise use the "Local Exec" TLS Model
1223   if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
1224     return LowerToTLSGeneralDynamicModel(GA, DAG);
1225   else
1226     return LowerToTLSExecModels(GA, DAG);
1227 }
1228
1229 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
1230                                                  SelectionDAG &DAG) {
1231   MVT PtrVT = getPointerTy();
1232   DebugLoc dl = Op.getDebugLoc();
1233   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1234   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1235   if (RelocM == Reloc::PIC_) {
1236     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
1237     ARMConstantPoolValue *CPV =
1238       new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
1239     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1240     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1241     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
1242                                  CPAddr, NULL, 0);
1243     SDValue Chain = Result.getValue(1);
1244     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1245     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
1246     if (!UseGOTOFF)
1247       Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
1248     return Result;
1249   } else {
1250     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1251     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1252     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
1253   }
1254 }
1255
1256 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
1257 /// even in non-static mode.
1258 static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
1259   // If symbol visibility is hidden, the extra load is not needed if
1260   // the symbol is definitely defined in the current translation unit.
1261   bool isDecl = GV->isDeclaration() || GV->hasAvailableExternallyLinkage();
1262   if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
1263     return false;
1264   return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
1265 }
1266
1267 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
1268                                                     SelectionDAG &DAG) {
1269   MVT PtrVT = getPointerTy();
1270   DebugLoc dl = Op.getDebugLoc();
1271   GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1272   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1273   bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
1274   SDValue CPAddr;
1275   if (RelocM == Reloc::Static)
1276     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
1277   else {
1278     unsigned PCAdj = (RelocM != Reloc::PIC_)
1279       ? 0 : (Subtarget->isThumb() ? 4 : 8);
1280     ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
1281       : ARMCP::CPValue;
1282     ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
1283                                                          Kind, PCAdj);
1284     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1285   }
1286   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1287
1288   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
1289   SDValue Chain = Result.getValue(1);
1290
1291   if (RelocM == Reloc::PIC_) {
1292     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
1293     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1294   }
1295   if (IsIndirect)
1296     Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
1297
1298   return Result;
1299 }
1300
1301 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1302                                                     SelectionDAG &DAG){
1303   assert(Subtarget->isTargetELF() &&
1304          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1305   MVT PtrVT = getPointerTy();
1306   DebugLoc dl = Op.getDebugLoc();
1307   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1308   ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
1309                                                        ARMPCLabelIndex,
1310                                                        ARMCP::CPValue, PCAdj);
1311   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1312   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1313   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
1314   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
1315   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1316 }
1317
1318 static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
1319                                      unsigned Opcode) {
1320   SDNode *Node = Op.getNode();
1321   MVT VT = Node->getValueType(0);
1322   DebugLoc dl = Op.getDebugLoc();
1323
1324   if (!VT.is64BitVector())
1325     return SDValue(); // unimplemented
1326
1327   SDValue Ops[] = { Node->getOperand(0),
1328                     Node->getOperand(2) };
1329   return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
1330 }
1331
1332 static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
1333                                      unsigned Opcode, unsigned NumVecs) {
1334   SDNode *Node = Op.getNode();
1335   MVT VT = Node->getOperand(3).getValueType();
1336   DebugLoc dl = Op.getDebugLoc();
1337
1338   if (!VT.is64BitVector())
1339     return SDValue(); // unimplemented
1340
1341   SmallVector<SDValue, 6> Ops;
1342   Ops.push_back(Node->getOperand(0));
1343   Ops.push_back(Node->getOperand(2));
1344   for (unsigned N = 0; N < NumVecs; ++N)
1345     Ops.push_back(Node->getOperand(N + 3));
1346   return DAG.getNode(Opcode, dl, MVT::Other, Ops.data(), Ops.size());
1347 }
1348
1349 SDValue
1350 ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
1351   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1352   switch (IntNo) {
1353   case Intrinsic::arm_neon_vld2i:
1354   case Intrinsic::arm_neon_vld2f:
1355     return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
1356   case Intrinsic::arm_neon_vld3i:
1357   case Intrinsic::arm_neon_vld3f:
1358     return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
1359   case Intrinsic::arm_neon_vld4i:
1360   case Intrinsic::arm_neon_vld4f:
1361     return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
1362   case Intrinsic::arm_neon_vst2i:
1363   case Intrinsic::arm_neon_vst2f:
1364     return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
1365   case Intrinsic::arm_neon_vst3i:
1366   case Intrinsic::arm_neon_vst3f:
1367     return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
1368   case Intrinsic::arm_neon_vst4i:
1369   case Intrinsic::arm_neon_vst4f:
1370     return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
1371   default: return SDValue();    // Don't custom lower most intrinsics.
1372   }
1373 }
1374
1375 SDValue
1376 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
1377   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1378   DebugLoc dl = Op.getDebugLoc();
1379   switch (IntNo) {
1380   default: return SDValue();    // Don't custom lower most intrinsics.
1381   case Intrinsic::arm_thread_pointer: {
1382     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1383     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1384   }
1385   case Intrinsic::eh_sjlj_setjmp:
1386     return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
1387   }
1388 }
1389
1390 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
1391                             unsigned VarArgsFrameIndex) {
1392   // vastart just stores the address of the VarArgsFrameIndex slot into the
1393   // memory location argument.
1394   DebugLoc dl = Op.getDebugLoc();
1395   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1396   SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1397   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1398   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
1399 }
1400
1401 SDValue
1402 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
1403                                         SDValue &Root, SelectionDAG &DAG,
1404                                         DebugLoc dl) {
1405   MachineFunction &MF = DAG.getMachineFunction();
1406   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1407
1408   TargetRegisterClass *RC;
1409   if (AFI->isThumb1OnlyFunction())
1410     RC = ARM::tGPRRegisterClass;
1411   else
1412     RC = ARM::GPRRegisterClass;
1413
1414   // Transform the arguments stored in physical registers into virtual ones.
1415   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1416   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1417
1418   SDValue ArgValue2;
1419   if (NextVA.isMemLoc()) {
1420     unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
1421     MachineFrameInfo *MFI = MF.getFrameInfo();
1422     int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset());
1423
1424     // Create load node to retrieve arguments from the stack.
1425     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1426     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
1427   } else {
1428     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1429     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1430   }
1431
1432   return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2);
1433 }
1434
1435 SDValue
1436 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
1437                                         unsigned CallConv, bool isVarArg,
1438                                         const SmallVectorImpl<ISD::InputArg>
1439                                           &Ins,
1440                                         DebugLoc dl, SelectionDAG &DAG,
1441                                         SmallVectorImpl<SDValue> &InVals) {
1442
1443   MachineFunction &MF = DAG.getMachineFunction();
1444   MachineFrameInfo *MFI = MF.getFrameInfo();
1445
1446   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1447
1448   // Assign locations to all of the incoming arguments.
1449   SmallVector<CCValAssign, 16> ArgLocs;
1450   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1451                  *DAG.getContext());
1452   CCInfo.AnalyzeFormalArguments(Ins,
1453                                 CCAssignFnForNode(CallConv, /* Return*/ false,
1454                                                   isVarArg));
1455
1456   SmallVector<SDValue, 16> ArgValues;
1457
1458   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1459     CCValAssign &VA = ArgLocs[i];
1460
1461     // Arguments stored in registers.
1462     if (VA.isRegLoc()) {
1463       MVT RegVT = VA.getLocVT();
1464
1465       SDValue ArgValue;
1466       if (VA.needsCustom()) {
1467         // f64 and vector types are split up into multiple registers or
1468         // combinations of registers and stack slots.
1469         RegVT = MVT::i32;
1470
1471         if (VA.getLocVT() == MVT::v2f64) {
1472           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
1473                                                    Chain, DAG, dl);
1474           VA = ArgLocs[++i]; // skip ahead to next loc
1475           SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
1476                                                    Chain, DAG, dl);
1477           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1478           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1479                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
1480           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1481                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
1482         } else
1483           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
1484
1485       } else {
1486         TargetRegisterClass *RC;
1487
1488         if (RegVT == MVT::f32)
1489           RC = ARM::SPRRegisterClass;
1490         else if (RegVT == MVT::f64)
1491           RC = ARM::DPRRegisterClass;
1492         else if (RegVT == MVT::v2f64)
1493           RC = ARM::QPRRegisterClass;
1494         else if (RegVT == MVT::i32)
1495           RC = (AFI->isThumb1OnlyFunction() ?
1496                 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
1497         else
1498           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
1499
1500         // Transform the arguments in physical registers into virtual ones.
1501         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1502         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1503       }
1504
1505       // If this is an 8 or 16-bit value, it is really passed promoted
1506       // to 32 bits.  Insert an assert[sz]ext to capture this, then
1507       // truncate to the right size.
1508       switch (VA.getLocInfo()) {
1509       default: llvm_unreachable("Unknown loc info!");
1510       case CCValAssign::Full: break;
1511       case CCValAssign::BCvt:
1512         ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
1513         break;
1514       case CCValAssign::SExt:
1515         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1516                                DAG.getValueType(VA.getValVT()));
1517         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1518         break;
1519       case CCValAssign::ZExt:
1520         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1521                                DAG.getValueType(VA.getValVT()));
1522         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1523         break;
1524       }
1525
1526       InVals.push_back(ArgValue);
1527
1528     } else { // VA.isRegLoc()
1529
1530       // sanity check
1531       assert(VA.isMemLoc());
1532       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
1533
1534       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
1535       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
1536
1537       // Create load nodes to retrieve arguments from the stack.
1538       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1539       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
1540     }
1541   }
1542
1543   // varargs
1544   if (isVarArg) {
1545     static const unsigned GPRArgRegs[] = {
1546       ARM::R0, ARM::R1, ARM::R2, ARM::R3
1547     };
1548
1549     unsigned NumGPRs = CCInfo.getFirstUnallocated
1550       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
1551
1552     unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
1553     unsigned VARegSize = (4 - NumGPRs) * 4;
1554     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
1555     unsigned ArgOffset = 0;
1556     if (VARegSaveSize) {
1557       // If this function is vararg, store any remaining integer argument regs
1558       // to their spots on the stack so that they may be loaded by deferencing
1559       // the result of va_next.
1560       AFI->setVarArgsRegSaveSize(VARegSaveSize);
1561       ArgOffset = CCInfo.getNextStackOffset();
1562       VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
1563                                                  VARegSaveSize - VARegSize);
1564       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
1565
1566       SmallVector<SDValue, 4> MemOps;
1567       for (; NumGPRs < 4; ++NumGPRs) {
1568         TargetRegisterClass *RC;
1569         if (AFI->isThumb1OnlyFunction())
1570           RC = ARM::tGPRRegisterClass;
1571         else
1572           RC = ARM::GPRRegisterClass;
1573
1574         unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
1575         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
1576         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1577         MemOps.push_back(Store);
1578         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
1579                           DAG.getConstant(4, getPointerTy()));
1580       }
1581       if (!MemOps.empty())
1582         Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1583                             &MemOps[0], MemOps.size());
1584     } else
1585       // This will point to the next argument passed via stack.
1586       VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
1587   }
1588
1589   return Chain;
1590 }
1591
1592 /// isFloatingPointZero - Return true if this is +0.0.
1593 static bool isFloatingPointZero(SDValue Op) {
1594   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1595     return CFP->getValueAPF().isPosZero();
1596   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1597     // Maybe this has already been legalized into the constant pool?
1598     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
1599       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
1600       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
1601         if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1602           return CFP->getValueAPF().isPosZero();
1603     }
1604   }
1605   return false;
1606 }
1607
1608 static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
1609   return ( isThumb1Only && (C & ~255U) == 0) ||
1610          (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
1611 }
1612
1613 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
1614 /// the given operands.
1615 static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1616                          SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
1617                          DebugLoc dl) {
1618   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1619     unsigned C = RHSC->getZExtValue();
1620     if (!isLegalCmpImmediate(C, isThumb1Only)) {
1621       // Constant does not fit, try adjusting it by one?
1622       switch (CC) {
1623       default: break;
1624       case ISD::SETLT:
1625       case ISD::SETGE:
1626         if (isLegalCmpImmediate(C-1, isThumb1Only)) {
1627           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1628           RHS = DAG.getConstant(C-1, MVT::i32);
1629         }
1630         break;
1631       case ISD::SETULT:
1632       case ISD::SETUGE:
1633         if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
1634           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1635           RHS = DAG.getConstant(C-1, MVT::i32);
1636         }
1637         break;
1638       case ISD::SETLE:
1639       case ISD::SETGT:
1640         if (isLegalCmpImmediate(C+1, isThumb1Only)) {
1641           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1642           RHS = DAG.getConstant(C+1, MVT::i32);
1643         }
1644         break;
1645       case ISD::SETULE:
1646       case ISD::SETUGT:
1647         if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
1648           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1649           RHS = DAG.getConstant(C+1, MVT::i32);
1650         }
1651         break;
1652       }
1653     }
1654   }
1655
1656   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
1657   ARMISD::NodeType CompareType;
1658   switch (CondCode) {
1659   default:
1660     CompareType = ARMISD::CMP;
1661     break;
1662   case ARMCC::EQ:
1663   case ARMCC::NE:
1664     // Uses only Z Flag
1665     CompareType = ARMISD::CMPZ;
1666     break;
1667   }
1668   ARMCC = DAG.getConstant(CondCode, MVT::i32);
1669   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
1670 }
1671
1672 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
1673 static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
1674                          DebugLoc dl) {
1675   SDValue Cmp;
1676   if (!isFloatingPointZero(RHS))
1677     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
1678   else
1679     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
1680   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
1681 }
1682
1683 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
1684                               const ARMSubtarget *ST) {
1685   MVT VT = Op.getValueType();
1686   SDValue LHS = Op.getOperand(0);
1687   SDValue RHS = Op.getOperand(1);
1688   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
1689   SDValue TrueVal = Op.getOperand(2);
1690   SDValue FalseVal = Op.getOperand(3);
1691   DebugLoc dl = Op.getDebugLoc();
1692
1693   if (LHS.getValueType() == MVT::i32) {
1694     SDValue ARMCC;
1695     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1696     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
1697     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
1698   }
1699
1700   ARMCC::CondCodes CondCode, CondCode2;
1701   if (FPCCToARMCC(CC, CondCode, CondCode2))
1702     std::swap(TrueVal, FalseVal);
1703
1704   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
1705   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1706   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
1707   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
1708                                  ARMCC, CCR, Cmp);
1709   if (CondCode2 != ARMCC::AL) {
1710     SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
1711     // FIXME: Needs another CMP because flag can have but one use.
1712     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
1713     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
1714                          Result, TrueVal, ARMCC2, CCR, Cmp2);
1715   }
1716   return Result;
1717 }
1718
1719 static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
1720                           const ARMSubtarget *ST) {
1721   SDValue  Chain = Op.getOperand(0);
1722   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
1723   SDValue    LHS = Op.getOperand(2);
1724   SDValue    RHS = Op.getOperand(3);
1725   SDValue   Dest = Op.getOperand(4);
1726   DebugLoc dl = Op.getDebugLoc();
1727
1728   if (LHS.getValueType() == MVT::i32) {
1729     SDValue ARMCC;
1730     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1731     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
1732     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
1733                        Chain, Dest, ARMCC, CCR,Cmp);
1734   }
1735
1736   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
1737   ARMCC::CondCodes CondCode, CondCode2;
1738   if (FPCCToARMCC(CC, CondCode, CondCode2))
1739     // Swap the LHS/RHS of the comparison if needed.
1740     std::swap(LHS, RHS);
1741
1742   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
1743   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
1744   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1745   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
1746   SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
1747   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
1748   if (CondCode2 != ARMCC::AL) {
1749     ARMCC = DAG.getConstant(CondCode2, MVT::i32);
1750     SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
1751     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
1752   }
1753   return Res;
1754 }
1755
1756 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
1757   SDValue Chain = Op.getOperand(0);
1758   SDValue Table = Op.getOperand(1);
1759   SDValue Index = Op.getOperand(2);
1760   DebugLoc dl = Op.getDebugLoc();
1761
1762   MVT PTy = getPointerTy();
1763   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
1764   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
1765   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
1766   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
1767   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
1768   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
1769   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
1770   if (Subtarget->isThumb2()) {
1771     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
1772     // which does another jump to the destination. This also makes it easier
1773     // to translate it to TBB / TBH later.
1774     // FIXME: This might not work if the function is extremely large.
1775     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
1776                        Addr, Op.getOperand(2), JTI, UId);
1777   }
1778   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
1779     Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0);
1780     Chain = Addr.getValue(1);
1781     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
1782     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
1783   } else {
1784     Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0);
1785     Chain = Addr.getValue(1);
1786     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
1787   }
1788 }
1789
1790 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
1791   DebugLoc dl = Op.getDebugLoc();
1792   unsigned Opc =
1793     Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
1794   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
1795   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
1796 }
1797
1798 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
1799   MVT VT = Op.getValueType();
1800   DebugLoc dl = Op.getDebugLoc();
1801   unsigned Opc =
1802     Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
1803
1804   Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
1805   return DAG.getNode(Opc, dl, VT, Op);
1806 }
1807
1808 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
1809   // Implement fcopysign with a fabs and a conditional fneg.
1810   SDValue Tmp0 = Op.getOperand(0);
1811   SDValue Tmp1 = Op.getOperand(1);
1812   DebugLoc dl = Op.getDebugLoc();
1813   MVT VT = Op.getValueType();
1814   MVT SrcVT = Tmp1.getValueType();
1815   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
1816   SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
1817   SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
1818   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1819   return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
1820 }
1821
1822 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
1823   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
1824   MFI->setFrameAddressIsTaken(true);
1825   MVT VT = Op.getValueType();
1826   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
1827   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1828   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
1829     ? ARM::R7 : ARM::R11;
1830   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
1831   while (Depth--)
1832     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
1833   return FrameAddr;
1834 }
1835
1836 SDValue
1837 ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
1838                                            SDValue Chain,
1839                                            SDValue Dst, SDValue Src,
1840                                            SDValue Size, unsigned Align,
1841                                            bool AlwaysInline,
1842                                          const Value *DstSV, uint64_t DstSVOff,
1843                                          const Value *SrcSV, uint64_t SrcSVOff){
1844   // Do repeated 4-byte loads and stores. To be improved.
1845   // This requires 4-byte alignment.
1846   if ((Align & 3) != 0)
1847     return SDValue();
1848   // This requires the copy size to be a constant, preferrably
1849   // within a subtarget-specific limit.
1850   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
1851   if (!ConstantSize)
1852     return SDValue();
1853   uint64_t SizeVal = ConstantSize->getZExtValue();
1854   if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
1855     return SDValue();
1856
1857   unsigned BytesLeft = SizeVal & 3;
1858   unsigned NumMemOps = SizeVal >> 2;
1859   unsigned EmittedNumMemOps = 0;
1860   MVT VT = MVT::i32;
1861   unsigned VTSize = 4;
1862   unsigned i = 0;
1863   const unsigned MAX_LOADS_IN_LDM = 6;
1864   SDValue TFOps[MAX_LOADS_IN_LDM];
1865   SDValue Loads[MAX_LOADS_IN_LDM];
1866   uint64_t SrcOff = 0, DstOff = 0;
1867
1868   // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
1869   // same number of stores.  The loads and stores will get combined into
1870   // ldm/stm later on.
1871   while (EmittedNumMemOps < NumMemOps) {
1872     for (i = 0;
1873          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
1874       Loads[i] = DAG.getLoad(VT, dl, Chain,
1875                              DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
1876                                          DAG.getConstant(SrcOff, MVT::i32)),
1877                              SrcSV, SrcSVOff + SrcOff);
1878       TFOps[i] = Loads[i].getValue(1);
1879       SrcOff += VTSize;
1880     }
1881     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
1882
1883     for (i = 0;
1884          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
1885       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
1886                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
1887                                        DAG.getConstant(DstOff, MVT::i32)),
1888                            DstSV, DstSVOff + DstOff);
1889       DstOff += VTSize;
1890     }
1891     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
1892
1893     EmittedNumMemOps += i;
1894   }
1895
1896   if (BytesLeft == 0)
1897     return Chain;
1898
1899   // Issue loads / stores for the trailing (1 - 3) bytes.
1900   unsigned BytesLeftSave = BytesLeft;
1901   i = 0;
1902   while (BytesLeft) {
1903     if (BytesLeft >= 2) {
1904       VT = MVT::i16;
1905       VTSize = 2;
1906     } else {
1907       VT = MVT::i8;
1908       VTSize = 1;
1909     }
1910
1911     Loads[i] = DAG.getLoad(VT, dl, Chain,
1912                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
1913                                        DAG.getConstant(SrcOff, MVT::i32)),
1914                            SrcSV, SrcSVOff + SrcOff);
1915     TFOps[i] = Loads[i].getValue(1);
1916     ++i;
1917     SrcOff += VTSize;
1918     BytesLeft -= VTSize;
1919   }
1920   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
1921
1922   i = 0;
1923   BytesLeft = BytesLeftSave;
1924   while (BytesLeft) {
1925     if (BytesLeft >= 2) {
1926       VT = MVT::i16;
1927       VTSize = 2;
1928     } else {
1929       VT = MVT::i8;
1930       VTSize = 1;
1931     }
1932
1933     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
1934                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
1935                                         DAG.getConstant(DstOff, MVT::i32)),
1936                             DstSV, DstSVOff + DstOff);
1937     ++i;
1938     DstOff += VTSize;
1939     BytesLeft -= VTSize;
1940   }
1941   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
1942 }
1943
1944 static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
1945   SDValue Op = N->getOperand(0);
1946   DebugLoc dl = N->getDebugLoc();
1947   if (N->getValueType(0) == MVT::f64) {
1948     // Turn i64->f64 into FMDRR.
1949     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
1950                              DAG.getConstant(0, MVT::i32));
1951     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
1952                              DAG.getConstant(1, MVT::i32));
1953     return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
1954   }
1955
1956   // Turn f64->i64 into FMRRD.
1957   SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
1958                             DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
1959
1960   // Merge the pieces into a single i64 value.
1961   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
1962 }
1963
1964 /// getZeroVector - Returns a vector of specified type with all zero elements.
1965 ///
1966 static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
1967   assert(VT.isVector() && "Expected a vector type");
1968
1969   // Zero vectors are used to represent vector negation and in those cases
1970   // will be implemented with the NEON VNEG instruction.  However, VNEG does
1971   // not support i64 elements, so sometimes the zero vectors will need to be
1972   // explicitly constructed.  For those cases, and potentially other uses in
1973   // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted
1974   // to their dest type.  This ensures they get CSE'd.
1975   SDValue Vec;
1976   SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
1977   if (VT.getSizeInBits() == 64)
1978     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
1979   else
1980     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
1981
1982   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
1983 }
1984
1985 /// getOnesVector - Returns a vector of specified type with all bits set.
1986 ///
1987 static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
1988   assert(VT.isVector() && "Expected a vector type");
1989
1990   // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
1991   // type.  This ensures they get CSE'd.
1992   SDValue Vec;
1993   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
1994   if (VT.getSizeInBits() == 64)
1995     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
1996   else
1997     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
1998
1999   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2000 }
2001
2002 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2003                           const ARMSubtarget *ST) {
2004   MVT VT = N->getValueType(0);
2005   DebugLoc dl = N->getDebugLoc();
2006
2007   // Lower vector shifts on NEON to use VSHL.
2008   if (VT.isVector()) {
2009     assert(ST->hasNEON() && "unexpected vector shift");
2010
2011     // Left shifts translate directly to the vshiftu intrinsic.
2012     if (N->getOpcode() == ISD::SHL)
2013       return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2014                          DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2015                          N->getOperand(0), N->getOperand(1));
2016
2017     assert((N->getOpcode() == ISD::SRA ||
2018             N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2019
2020     // NEON uses the same intrinsics for both left and right shifts.  For
2021     // right shifts, the shift amounts are negative, so negate the vector of
2022     // shift amounts.
2023     MVT ShiftVT = N->getOperand(1).getValueType();
2024     SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2025                                        getZeroVector(ShiftVT, DAG, dl),
2026                                        N->getOperand(1));
2027     Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2028                                Intrinsic::arm_neon_vshifts :
2029                                Intrinsic::arm_neon_vshiftu);
2030     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2031                        DAG.getConstant(vshiftInt, MVT::i32),
2032                        N->getOperand(0), NegatedCount);
2033   }
2034
2035   assert(VT == MVT::i64 &&
2036          (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2037          "Unknown shift to lower!");
2038
2039   // We only lower SRA, SRL of 1 here, all others use generic lowering.
2040   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2041       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2042     return SDValue();
2043
2044   // If we are in thumb mode, we don't have RRX.
2045   if (ST->isThumb1Only()) return SDValue();
2046
2047   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
2048   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2049                              DAG.getConstant(0, MVT::i32));
2050   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2051                              DAG.getConstant(1, MVT::i32));
2052
2053   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2054   // captures the result into a carry flag.
2055   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2056   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2057
2058   // The low part is an ARMISD::RRX operand, which shifts the carry in.
2059   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2060
2061   // Merge the pieces into a single i64 value.
2062  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2063 }
2064
2065 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2066   SDValue TmpOp0, TmpOp1;
2067   bool Invert = false;
2068   bool Swap = false;
2069   unsigned Opc = 0;
2070
2071   SDValue Op0 = Op.getOperand(0);
2072   SDValue Op1 = Op.getOperand(1);
2073   SDValue CC = Op.getOperand(2);
2074   MVT VT = Op.getValueType();
2075   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2076   DebugLoc dl = Op.getDebugLoc();
2077
2078   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2079     switch (SetCCOpcode) {
2080     default: llvm_unreachable("Illegal FP comparison"); break;
2081     case ISD::SETUNE:
2082     case ISD::SETNE:  Invert = true; // Fallthrough
2083     case ISD::SETOEQ:
2084     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2085     case ISD::SETOLT:
2086     case ISD::SETLT: Swap = true; // Fallthrough
2087     case ISD::SETOGT:
2088     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2089     case ISD::SETOLE:
2090     case ISD::SETLE:  Swap = true; // Fallthrough
2091     case ISD::SETOGE:
2092     case ISD::SETGE: Opc = ARMISD::VCGE; break;
2093     case ISD::SETUGE: Swap = true; // Fallthrough
2094     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2095     case ISD::SETUGT: Swap = true; // Fallthrough
2096     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2097     case ISD::SETUEQ: Invert = true; // Fallthrough
2098     case ISD::SETONE:
2099       // Expand this to (OLT | OGT).
2100       TmpOp0 = Op0;
2101       TmpOp1 = Op1;
2102       Opc = ISD::OR;
2103       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2104       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2105       break;
2106     case ISD::SETUO: Invert = true; // Fallthrough
2107     case ISD::SETO:
2108       // Expand this to (OLT | OGE).
2109       TmpOp0 = Op0;
2110       TmpOp1 = Op1;
2111       Opc = ISD::OR;
2112       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2113       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2114       break;
2115     }
2116   } else {
2117     // Integer comparisons.
2118     switch (SetCCOpcode) {
2119     default: llvm_unreachable("Illegal integer comparison"); break;
2120     case ISD::SETNE:  Invert = true;
2121     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2122     case ISD::SETLT:  Swap = true;
2123     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2124     case ISD::SETLE:  Swap = true;
2125     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
2126     case ISD::SETULT: Swap = true;
2127     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2128     case ISD::SETULE: Swap = true;
2129     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2130     }
2131
2132     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2133     if (Opc == ARMISD::VCEQ) {
2134
2135       SDValue AndOp;
2136       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2137         AndOp = Op0;
2138       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2139         AndOp = Op1;
2140
2141       // Ignore bitconvert.
2142       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2143         AndOp = AndOp.getOperand(0);
2144
2145       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2146         Opc = ARMISD::VTST;
2147         Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
2148         Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
2149         Invert = !Invert;
2150       }
2151     }
2152   }
2153
2154   if (Swap)
2155     std::swap(Op0, Op1);
2156
2157   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
2158
2159   if (Invert)
2160     Result = DAG.getNOT(dl, Result, VT);
2161
2162   return Result;
2163 }
2164
2165 /// isVMOVSplat - Check if the specified splat value corresponds to an immediate
2166 /// VMOV instruction, and if so, return the constant being splatted.
2167 static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
2168                            unsigned SplatBitSize, SelectionDAG &DAG) {
2169   switch (SplatBitSize) {
2170   case 8:
2171     // Any 1-byte value is OK.
2172     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2173     return DAG.getTargetConstant(SplatBits, MVT::i8);
2174
2175   case 16:
2176     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2177     if ((SplatBits & ~0xff) == 0 ||
2178         (SplatBits & ~0xff00) == 0)
2179       return DAG.getTargetConstant(SplatBits, MVT::i16);
2180     break;
2181
2182   case 32:
2183     // NEON's 32-bit VMOV supports splat values where:
2184     // * only one byte is nonzero, or
2185     // * the least significant byte is 0xff and the second byte is nonzero, or
2186     // * the least significant 2 bytes are 0xff and the third is nonzero.
2187     if ((SplatBits & ~0xff) == 0 ||
2188         (SplatBits & ~0xff00) == 0 ||
2189         (SplatBits & ~0xff0000) == 0 ||
2190         (SplatBits & ~0xff000000) == 0)
2191       return DAG.getTargetConstant(SplatBits, MVT::i32);
2192
2193     if ((SplatBits & ~0xffff) == 0 &&
2194         ((SplatBits | SplatUndef) & 0xff) == 0xff)
2195       return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
2196
2197     if ((SplatBits & ~0xffffff) == 0 &&
2198         ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
2199       return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
2200
2201     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2202     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2203     // VMOV.I32.  A (very) minor optimization would be to replicate the value
2204     // and fall through here to test for a valid 64-bit splat.  But, then the
2205     // caller would also need to check and handle the change in size.
2206     break;
2207
2208   case 64: {
2209     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
2210     uint64_t BitMask = 0xff;
2211     uint64_t Val = 0;
2212     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2213       if (((SplatBits | SplatUndef) & BitMask) == BitMask)
2214         Val |= BitMask;
2215       else if ((SplatBits & BitMask) != 0)
2216         return SDValue();
2217       BitMask <<= 8;
2218     }
2219     return DAG.getTargetConstant(Val, MVT::i64);
2220   }
2221
2222   default:
2223     llvm_unreachable("unexpected size for isVMOVSplat");
2224     break;
2225   }
2226
2227   return SDValue();
2228 }
2229
2230 /// getVMOVImm - If this is a build_vector of constants which can be
2231 /// formed by using a VMOV instruction of the specified element size,
2232 /// return the constant being splatted.  The ByteSize field indicates the
2233 /// number of bytes of each element [1248].
2234 SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2235   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
2236   APInt SplatBits, SplatUndef;
2237   unsigned SplatBitSize;
2238   bool HasAnyUndefs;
2239   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2240                                       HasAnyUndefs, ByteSize * 8))
2241     return SDValue();
2242
2243   if (SplatBitSize > ByteSize * 8)
2244     return SDValue();
2245
2246   return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
2247                      SplatBitSize, DAG);
2248 }
2249
2250 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
2251 /// instruction with the specified blocksize.  (The order of the elements
2252 /// within each block of the vector is reversed.)
2253 bool ARM::isVREVMask(ShuffleVectorSDNode *N, unsigned BlockSize) {
2254   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2255          "Only possible block sizes for VREV are: 16, 32, 64");
2256
2257   MVT VT = N->getValueType(0);
2258   unsigned NumElts = VT.getVectorNumElements();
2259   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2260   unsigned BlockElts = N->getMaskElt(0) + 1;
2261
2262   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2263     return false;
2264
2265   for (unsigned i = 0; i < NumElts; ++i) {
2266     if ((unsigned) N->getMaskElt(i) !=
2267         (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2268       return false;
2269   }
2270
2271   return true;
2272 }
2273
2274 static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) {
2275   // Canonicalize all-zeros and all-ones vectors.
2276   ConstantSDNode *ConstVal = dyn_cast<ConstantSDNode>(Val.getNode());
2277   if (ConstVal->isNullValue())
2278     return getZeroVector(VT, DAG, dl);
2279   if (ConstVal->isAllOnesValue())
2280     return getOnesVector(VT, DAG, dl);
2281
2282   MVT CanonicalVT;
2283   if (VT.is64BitVector()) {
2284     switch (Val.getValueType().getSizeInBits()) {
2285     case 8:  CanonicalVT = MVT::v8i8; break;
2286     case 16: CanonicalVT = MVT::v4i16; break;
2287     case 32: CanonicalVT = MVT::v2i32; break;
2288     case 64: CanonicalVT = MVT::v1i64; break;
2289     default: llvm_unreachable("unexpected splat element type"); break;
2290     }
2291   } else {
2292     assert(VT.is128BitVector() && "unknown splat vector size");
2293     switch (Val.getValueType().getSizeInBits()) {
2294     case 8:  CanonicalVT = MVT::v16i8; break;
2295     case 16: CanonicalVT = MVT::v8i16; break;
2296     case 32: CanonicalVT = MVT::v4i32; break;
2297     case 64: CanonicalVT = MVT::v2i64; break;
2298     default: llvm_unreachable("unexpected splat element type"); break;
2299     }
2300   }
2301
2302   // Build a canonical splat for this value.
2303   SmallVector<SDValue, 8> Ops;
2304   Ops.assign(CanonicalVT.getVectorNumElements(), Val);
2305   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
2306                             Ops.size());
2307   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
2308 }
2309
2310 // If this is a case we can't handle, return null and let the default
2311 // expansion code take care of it.
2312 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
2313   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
2314   assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
2315   DebugLoc dl = Op.getDebugLoc();
2316   MVT VT = Op.getValueType();
2317
2318   APInt SplatBits, SplatUndef;
2319   unsigned SplatBitSize;
2320   bool HasAnyUndefs;
2321   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
2322     SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
2323                               SplatUndef.getZExtValue(), SplatBitSize, DAG);
2324     if (Val.getNode())
2325       return BuildSplat(Val, VT, DAG, dl);
2326   }
2327
2328   // If there are only 2 elements in a 128-bit vector, insert them into an
2329   // undef vector.  This handles the common case for 128-bit vector argument
2330   // passing, where the insertions should be translated to subreg accesses
2331   // with no real instructions.
2332   if (VT.is128BitVector() && Op.getNumOperands() == 2) {
2333     SDValue Val = DAG.getUNDEF(VT);
2334     SDValue Op0 = Op.getOperand(0);
2335     SDValue Op1 = Op.getOperand(1);
2336     if (Op0.getOpcode() != ISD::UNDEF)
2337       Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
2338                         DAG.getIntPtrConstant(0));
2339     if (Op1.getOpcode() != ISD::UNDEF)
2340       Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
2341                         DAG.getIntPtrConstant(1));
2342     return Val;
2343   }
2344
2345   return SDValue();
2346 }
2347
2348 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
2349   return Op;
2350 }
2351
2352 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
2353   return Op;
2354 }
2355
2356 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2357   MVT VT = Op.getValueType();
2358   DebugLoc dl = Op.getDebugLoc();
2359   assert((VT == MVT::i8 || VT == MVT::i16) &&
2360          "unexpected type for custom-lowering vector extract");
2361   SDValue Vec = Op.getOperand(0);
2362   SDValue Lane = Op.getOperand(1);
2363   Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
2364   Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT));
2365   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
2366 }
2367
2368 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
2369   // The only time a CONCAT_VECTORS operation can have legal types is when
2370   // two 64-bit vectors are concatenated to a 128-bit vector.
2371   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
2372          "unexpected CONCAT_VECTORS");
2373   DebugLoc dl = Op.getDebugLoc();
2374   SDValue Val = DAG.getUNDEF(MVT::v2f64);
2375   SDValue Op0 = Op.getOperand(0);
2376   SDValue Op1 = Op.getOperand(1);
2377   if (Op0.getOpcode() != ISD::UNDEF)
2378     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
2379                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
2380                       DAG.getIntPtrConstant(0));
2381   if (Op1.getOpcode() != ISD::UNDEF)
2382     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
2383                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
2384                       DAG.getIntPtrConstant(1));
2385   return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
2386 }
2387
2388 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
2389   switch (Op.getOpcode()) {
2390   default: llvm_unreachable("Don't know how to custom lower this!");
2391   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
2392   case ISD::GlobalAddress:
2393     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
2394       LowerGlobalAddressELF(Op, DAG);
2395   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
2396   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG, Subtarget);
2397   case ISD::BR_CC:         return LowerBR_CC(Op, DAG, Subtarget);
2398   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
2399   case ISD::VASTART:       return LowerVASTART(Op, DAG, VarArgsFrameIndex);
2400   case ISD::SINT_TO_FP:
2401   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
2402   case ISD::FP_TO_SINT:
2403   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
2404   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
2405   case ISD::RETURNADDR:    break;
2406   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
2407   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
2408   case ISD::INTRINSIC_VOID:
2409   case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
2410   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2411   case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
2412   case ISD::SHL:
2413   case ISD::SRL:
2414   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
2415   case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
2416   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG);
2417   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
2418   case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
2419   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2420   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
2421   }
2422   return SDValue();
2423 }
2424
2425 /// ReplaceNodeResults - Replace the results of node with an illegal result
2426 /// type with new values built out of custom code.
2427 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
2428                                            SmallVectorImpl<SDValue>&Results,
2429                                            SelectionDAG &DAG) {
2430   switch (N->getOpcode()) {
2431   default:
2432     llvm_unreachable("Don't know how to custom expand this!");
2433     return;
2434   case ISD::BIT_CONVERT:
2435     Results.push_back(ExpandBIT_CONVERT(N, DAG));
2436     return;
2437   case ISD::SRL:
2438   case ISD::SRA: {
2439     SDValue Res = LowerShift(N, DAG, Subtarget);
2440     if (Res.getNode())
2441       Results.push_back(Res);
2442     return;
2443   }
2444   }
2445 }
2446
2447 //===----------------------------------------------------------------------===//
2448 //                           ARM Scheduler Hooks
2449 //===----------------------------------------------------------------------===//
2450
2451 MachineBasicBlock *
2452 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
2453                                                MachineBasicBlock *BB) const {
2454   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
2455   DebugLoc dl = MI->getDebugLoc();
2456   switch (MI->getOpcode()) {
2457   default: assert(false && "Unexpected instr type to insert");
2458   case ARM::tMOVCCr: {
2459     // To "insert" a SELECT_CC instruction, we actually have to insert the
2460     // diamond control-flow pattern.  The incoming instruction knows the
2461     // destination vreg to set, the condition code register to branch on, the
2462     // true/false values to select between, and a branch opcode to use.
2463     const BasicBlock *LLVM_BB = BB->getBasicBlock();
2464     MachineFunction::iterator It = BB;
2465     ++It;
2466
2467     //  thisMBB:
2468     //  ...
2469     //   TrueVal = ...
2470     //   cmpTY ccX, r1, r2
2471     //   bCC copy1MBB
2472     //   fallthrough --> copy0MBB
2473     MachineBasicBlock *thisMBB  = BB;
2474     MachineFunction *F = BB->getParent();
2475     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
2476     MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
2477     BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
2478       .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
2479     F->insert(It, copy0MBB);
2480     F->insert(It, sinkMBB);
2481     // Update machine-CFG edges by first adding all successors of the current
2482     // block to the new block which will contain the Phi node for the select.
2483     for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
2484         e = BB->succ_end(); i != e; ++i)
2485       sinkMBB->addSuccessor(*i);
2486     // Next, remove all successors of the current block, and add the true
2487     // and fallthrough blocks as its successors.
2488     while(!BB->succ_empty())
2489       BB->removeSuccessor(BB->succ_begin());
2490     BB->addSuccessor(copy0MBB);
2491     BB->addSuccessor(sinkMBB);
2492
2493     //  copy0MBB:
2494     //   %FalseValue = ...
2495     //   # fallthrough to sinkMBB
2496     BB = copy0MBB;
2497
2498     // Update machine-CFG edges
2499     BB->addSuccessor(sinkMBB);
2500
2501     //  sinkMBB:
2502     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
2503     //  ...
2504     BB = sinkMBB;
2505     BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
2506       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
2507       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
2508
2509     F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
2510     return BB;
2511   }
2512   }
2513 }
2514
2515 //===----------------------------------------------------------------------===//
2516 //                           ARM Optimization Hooks
2517 //===----------------------------------------------------------------------===//
2518
2519 static
2520 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
2521                             TargetLowering::DAGCombinerInfo &DCI) {
2522   SelectionDAG &DAG = DCI.DAG;
2523   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2524   MVT VT = N->getValueType(0);
2525   unsigned Opc = N->getOpcode();
2526   bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
2527   SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
2528   SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
2529   ISD::CondCode CC = ISD::SETCC_INVALID;
2530
2531   if (isSlctCC) {
2532     CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
2533   } else {
2534     SDValue CCOp = Slct.getOperand(0);
2535     if (CCOp.getOpcode() == ISD::SETCC)
2536       CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
2537   }
2538
2539   bool DoXform = false;
2540   bool InvCC = false;
2541   assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
2542           "Bad input!");
2543
2544   if (LHS.getOpcode() == ISD::Constant &&
2545       cast<ConstantSDNode>(LHS)->isNullValue()) {
2546     DoXform = true;
2547   } else if (CC != ISD::SETCC_INVALID &&
2548              RHS.getOpcode() == ISD::Constant &&
2549              cast<ConstantSDNode>(RHS)->isNullValue()) {
2550     std::swap(LHS, RHS);
2551     SDValue Op0 = Slct.getOperand(0);
2552     MVT OpVT = isSlctCC ? Op0.getValueType() :
2553                           Op0.getOperand(0).getValueType();
2554     bool isInt = OpVT.isInteger();
2555     CC = ISD::getSetCCInverse(CC, isInt);
2556
2557     if (!TLI.isCondCodeLegal(CC, OpVT))
2558       return SDValue();         // Inverse operator isn't legal.
2559
2560     DoXform = true;
2561     InvCC = true;
2562   }
2563
2564   if (DoXform) {
2565     SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
2566     if (isSlctCC)
2567       return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
2568                              Slct.getOperand(0), Slct.getOperand(1), CC);
2569     SDValue CCOp = Slct.getOperand(0);
2570     if (InvCC)
2571       CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
2572                           CCOp.getOperand(0), CCOp.getOperand(1), CC);
2573     return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
2574                        CCOp, OtherOp, Result);
2575   }
2576   return SDValue();
2577 }
2578
2579 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
2580 static SDValue PerformADDCombine(SDNode *N,
2581                                  TargetLowering::DAGCombinerInfo &DCI) {
2582   // added by evan in r37685 with no testcase.
2583   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2584
2585   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
2586   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
2587     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
2588     if (Result.getNode()) return Result;
2589   }
2590   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
2591     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
2592     if (Result.getNode()) return Result;
2593   }
2594
2595   return SDValue();
2596 }
2597
2598 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
2599 static SDValue PerformSUBCombine(SDNode *N,
2600                                  TargetLowering::DAGCombinerInfo &DCI) {
2601   // added by evan in r37685 with no testcase.
2602   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2603
2604   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
2605   if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
2606     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
2607     if (Result.getNode()) return Result;
2608   }
2609
2610   return SDValue();
2611 }
2612
2613
2614 /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
2615 static SDValue PerformFMRRDCombine(SDNode *N,
2616                                    TargetLowering::DAGCombinerInfo &DCI) {
2617   // fmrrd(fmdrr x, y) -> x,y
2618   SDValue InDouble = N->getOperand(0);
2619   if (InDouble.getOpcode() == ARMISD::FMDRR)
2620     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
2621   return SDValue();
2622 }
2623
2624 /// getVShiftImm - Check if this is a valid build_vector for the immediate
2625 /// operand of a vector shift operation, where all the elements of the
2626 /// build_vector must have the same constant integer value.
2627 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
2628   // Ignore bit_converts.
2629   while (Op.getOpcode() == ISD::BIT_CONVERT)
2630     Op = Op.getOperand(0);
2631   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
2632   APInt SplatBits, SplatUndef;
2633   unsigned SplatBitSize;
2634   bool HasAnyUndefs;
2635   if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2636                                       HasAnyUndefs, ElementBits) ||
2637       SplatBitSize > ElementBits)
2638     return false;
2639   Cnt = SplatBits.getSExtValue();
2640   return true;
2641 }
2642
2643 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
2644 /// operand of a vector shift left operation.  That value must be in the range:
2645 ///   0 <= Value < ElementBits for a left shift; or
2646 ///   0 <= Value <= ElementBits for a long left shift.
2647 static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) {
2648   assert(VT.isVector() && "vector shift count is not a vector type");
2649   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
2650   if (! getVShiftImm(Op, ElementBits, Cnt))
2651     return false;
2652   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
2653 }
2654
2655 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
2656 /// operand of a vector shift right operation.  For a shift opcode, the value
2657 /// is positive, but for an intrinsic the value count must be negative. The
2658 /// absolute value must be in the range:
2659 ///   1 <= |Value| <= ElementBits for a right shift; or
2660 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
2661 static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic,
2662                          int64_t &Cnt) {
2663   assert(VT.isVector() && "vector shift count is not a vector type");
2664   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
2665   if (! getVShiftImm(Op, ElementBits, Cnt))
2666     return false;
2667   if (isIntrinsic)
2668     Cnt = -Cnt;
2669   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
2670 }
2671
2672 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
2673 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
2674   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2675   switch (IntNo) {
2676   default:
2677     // Don't do anything for most intrinsics.
2678     break;
2679
2680   // Vector shifts: check for immediate versions and lower them.
2681   // Note: This is done during DAG combining instead of DAG legalizing because
2682   // the build_vectors for 64-bit vector element shift counts are generally
2683   // not legal, and it is hard to see their values after they get legalized to
2684   // loads from a constant pool.
2685   case Intrinsic::arm_neon_vshifts:
2686   case Intrinsic::arm_neon_vshiftu:
2687   case Intrinsic::arm_neon_vshiftls:
2688   case Intrinsic::arm_neon_vshiftlu:
2689   case Intrinsic::arm_neon_vshiftn:
2690   case Intrinsic::arm_neon_vrshifts:
2691   case Intrinsic::arm_neon_vrshiftu:
2692   case Intrinsic::arm_neon_vrshiftn:
2693   case Intrinsic::arm_neon_vqshifts:
2694   case Intrinsic::arm_neon_vqshiftu:
2695   case Intrinsic::arm_neon_vqshiftsu:
2696   case Intrinsic::arm_neon_vqshiftns:
2697   case Intrinsic::arm_neon_vqshiftnu:
2698   case Intrinsic::arm_neon_vqshiftnsu:
2699   case Intrinsic::arm_neon_vqrshiftns:
2700   case Intrinsic::arm_neon_vqrshiftnu:
2701   case Intrinsic::arm_neon_vqrshiftnsu: {
2702     MVT VT = N->getOperand(1).getValueType();
2703     int64_t Cnt;
2704     unsigned VShiftOpc = 0;
2705
2706     switch (IntNo) {
2707     case Intrinsic::arm_neon_vshifts:
2708     case Intrinsic::arm_neon_vshiftu:
2709       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
2710         VShiftOpc = ARMISD::VSHL;
2711         break;
2712       }
2713       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
2714         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
2715                      ARMISD::VSHRs : ARMISD::VSHRu);
2716         break;
2717       }
2718       return SDValue();
2719
2720     case Intrinsic::arm_neon_vshiftls:
2721     case Intrinsic::arm_neon_vshiftlu:
2722       if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
2723         break;
2724       llvm_unreachable("invalid shift count for vshll intrinsic");
2725
2726     case Intrinsic::arm_neon_vrshifts:
2727     case Intrinsic::arm_neon_vrshiftu:
2728       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
2729         break;
2730       return SDValue();
2731
2732     case Intrinsic::arm_neon_vqshifts:
2733     case Intrinsic::arm_neon_vqshiftu:
2734       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
2735         break;
2736       return SDValue();
2737
2738     case Intrinsic::arm_neon_vqshiftsu:
2739       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
2740         break;
2741       llvm_unreachable("invalid shift count for vqshlu intrinsic");
2742
2743     case Intrinsic::arm_neon_vshiftn:
2744     case Intrinsic::arm_neon_vrshiftn:
2745     case Intrinsic::arm_neon_vqshiftns:
2746     case Intrinsic::arm_neon_vqshiftnu:
2747     case Intrinsic::arm_neon_vqshiftnsu:
2748     case Intrinsic::arm_neon_vqrshiftns:
2749     case Intrinsic::arm_neon_vqrshiftnu:
2750     case Intrinsic::arm_neon_vqrshiftnsu:
2751       // Narrowing shifts require an immediate right shift.
2752       if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
2753         break;
2754       llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
2755
2756     default:
2757       llvm_unreachable("unhandled vector shift");
2758     }
2759
2760     switch (IntNo) {
2761     case Intrinsic::arm_neon_vshifts:
2762     case Intrinsic::arm_neon_vshiftu:
2763       // Opcode already set above.
2764       break;
2765     case Intrinsic::arm_neon_vshiftls:
2766     case Intrinsic::arm_neon_vshiftlu:
2767       if (Cnt == VT.getVectorElementType().getSizeInBits())
2768         VShiftOpc = ARMISD::VSHLLi;
2769       else
2770         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
2771                      ARMISD::VSHLLs : ARMISD::VSHLLu);
2772       break;
2773     case Intrinsic::arm_neon_vshiftn:
2774       VShiftOpc = ARMISD::VSHRN; break;
2775     case Intrinsic::arm_neon_vrshifts:
2776       VShiftOpc = ARMISD::VRSHRs; break;
2777     case Intrinsic::arm_neon_vrshiftu:
2778       VShiftOpc = ARMISD::VRSHRu; break;
2779     case Intrinsic::arm_neon_vrshiftn:
2780       VShiftOpc = ARMISD::VRSHRN; break;
2781     case Intrinsic::arm_neon_vqshifts:
2782       VShiftOpc = ARMISD::VQSHLs; break;
2783     case Intrinsic::arm_neon_vqshiftu:
2784       VShiftOpc = ARMISD::VQSHLu; break;
2785     case Intrinsic::arm_neon_vqshiftsu:
2786       VShiftOpc = ARMISD::VQSHLsu; break;
2787     case Intrinsic::arm_neon_vqshiftns:
2788       VShiftOpc = ARMISD::VQSHRNs; break;
2789     case Intrinsic::arm_neon_vqshiftnu:
2790       VShiftOpc = ARMISD::VQSHRNu; break;
2791     case Intrinsic::arm_neon_vqshiftnsu:
2792       VShiftOpc = ARMISD::VQSHRNsu; break;
2793     case Intrinsic::arm_neon_vqrshiftns:
2794       VShiftOpc = ARMISD::VQRSHRNs; break;
2795     case Intrinsic::arm_neon_vqrshiftnu:
2796       VShiftOpc = ARMISD::VQRSHRNu; break;
2797     case Intrinsic::arm_neon_vqrshiftnsu:
2798       VShiftOpc = ARMISD::VQRSHRNsu; break;
2799     }
2800
2801     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
2802                        N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
2803   }
2804
2805   case Intrinsic::arm_neon_vshiftins: {
2806     MVT VT = N->getOperand(1).getValueType();
2807     int64_t Cnt;
2808     unsigned VShiftOpc = 0;
2809
2810     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
2811       VShiftOpc = ARMISD::VSLI;
2812     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
2813       VShiftOpc = ARMISD::VSRI;
2814     else {
2815       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
2816     }
2817
2818     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
2819                        N->getOperand(1), N->getOperand(2),
2820                        DAG.getConstant(Cnt, MVT::i32));
2821   }
2822
2823   case Intrinsic::arm_neon_vqrshifts:
2824   case Intrinsic::arm_neon_vqrshiftu:
2825     // No immediate versions of these to check for.
2826     break;
2827   }
2828
2829   return SDValue();
2830 }
2831
2832 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
2833 /// lowers them.  As with the vector shift intrinsics, this is done during DAG
2834 /// combining instead of DAG legalizing because the build_vectors for 64-bit
2835 /// vector element shift counts are generally not legal, and it is hard to see
2836 /// their values after they get legalized to loads from a constant pool.
2837 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
2838                                    const ARMSubtarget *ST) {
2839   MVT VT = N->getValueType(0);
2840
2841   // Nothing to be done for scalar shifts.
2842   if (! VT.isVector())
2843     return SDValue();
2844
2845   assert(ST->hasNEON() && "unexpected vector shift");
2846   int64_t Cnt;
2847
2848   switch (N->getOpcode()) {
2849   default: llvm_unreachable("unexpected shift opcode");
2850
2851   case ISD::SHL:
2852     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
2853       return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
2854                          DAG.getConstant(Cnt, MVT::i32));
2855     break;
2856
2857   case ISD::SRA:
2858   case ISD::SRL:
2859     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
2860       unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
2861                             ARMISD::VSHRs : ARMISD::VSHRu);
2862       return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
2863                          DAG.getConstant(Cnt, MVT::i32));
2864     }
2865   }
2866   return SDValue();
2867 }
2868
2869 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
2870 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
2871 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
2872                                     const ARMSubtarget *ST) {
2873   SDValue N0 = N->getOperand(0);
2874
2875   // Check for sign- and zero-extensions of vector extract operations of 8-
2876   // and 16-bit vector elements.  NEON supports these directly.  They are
2877   // handled during DAG combining because type legalization will promote them
2878   // to 32-bit types and it is messy to recognize the operations after that.
2879   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2880     SDValue Vec = N0.getOperand(0);
2881     SDValue Lane = N0.getOperand(1);
2882     MVT VT = N->getValueType(0);
2883     MVT EltVT = N0.getValueType();
2884     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2885
2886     if (VT == MVT::i32 &&
2887         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
2888         TLI.isTypeLegal(Vec.getValueType())) {
2889
2890       unsigned Opc = 0;
2891       switch (N->getOpcode()) {
2892       default: llvm_unreachable("unexpected opcode");
2893       case ISD::SIGN_EXTEND:
2894         Opc = ARMISD::VGETLANEs;
2895         break;
2896       case ISD::ZERO_EXTEND:
2897       case ISD::ANY_EXTEND:
2898         Opc = ARMISD::VGETLANEu;
2899         break;
2900       }
2901       return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
2902     }
2903   }
2904
2905   return SDValue();
2906 }
2907
2908 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
2909                                              DAGCombinerInfo &DCI) const {
2910   switch (N->getOpcode()) {
2911   default: break;
2912   case ISD::ADD:      return PerformADDCombine(N, DCI);
2913   case ISD::SUB:      return PerformSUBCombine(N, DCI);
2914   case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
2915   case ISD::INTRINSIC_WO_CHAIN:
2916     return PerformIntrinsicCombine(N, DCI.DAG);
2917   case ISD::SHL:
2918   case ISD::SRA:
2919   case ISD::SRL:
2920     return PerformShiftCombine(N, DCI.DAG, Subtarget);
2921   case ISD::SIGN_EXTEND:
2922   case ISD::ZERO_EXTEND:
2923   case ISD::ANY_EXTEND:
2924     return PerformExtendCombine(N, DCI.DAG, Subtarget);
2925   }
2926   return SDValue();
2927 }
2928
2929 /// isLegalAddressImmediate - Return true if the integer value can be used
2930 /// as the offset of the target addressing mode for load / store of the
2931 /// given type.
2932 static bool isLegalAddressImmediate(int64_t V, MVT VT,
2933                                     const ARMSubtarget *Subtarget) {
2934   if (V == 0)
2935     return true;
2936
2937   if (!VT.isSimple())
2938     return false;
2939
2940   if (Subtarget->isThumb()) { // FIXME for thumb2
2941     if (V < 0)
2942       return false;
2943
2944     unsigned Scale = 1;
2945     switch (VT.getSimpleVT()) {
2946     default: return false;
2947     case MVT::i1:
2948     case MVT::i8:
2949       // Scale == 1;
2950       break;
2951     case MVT::i16:
2952       // Scale == 2;
2953       Scale = 2;
2954       break;
2955     case MVT::i32:
2956       // Scale == 4;
2957       Scale = 4;
2958       break;
2959     }
2960
2961     if ((V & (Scale - 1)) != 0)
2962       return false;
2963     V /= Scale;
2964     return V == (V & ((1LL << 5) - 1));
2965   }
2966
2967   if (V < 0)
2968     V = - V;
2969   switch (VT.getSimpleVT()) {
2970   default: return false;
2971   case MVT::i1:
2972   case MVT::i8:
2973   case MVT::i32:
2974     // +- imm12
2975     return V == (V & ((1LL << 12) - 1));
2976   case MVT::i16:
2977     // +- imm8
2978     return V == (V & ((1LL << 8) - 1));
2979   case MVT::f32:
2980   case MVT::f64:
2981     if (!Subtarget->hasVFP2())
2982       return false;
2983     if ((V & 3) != 0)
2984       return false;
2985     V >>= 2;
2986     return V == (V & ((1LL << 8) - 1));
2987   }
2988 }
2989
2990 /// isLegalAddressingMode - Return true if the addressing mode represented
2991 /// by AM is legal for this target, for a load/store of the specified type.
2992 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
2993                                               const Type *Ty) const {
2994   MVT VT = getValueType(Ty, true);
2995   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
2996     return false;
2997
2998   // Can never fold addr of global into load/store.
2999   if (AM.BaseGV)
3000     return false;
3001
3002   switch (AM.Scale) {
3003   case 0:  // no scale reg, must be "r+i" or "r", or "i".
3004     break;
3005   case 1:
3006     if (Subtarget->isThumb())  // FIXME for thumb2
3007       return false;
3008     // FALL THROUGH.
3009   default:
3010     // ARM doesn't support any R+R*scale+imm addr modes.
3011     if (AM.BaseOffs)
3012       return false;
3013
3014     if (!VT.isSimple())
3015       return false;
3016
3017     int Scale = AM.Scale;
3018     switch (VT.getSimpleVT()) {
3019     default: return false;
3020     case MVT::i1:
3021     case MVT::i8:
3022     case MVT::i32:
3023     case MVT::i64:
3024       // This assumes i64 is legalized to a pair of i32. If not (i.e.
3025       // ldrd / strd are used, then its address mode is same as i16.
3026       // r + r
3027       if (Scale < 0) Scale = -Scale;
3028       if (Scale == 1)
3029         return true;
3030       // r + r << imm
3031       return isPowerOf2_32(Scale & ~1);
3032     case MVT::i16:
3033       // r + r
3034       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
3035         return true;
3036       return false;
3037
3038     case MVT::isVoid:
3039       // Note, we allow "void" uses (basically, uses that aren't loads or
3040       // stores), because arm allows folding a scale into many arithmetic
3041       // operations.  This should be made more precise and revisited later.
3042
3043       // Allow r << imm, but the imm has to be a multiple of two.
3044       if (AM.Scale & 1) return false;
3045       return isPowerOf2_32(AM.Scale);
3046     }
3047     break;
3048   }
3049   return true;
3050 }
3051
3052 static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
3053                                       bool isSEXTLoad, SDValue &Base,
3054                                       SDValue &Offset, bool &isInc,
3055                                       SelectionDAG &DAG) {
3056   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
3057     return false;
3058
3059   if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
3060     // AddressingMode 3
3061     Base = Ptr->getOperand(0);
3062     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
3063       int RHSC = (int)RHS->getZExtValue();
3064       if (RHSC < 0 && RHSC > -256) {
3065         assert(Ptr->getOpcode() == ISD::ADD);
3066         isInc = false;
3067         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
3068         return true;
3069       }
3070     }
3071     isInc = (Ptr->getOpcode() == ISD::ADD);
3072     Offset = Ptr->getOperand(1);
3073     return true;
3074   } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
3075     // AddressingMode 2
3076     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
3077       int RHSC = (int)RHS->getZExtValue();
3078       if (RHSC < 0 && RHSC > -0x1000) {
3079         assert(Ptr->getOpcode() == ISD::ADD);
3080         isInc = false;
3081         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
3082         Base = Ptr->getOperand(0);
3083         return true;
3084       }
3085     }
3086
3087     if (Ptr->getOpcode() == ISD::ADD) {
3088       isInc = true;
3089       ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
3090       if (ShOpcVal != ARM_AM::no_shift) {
3091         Base = Ptr->getOperand(1);
3092         Offset = Ptr->getOperand(0);
3093       } else {
3094         Base = Ptr->getOperand(0);
3095         Offset = Ptr->getOperand(1);
3096       }
3097       return true;
3098     }
3099
3100     isInc = (Ptr->getOpcode() == ISD::ADD);
3101     Base = Ptr->getOperand(0);
3102     Offset = Ptr->getOperand(1);
3103     return true;
3104   }
3105
3106   // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
3107   return false;
3108 }
3109
3110 static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT,
3111                                      bool isSEXTLoad, SDValue &Base,
3112                                      SDValue &Offset, bool &isInc,
3113                                      SelectionDAG &DAG) {
3114   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
3115     return false;
3116
3117   Base = Ptr->getOperand(0);
3118   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
3119     int RHSC = (int)RHS->getZExtValue();
3120     if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
3121       assert(Ptr->getOpcode() == ISD::ADD);
3122       isInc = false;
3123       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
3124       return true;
3125     } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
3126       isInc = Ptr->getOpcode() == ISD::ADD;
3127       Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
3128       return true;
3129     }
3130   }
3131
3132   return false;
3133 }
3134
3135 /// getPreIndexedAddressParts - returns true by value, base pointer and
3136 /// offset pointer and addressing mode by reference if the node's address
3137 /// can be legally represented as pre-indexed load / store address.
3138 bool
3139 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3140                                              SDValue &Offset,
3141                                              ISD::MemIndexedMode &AM,
3142                                              SelectionDAG &DAG) const {
3143   if (Subtarget->isThumb1Only())
3144     return false;
3145
3146   MVT VT;
3147   SDValue Ptr;
3148   bool isSEXTLoad = false;
3149   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3150     Ptr = LD->getBasePtr();
3151     VT  = LD->getMemoryVT();
3152     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
3153   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3154     Ptr = ST->getBasePtr();
3155     VT  = ST->getMemoryVT();
3156   } else
3157     return false;
3158
3159   bool isInc;
3160   bool isLegal = false;
3161   if (Subtarget->isThumb() && Subtarget->hasThumb2())
3162     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
3163                                        Offset, isInc, DAG);
3164   else
3165     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
3166                                         Offset, isInc, DAG);
3167   if (!isLegal)
3168     return false;
3169
3170   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
3171   return true;
3172 }
3173
3174 /// getPostIndexedAddressParts - returns true by value, base pointer and
3175 /// offset pointer and addressing mode by reference if this node can be
3176 /// combined with a load / store to form a post-indexed load / store.
3177 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
3178                                                    SDValue &Base,
3179                                                    SDValue &Offset,
3180                                                    ISD::MemIndexedMode &AM,
3181                                                    SelectionDAG &DAG) const {
3182   if (Subtarget->isThumb1Only())
3183     return false;
3184
3185   MVT VT;
3186   SDValue Ptr;
3187   bool isSEXTLoad = false;
3188   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3189     VT  = LD->getMemoryVT();
3190     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
3191   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3192     VT  = ST->getMemoryVT();
3193   } else
3194     return false;
3195
3196   bool isInc;
3197   bool isLegal = false;
3198   if (Subtarget->isThumb() && Subtarget->hasThumb2())
3199     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
3200                                         isInc, DAG);
3201   else
3202     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
3203                                         isInc, DAG);
3204   if (!isLegal)
3205     return false;
3206
3207   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
3208   return true;
3209 }
3210
3211 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3212                                                        const APInt &Mask,
3213                                                        APInt &KnownZero,
3214                                                        APInt &KnownOne,
3215                                                        const SelectionDAG &DAG,
3216                                                        unsigned Depth) const {
3217   KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3218   switch (Op.getOpcode()) {
3219   default: break;
3220   case ARMISD::CMOV: {
3221     // Bits are known zero/one if known on the LHS and RHS.
3222     DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
3223     if (KnownZero == 0 && KnownOne == 0) return;
3224
3225     APInt KnownZeroRHS, KnownOneRHS;
3226     DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
3227                           KnownZeroRHS, KnownOneRHS, Depth+1);
3228     KnownZero &= KnownZeroRHS;
3229     KnownOne  &= KnownOneRHS;
3230     return;
3231   }
3232   }
3233 }
3234
3235 //===----------------------------------------------------------------------===//
3236 //                           ARM Inline Assembly Support
3237 //===----------------------------------------------------------------------===//
3238
3239 /// getConstraintType - Given a constraint letter, return the type of
3240 /// constraint it is for this target.
3241 ARMTargetLowering::ConstraintType
3242 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
3243   if (Constraint.size() == 1) {
3244     switch (Constraint[0]) {
3245     default:  break;
3246     case 'l': return C_RegisterClass;
3247     case 'w': return C_RegisterClass;
3248     }
3249   }
3250   return TargetLowering::getConstraintType(Constraint);
3251 }
3252
3253 std::pair<unsigned, const TargetRegisterClass*>
3254 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3255                                                 MVT VT) const {
3256   if (Constraint.size() == 1) {
3257     // GCC RS6000 Constraint Letters
3258     switch (Constraint[0]) {
3259     case 'l':
3260       if (Subtarget->isThumb1Only())
3261         return std::make_pair(0U, ARM::tGPRRegisterClass);
3262       else
3263         return std::make_pair(0U, ARM::GPRRegisterClass);
3264     case 'r':
3265       return std::make_pair(0U, ARM::GPRRegisterClass);
3266     case 'w':
3267       if (VT == MVT::f32)
3268         return std::make_pair(0U, ARM::SPRRegisterClass);
3269       if (VT == MVT::f64)
3270         return std::make_pair(0U, ARM::DPRRegisterClass);
3271       break;
3272     }
3273   }
3274   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3275 }
3276
3277 std::vector<unsigned> ARMTargetLowering::
3278 getRegClassForInlineAsmConstraint(const std::string &Constraint,
3279                                   MVT VT) const {
3280   if (Constraint.size() != 1)
3281     return std::vector<unsigned>();
3282
3283   switch (Constraint[0]) {      // GCC ARM Constraint Letters
3284   default: break;
3285   case 'l':
3286     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
3287                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
3288                                  0);
3289   case 'r':
3290     return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
3291                                  ARM::R4, ARM::R5, ARM::R6, ARM::R7,
3292                                  ARM::R8, ARM::R9, ARM::R10, ARM::R11,
3293                                  ARM::R12, ARM::LR, 0);
3294   case 'w':
3295     if (VT == MVT::f32)
3296       return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
3297                                    ARM::S4, ARM::S5, ARM::S6, ARM::S7,
3298                                    ARM::S8, ARM::S9, ARM::S10, ARM::S11,
3299                                    ARM::S12,ARM::S13,ARM::S14,ARM::S15,
3300                                    ARM::S16,ARM::S17,ARM::S18,ARM::S19,
3301                                    ARM::S20,ARM::S21,ARM::S22,ARM::S23,
3302                                    ARM::S24,ARM::S25,ARM::S26,ARM::S27,
3303                                    ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
3304     if (VT == MVT::f64)
3305       return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
3306                                    ARM::D4, ARM::D5, ARM::D6, ARM::D7,
3307                                    ARM::D8, ARM::D9, ARM::D10,ARM::D11,
3308                                    ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
3309       break;
3310   }
3311
3312   return std::vector<unsigned>();
3313 }
3314
3315 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
3316 /// vector.  If it is invalid, don't add anything to Ops.
3317 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3318                                                      char Constraint,
3319                                                      bool hasMemory,
3320                                                      std::vector<SDValue>&Ops,
3321                                                      SelectionDAG &DAG) const {
3322   SDValue Result(0, 0);
3323
3324   switch (Constraint) {
3325   default: break;
3326   case 'I': case 'J': case 'K': case 'L':
3327   case 'M': case 'N': case 'O':
3328     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3329     if (!C)
3330       return;
3331
3332     int64_t CVal64 = C->getSExtValue();
3333     int CVal = (int) CVal64;
3334     // None of these constraints allow values larger than 32 bits.  Check
3335     // that the value fits in an int.
3336     if (CVal != CVal64)
3337       return;
3338
3339     switch (Constraint) {
3340       case 'I':
3341         if (Subtarget->isThumb1Only()) {
3342           // This must be a constant between 0 and 255, for ADD
3343           // immediates.
3344           if (CVal >= 0 && CVal <= 255)
3345             break;
3346         } else if (Subtarget->isThumb2()) {
3347           // A constant that can be used as an immediate value in a
3348           // data-processing instruction.
3349           if (ARM_AM::getT2SOImmVal(CVal) != -1)
3350             break;
3351         } else {
3352           // A constant that can be used as an immediate value in a
3353           // data-processing instruction.
3354           if (ARM_AM::getSOImmVal(CVal) != -1)
3355             break;
3356         }
3357         return;
3358
3359       case 'J':
3360         if (Subtarget->isThumb()) {  // FIXME thumb2
3361           // This must be a constant between -255 and -1, for negated ADD
3362           // immediates. This can be used in GCC with an "n" modifier that
3363           // prints the negated value, for use with SUB instructions. It is
3364           // not useful otherwise but is implemented for compatibility.
3365           if (CVal >= -255 && CVal <= -1)
3366             break;
3367         } else {
3368           // This must be a constant between -4095 and 4095. It is not clear
3369           // what this constraint is intended for. Implemented for
3370           // compatibility with GCC.
3371           if (CVal >= -4095 && CVal <= 4095)
3372             break;
3373         }
3374         return;
3375
3376       case 'K':
3377         if (Subtarget->isThumb1Only()) {
3378           // A 32-bit value where only one byte has a nonzero value. Exclude
3379           // zero to match GCC. This constraint is used by GCC internally for
3380           // constants that can be loaded with a move/shift combination.
3381           // It is not useful otherwise but is implemented for compatibility.
3382           if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
3383             break;
3384         } else if (Subtarget->isThumb2()) {
3385           // A constant whose bitwise inverse can be used as an immediate
3386           // value in a data-processing instruction. This can be used in GCC
3387           // with a "B" modifier that prints the inverted value, for use with
3388           // BIC and MVN instructions. It is not useful otherwise but is
3389           // implemented for compatibility.
3390           if (ARM_AM::getT2SOImmVal(~CVal) != -1)
3391             break;
3392         } else {
3393           // A constant whose bitwise inverse can be used as an immediate
3394           // value in a data-processing instruction. This can be used in GCC
3395           // with a "B" modifier that prints the inverted value, for use with
3396           // BIC and MVN instructions. It is not useful otherwise but is
3397           // implemented for compatibility.
3398           if (ARM_AM::getSOImmVal(~CVal) != -1)
3399             break;
3400         }
3401         return;
3402
3403       case 'L':
3404         if (Subtarget->isThumb1Only()) {
3405           // This must be a constant between -7 and 7,
3406           // for 3-operand ADD/SUB immediate instructions.
3407           if (CVal >= -7 && CVal < 7)
3408             break;
3409         } else if (Subtarget->isThumb2()) {
3410           // A constant whose negation can be used as an immediate value in a
3411           // data-processing instruction. This can be used in GCC with an "n"
3412           // modifier that prints the negated value, for use with SUB
3413           // instructions. It is not useful otherwise but is implemented for
3414           // compatibility.
3415           if (ARM_AM::getT2SOImmVal(-CVal) != -1)
3416             break;
3417         } else {
3418           // A constant whose negation can be used as an immediate value in a
3419           // data-processing instruction. This can be used in GCC with an "n"
3420           // modifier that prints the negated value, for use with SUB
3421           // instructions. It is not useful otherwise but is implemented for
3422           // compatibility.
3423           if (ARM_AM::getSOImmVal(-CVal) != -1)
3424             break;
3425         }
3426         return;
3427
3428       case 'M':
3429         if (Subtarget->isThumb()) { // FIXME thumb2
3430           // This must be a multiple of 4 between 0 and 1020, for
3431           // ADD sp + immediate.
3432           if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
3433             break;
3434         } else {
3435           // A power of two or a constant between 0 and 32.  This is used in
3436           // GCC for the shift amount on shifted register operands, but it is
3437           // useful in general for any shift amounts.
3438           if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
3439             break;
3440         }
3441         return;
3442
3443       case 'N':
3444         if (Subtarget->isThumb()) {  // FIXME thumb2
3445           // This must be a constant between 0 and 31, for shift amounts.
3446           if (CVal >= 0 && CVal <= 31)
3447             break;
3448         }
3449         return;
3450
3451       case 'O':
3452         if (Subtarget->isThumb()) {  // FIXME thumb2
3453           // This must be a multiple of 4 between -508 and 508, for
3454           // ADD/SUB sp = sp + immediate.
3455           if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
3456             break;
3457         }
3458         return;
3459     }
3460     Result = DAG.getTargetConstant(CVal, Op.getValueType());
3461     break;
3462   }
3463
3464   if (Result.getNode()) {
3465     Ops.push_back(Result);
3466     return;
3467   }
3468   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
3469                                                       Ops, DAG);
3470 }