lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/APInt.h"
  19 #include "llvm/ADT/VectorExtras.h"
  20 #include "llvm/CallingConv.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/MachineFrameInfo.h"
  23 #include "llvm/CodeGen/MachineFunction.h"
  24 #include "llvm/CodeGen/MachineInstrBuilder.h"
  25 #include "llvm/CodeGen/MachineRegisterInfo.h"
  26 #include "llvm/CodeGen/SelectionDAG.h"
  27 #include "llvm/Constants.h"
  28 #include "llvm/Function.h"
  29 #include "llvm/Intrinsics.h"
  30 #include "llvm/Support/Debug.h"
  31 #include "llvm/Support/MathExtras.h"
  32 #include "llvm/Target/TargetOptions.h"
  33
  34 #include <map>
  35
  36 using namespace llvm;
  37
  38 // Used in getTargetNodeName() below
  39 namespace {
  40   std::map<unsigned, const char *> node_names;
  41
  42   //! MVT mapping to useful data for Cell SPU
  43   struct valtype_map_s {
  44     const MVT   valtype;
  45     const int   prefslot_byte;
  46   };
  47
  48   const valtype_map_s valtype_map[] = {
  49     { MVT::i1,   3 },
  50     { MVT::i8,   3 },
  51     { MVT::i16,  2 },
  52     { MVT::i32,  0 },
  53     { MVT::f32,  0 },
  54     { MVT::i64,  0 },
  55     { MVT::f64,  0 },
  56     { MVT::i128, 0 }
  57   };
  58
  59   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  60
  61   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  62     const valtype_map_s *retval = 0;
  63
  64     for (size_t i = 0; i < n_valtype_map; ++i) {
  65       if (valtype_map[i].valtype == VT) {
  66         retval = valtype_map + i;
  67         break;
  68       }
  69     }
  70
  71 #ifndef NDEBUG
  72     if (retval == 0) {
  73       cerr << "getValueTypeMapEntry returns NULL for "
  74            << VT.getMVTString()
  75            << "\n";
  76       abort();
  77     }
  78 #endif
  79
  80     return retval;
  81   }
  82
  83   //! Expand a library call into an actual call DAG node
  84   /*!
  85    \note
  86    This code is taken from SelectionDAGLegalize, since it is not exposed as
  87    part of the LLVM SelectionDAG API.
  88    */
  89
  90   SDValue
  91   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  92                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  93     // The input chain to this libcall is the entry node of the function.
  94     // Legalizing the call will automatically add the previous call to the
  95     // dependence.
  96     SDValue InChain = DAG.getEntryNode();
  97
  98     TargetLowering::ArgListTy Args;
  99     TargetLowering::ArgListEntry Entry;
 100     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 101       MVT ArgVT = Op.getOperand(i).getValueType();
 102       const Type *ArgTy = ArgVT.getTypeForMVT();
 103       Entry.Node = Op.getOperand(i);
 104       Entry.Ty = ArgTy;
 105       Entry.isSExt = isSigned;
 106       Entry.isZExt = !isSigned;
 107       Args.push_back(Entry);
 108     }
 109     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 110                                            TLI.getPointerTy());
 111
 112     // Splice the libcall in wherever FindInputOutputChains tells us to.
 113     const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
 114     std::pair<SDValue, SDValue> CallInfo =
 115             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 116                             CallingConv::C, false, Callee, Args, DAG,
 117                             Op.getNode()->getDebugLoc());
 118
 119     return CallInfo.first;
 120   }
 121 }
 122 //! Constructor: registers the SPU register classes and the legalization action for each operation/type pair.
 123 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 124   : TargetLowering(TM),
 125     SPUTM(TM)
 126 {
 127   // Presumably marks division by a power of two as cheap (the old "fold setcc" remark did not match this call).
 128   setPow2DivIsCheap();
 129
 130   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 131   setUseUnderscoreSetJmp(true);
 132   setUseUnderscoreLongJmp(true);
 133
 134   // Set RTLIB libcall names as used by SPU:
 135   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 136
 137   // Set up the SPU's register classes:
 138   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 139   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 140   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 141   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 142   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 143   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 144   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 145
 146   // SPU has no sign or zero extended loads for i1, i8, i16:
 147   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 148   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 149   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 150
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 152   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 153
 154   // FP constants: f32 is marked legal, f64 is custom lowered:
 155   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 156   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 157
 158   // SPU's loads and stores have to be custom lowered (i8 up to, but not including, i128):
 159   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 160        ++sctype) {
 161     MVT VT = (MVT::SimpleValueType)sctype;
 162
 163     setOperationAction(ISD::LOAD,   VT, Custom);
 164     setOperationAction(ISD::STORE,  VT, Custom);
 165     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 166     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 167     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 168
 169     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {  // every narrower type down to i8
 170       MVT StoreVT = (MVT::SimpleValueType) stype;
 171       setTruncStoreAction(VT, StoreVT, Expand);
 172     }
 173   }
 174
 175   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 176        ++sctype) {
 177     MVT VT = (MVT::SimpleValueType) sctype;
 178
 179     setOperationAction(ISD::LOAD,   VT, Custom);
 180     setOperationAction(ISD::STORE,  VT, Custom);
 181
 182     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 183       MVT StoreVT = (MVT::SimpleValueType) stype;
 184       setTruncStoreAction(VT, StoreVT, Expand);
 185     }
 186   }
 187
 188   // Expand the jumptable branches
 189   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 190   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 191
 192   // Custom lower SELECT_CC for most cases, but expand by default
 193   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 194   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 195   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 196   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 197   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 198
 199   // SPU has no intrinsics for these particular operations:
 200   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 201
 202   // SPU has no SREM/UREM instructions
 203   setOperationAction(ISD::SREM, MVT::i32, Expand);
 204   setOperationAction(ISD::UREM, MVT::i32, Expand);
 205   setOperationAction(ISD::SREM, MVT::i64, Expand);
 206   setOperationAction(ISD::UREM, MVT::i64, Expand);
 207
 208   // We don't support sin/cos/sqrt/fmod
 209   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 210   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 211   setOperationAction(ISD::FREM , MVT::f64, Expand);
 212   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 213   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 214   setOperationAction(ISD::FREM , MVT::f32, Expand);
 215
 216   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 217   // for f32!)
 218   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 219   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 220
 221   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 222   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 223
 224   // SPU can do rotate right and left, so legalize it... but customize for i8
 225   // because instructions don't exist.
 226
 227   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 228   //        .td files.
 229   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 230   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 231   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 232
 233   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 234   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 235   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 236
 237   // SPU has no native version of shift left/right for i8
 238   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 239   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 240   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 241
 242   // Make these operations legal and handle them during instruction selection:
 243   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 244   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 245   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 246
 247   // Custom lower i8 multiplication; i32 and i64 multiplies are marked legal
 248   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 249   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 250   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 251
 252   // Custom lower i8 add/sub; the i64 forms are marked legal
 253   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 254   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 255   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 256   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 257
 258   // SPU does not have BSWAP. It does have i32 support CTLZ.
 259   // CTPOP has to be custom lowered.
 260   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 261   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 262
 263   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 264   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 265   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 266   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 267
 268   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 269   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 270
 271   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 272
 273   // SPU has a version of select that implements (a&~c)|(b&c), just like
 274   // select ought to work:
 275   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 276   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 277   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 278   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 279
 280   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 281   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 282   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 283   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 284   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 285
 286   // Custom lower i128 -> i64 truncates
 287   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 288
 289   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 290   // to expand to a libcall, hence the custom lowering:
 291   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 292   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);  // NOTE(review): set again (to Promote) further down; presumably the later call wins
 293
 294   // f64 FDIV is marked Expand:
 295   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 296
 297   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 298   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 299   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 300   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 301   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 302   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 303   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 304   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 305   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 306
 307   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 308   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 309   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 310   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 311
 312   // We cannot sextinreg(i1).  Expand to shifts.
 313   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 314
 315   // Support label based line numbers.
 316   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 317   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 318
 319   // We want to legalize GlobalAddress and ConstantPool nodes into the
 320   // appropriate instructions to materialize the address.
 321   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 322        ++sctype) {
 323     MVT VT = (MVT::SimpleValueType)sctype;
 324
 325     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 326     setOperationAction(ISD::ConstantPool,   VT, Custom);
 327     setOperationAction(ISD::JumpTable,      VT, Custom);
 328   }
 329
 330   // RET must be custom lowered, to meet ABI requirements
 331   setOperationAction(ISD::RET,           MVT::Other, Custom);
 332
 333   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 334   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 335
 336   // Use the default implementation.
 337   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 338   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 339   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 340   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 341   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 342   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 343   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 344
 345   // Cell SPU has instructions for converting between i64 and fp.
 346   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 347   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);  // same setting as already made above
 348
 349   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 350   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 351
 352   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 353   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 354
 355   // Register the vector value types with the vector register class; the loop
 356   // further down then assigns an action to each operation for every vector type.
 357   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 358   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 359   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 360   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 361   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 362   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 363
 364   // "Odd size" vector classes that we're willing to support:
 365   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 366
 367   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 368        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 369     MVT VT = (MVT::SimpleValueType)i;
 370
 371     // add/sub are legal for all supported vector VT's.
 372     setOperationAction(ISD::ADD,     VT, Legal);
 373     setOperationAction(ISD::SUB,     VT, Legal);
 374     // mul is marked legal here as well.
 375     setOperationAction(ISD::MUL,     VT, Legal);
 376
 377     setOperationAction(ISD::AND,     VT, Legal);
 378     setOperationAction(ISD::OR,      VT, Legal);
 379     setOperationAction(ISD::XOR,     VT, Legal);
 380     setOperationAction(ISD::LOAD,    VT, Legal);
 381     setOperationAction(ISD::SELECT,  VT, Legal);
 382     setOperationAction(ISD::STORE,   VT, Legal);
 383
 384     // These operations need to be expanded:
 385     setOperationAction(ISD::SDIV,    VT, Expand);
 386     setOperationAction(ISD::SREM,    VT, Expand);
 387     setOperationAction(ISD::UDIV,    VT, Expand);
 388     setOperationAction(ISD::UREM,    VT, Expand);
 389
 390     // Custom lower build_vector, constant pool spills, insert and
 391     // extract vector elements:
 392     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 393     setOperationAction(ISD::ConstantPool, VT, Custom);
 394     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 395     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 396     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 397     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 398   }
 399
 400   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 401   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 402   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 403   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);  // presumably already covered by the loop above
 404
 405   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 406
 407   setShiftAmountType(MVT::i32);
 408   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 409
 410   setStackPointerRegisterToSaveRestore(SPU::R1);
 411
 412   // We have target-specific dag combine patterns for the following nodes:
 413   setTargetDAGCombine(ISD::ADD);
 414   setTargetDAGCombine(ISD::ZERO_EXTEND);
 415   setTargetDAGCombine(ISD::SIGN_EXTEND);
 416   setTargetDAGCombine(ISD::ANY_EXTEND);
 417
 418   computeRegisterProperties();
 419
 420   // Set pre-RA register scheduler default to BURR, which produces slightly
 421   // better code than the default (could also be TDRR, but TargetLowering.h
 422   // needs a mod to support that model):
 423   setSchedulingPreference(SchedulingForRegPressure);
 424 }
 425
 426 const char *
 427 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 428 {
 429   if (node_names.empty()) {
 430     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 431     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 432     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 433     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 434     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 435     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 436     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 437     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 438     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 439     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 440     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 441     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 442     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 443     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 444     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 445     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 446     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 447     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 448     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 449     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 450     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 451     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 452             "SPUISD::ROTBYTES_LEFT_BITS";
 453     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 454     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 455     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 456     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 457     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 458   }
 459
 460   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 461
 462   return ((i != node_names.end()) ? i->second : 0);
 463 }
 464
 465 //===----------------------------------------------------------------------===//
 466 // Return the Cell SPU's SETCC result type
 467 //===----------------------------------------------------------------------===//
 468
 469 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
 470   if (VT == MVT::i8 || VT == MVT::i16) return VT;  // i8 and i16 keep their own type
 471   return MVT::i32;                                 // every other type, i32 included
 472 }
 473
 474 //===----------------------------------------------------------------------===//
 475 // Calling convention code:
 476 //===----------------------------------------------------------------------===//
 477
 478 #include "SPUGenCallingConv.inc"
 479
 480 //===----------------------------------------------------------------------===//
 481 //  LowerOperation implementation
 482 //===----------------------------------------------------------------------===//
 483
 484 /// Custom lower loads for CellSPU
 485 /*!
 486  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 487  within a 16-byte block, we have to rotate to extract the requested element.
 488
 489  For extending loads, we also want to ensure that the following sequence is
 490  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 491
 492 \verbatim
 493 %1  v16i8,ch = load
 494 %2  v16i8,ch = rotate %1
 495 %3  v4f8, ch = bitconvert %2
 496 %4  f32      = vec2perfslot %3
 497 %5  f64      = fp_extend %4
 498 \endverbatim
 499 */
 500 static SDValue
 501 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 502   LoadSDNode *LN = cast<LoadSDNode>(Op);
 503   SDValue the_chain = LN->getChain();
 504   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 505   MVT InVT = LN->getMemoryVT();
 506   MVT OutVT = Op.getValueType();
 507   ISD::LoadExtType ExtType = LN->getExtensionType();
 508   unsigned alignment = LN->getAlignment();
 509   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 510   DebugLoc dl = Op.getDebugLoc();
 511
 512   switch (LN->getAddressingMode()) {
 513   case ISD::UNINDEXED: {
 514     SDValue result;
 515     SDValue basePtr = LN->getBasePtr();
 516     SDValue rotate;
 517
 518     if (alignment == 16) {
 519       ConstantSDNode *CN;
 520
 521       // Special cases for a known aligned load to simplify the base pointer
 522       // and the rotation amount:
 523       if (basePtr.getOpcode() == ISD::ADD
 524           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 525         // Known offset into basePtr
 526         int64_t offset = CN->getSExtValue();
 527         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 528
 529         if (rotamt < 0)
 530           rotamt += 16;
 531
 532         rotate = DAG.getConstant(rotamt, MVT::i16);
 533
 534         // Simplify the base pointer for this case:
 535         basePtr = basePtr.getOperand(0);
 536         if ((offset & ~0xf) > 0) {
 537           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 538                                 basePtr,
 539                                 DAG.getConstant((offset & ~0xf), PtrVT));
 540         }
 541       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 542                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 543                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 544                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 545         // Plain aligned a-form address: rotate into preferred slot
 546         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 547         int64_t rotamt = -vtm->prefslot_byte;
 548         if (rotamt < 0)
 549           rotamt += 16;
 550         rotate = DAG.getConstant(rotamt, MVT::i16);
 551       } else {
 552         // Offset the rotate amount by the basePtr and the preferred slot
 553         // byte offset
 554         int64_t rotamt = -vtm->prefslot_byte;
 555         if (rotamt < 0)
 556           rotamt += 16;
 557         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 558                              basePtr,
 559                              DAG.getConstant(rotamt, PtrVT));
 560       }
 561     } else {
 562       // Unaligned load: must be more pessimistic about addressing modes:
 563       if (basePtr.getOpcode() == ISD::ADD) {
 564         MachineFunction &MF = DAG.getMachineFunction();
 565         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 566         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 567         SDValue Flag;
 568
 569         SDValue Op0 = basePtr.getOperand(0);
 570         SDValue Op1 = basePtr.getOperand(1);
 571
 572         if (isa<ConstantSDNode>(Op1)) {
 573           // Convert the (add <ptr>, <const>) to an indirect address contained
 574           // in a register. Note that this is done because we need to avoid
 575           // creating a 0(reg) d-form address due to the SPU's block loads.
 576           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
 577           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 578           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 579         } else {
 580           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 581           // will likely be lowered as a reg(reg) x-form address.
 582           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
 583         }
 584       } else {
 585         basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 586                               basePtr,
 587                               DAG.getConstant(0, PtrVT));
 588       }
 589
 590       // Offset the rotate amount by the basePtr and the preferred slot
 591       // byte offset
 592       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 593                            basePtr,
 594                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 595     }
 596
 597     // Re-emit as a v16i8 vector load
 598     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 599                          LN->getSrcValue(), LN->getSrcValueOffset(),
 600                          LN->isVolatile(), 16);
 601
 602     // Update the chain
 603     the_chain = result.getValue(1);
 604
 605     // Rotate into the preferred slot:
 606     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 607                          result.getValue(0), rotate);
 608
 609     // Convert the loaded v16i8 vector to the appropriate vector type
 610     // specified by the operand:
 611     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
 612     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 613                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 614
 615     // Handle extending loads by extending the scalar result:
 616     if (ExtType == ISD::SEXTLOAD) {
 617       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 618     } else if (ExtType == ISD::ZEXTLOAD) {
 619       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 620     } else if (ExtType == ISD::EXTLOAD) {
 621       unsigned NewOpc = ISD::ANY_EXTEND;
 622
 623       if (OutVT.isFloatingPoint())
 624         NewOpc = ISD::FP_EXTEND;
 625
 626       result = DAG.getNode(NewOpc, dl, OutVT, result);
 627     }
 628
 629     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 630     SDValue retops[2] = {
 631       result,
 632       the_chain
 633     };
 634
 635     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 636                          retops, sizeof(retops) / sizeof(retops[0]));
 637     return result;
 638   }
 639   case ISD::PRE_INC:
 640   case ISD::PRE_DEC:
 641   case ISD::POST_INC:
 642   case ISD::POST_DEC:
 643   case ISD::LAST_INDEXED_MODE:
 644     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 645             "UNINDEXED\n";
 646     cerr << (unsigned) LN->getAddressingMode() << "\n";
 647     abort();
 648     /*NOTREACHED*/
 649   }
 650
 651   return SDValue();
 652 }
 653
 654 /// Custom lower stores for CellSPU
 655 /*!
 656  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 657  within a 16-byte block, we have to generate a shuffle to insert the
 658  requested element into its place, then store the resulting block.
 659  */
 660 static SDValue
 661 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 662   StoreSDNode *SN = cast<StoreSDNode>(Op);
 663   SDValue Value = SN->getValue();
 664   MVT VT = Value.getValueType();
 665   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 666   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 667   DebugLoc dl = Op.getDebugLoc();
 668   unsigned alignment = SN->getAlignment();
 669
 670   switch (SN->getAddressingMode()) {
 671   case ISD::UNINDEXED: {
 672     // The vector type we really want to load from the 16-byte chunk.
 673     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
 674         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 675
 676     SDValue alignLoadVec;
 677     SDValue basePtr = SN->getBasePtr();
 678     SDValue the_chain = SN->getChain();
 679     SDValue insertEltOffs;
 680
 681     if (alignment == 16) {
 682       ConstantSDNode *CN;
 683
 684       // Special cases for a known aligned load to simplify the base pointer
 685       // and insertion byte:
 686       if (basePtr.getOpcode() == ISD::ADD
 687           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 688         // Known offset into basePtr
 689         int64_t offset = CN->getSExtValue();
 690
 691         // Simplify the base pointer for this case:
 692         basePtr = basePtr.getOperand(0);
 693         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 694                                     basePtr,
 695                                     DAG.getConstant((offset & 0xf), PtrVT));
 696
 697         if ((offset & ~0xf) > 0) {
 698           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 699                                 basePtr,
 700                                 DAG.getConstant((offset & ~0xf), PtrVT));
 701         }
 702       } else {
 703         // Otherwise, assume it's at byte 0 of basePtr
 704         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 705                                     basePtr,
 706                                     DAG.getConstant(0, PtrVT));
 707       }
 708     } else {
 709       // Unaligned load: must be more pessimistic about addressing modes:
 710       if (basePtr.getOpcode() == ISD::ADD) {
 711         MachineFunction &MF = DAG.getMachineFunction();
 712         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 713         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 714         SDValue Flag;
 715
 716         SDValue Op0 = basePtr.getOperand(0);
 717         SDValue Op1 = basePtr.getOperand(1);
 718
 719         if (isa<ConstantSDNode>(Op1)) {
 720           // Convert the (add <ptr>, <const>) to an indirect address contained
 721           // in a register. Note that this is done because we need to avoid
 722           // creating a 0(reg) d-form address due to the SPU's block loads.
 723           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
 724           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 725           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 726         } else {
 727           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 728           // will likely be lowered as a reg(reg) x-form address.
 729           basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
 730         }
 731       } else {
 732         basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
 733                               basePtr,
 734                               DAG.getConstant(0, PtrVT));
 735       }
 736
 737       // Insertion point is solely determined by basePtr's contents
 738       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 739                                   basePtr,
 740                                   DAG.getConstant(0, PtrVT));
 741     }
 742
 743     // Re-emit as a v16i8 vector load
 744     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 745                                SN->getSrcValue(), SN->getSrcValueOffset(),
 746                                SN->isVolatile(), 16);
 747
 748     // Update the chain
 749     the_chain = alignLoadVec.getValue(1);
 750
 751     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 752     SDValue theValue = SN->getValue();
 753     SDValue result;
 754
 755     if (StVT != VT
 756         && (theValue.getOpcode() == ISD::AssertZext
 757             || theValue.getOpcode() == ISD::AssertSext)) {
 758       // Drill down and get the value for zero- and sign-extended
 759       // quantities
 760       theValue = theValue.getOperand(0);
 761     }
 762
 763     // If the base pointer is already a D-form address, then just create
 764     // a new D-form address with a slot offset and the orignal base pointer.
 765     // Otherwise generate a D-form address with the slot offset relative
 766     // to the stack pointer, which is always aligned.
 767 #if !defined(NDEBUG)
 768       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 769         cerr << "CellSPU LowerSTORE: basePtr = ";
 770         basePtr.getNode()->dump(&DAG);
 771         cerr << "\n";
 772       }
 773 #endif
 774
 775     SDValue insertEltOp =
 776             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 777     SDValue vectorizeOp =
 778             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 779
 780     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 781                          vectorizeOp, alignLoadVec,
 782                          DAG.getNode(ISD::BIT_CONVERT, dl,
 783                                      MVT::v4i32, insertEltOp));
 784
 785     result = DAG.getStore(the_chain, dl, result, basePtr,
 786                           LN->getSrcValue(), LN->getSrcValueOffset(),
 787                           LN->isVolatile(), LN->getAlignment());
 788
 789 #if 0 && !defined(NDEBUG)
 790     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 791       const SDValue &currentRoot = DAG.getRoot();
 792
 793       DAG.setRoot(result);
 794       cerr << "------- CellSPU:LowerStore result:\n";
 795       DAG.dump();
 796       cerr << "-------\n";
 797       DAG.setRoot(currentRoot);
 798     }
 799 #endif
 800
 801     return result;
 802     /*UNREACHED*/
 803   }
 804   case ISD::PRE_INC:
 805   case ISD::PRE_DEC:
 806   case ISD::POST_INC:
 807   case ISD::POST_DEC:
 808   case ISD::LAST_INDEXED_MODE:
 809     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 810             "UNINDEXED\n";
 811     cerr << (unsigned) SN->getAddressingMode() << "\n";
 812     abort();
 813     /*NOTREACHED*/
 814   }
 815
 816   return SDValue();
 817 }
 818
 819 //! Generate the address of a constant pool entry.
 820 SDValue
 821 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 822   MVT PtrVT = Op.getValueType();
 823   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 824   Constant *C = CP->getConstVal();
 825   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 826   SDValue Zero = DAG.getConstant(0, PtrVT);
 827   const TargetMachine &TM = DAG.getTarget();
 828
 829   if (TM.getRelocationModel() == Reloc::Static) {
 830     if (!ST->usingLargeMem()) {
 831       // Just return the SDValue with the constant pool address in it.
 832       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 833     } else {
 834       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 835       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 836       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 837     }
 838   }
 839
 840   assert(0 &&
 841          "LowerConstantPool: Relocation model other than static"
 842          " not supported.");
 843   return SDValue();
 844 }
 845
 846 //! Alternate entry point for generating the address of a constant pool entry
 847 SDValue
 848 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 849   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 850 }
 851
 852 static SDValue
 853 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 854   MVT PtrVT = Op.getValueType();
 855   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 856   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 857   SDValue Zero = DAG.getConstant(0, PtrVT);
 858   const TargetMachine &TM = DAG.getTarget();
 859
 860   if (TM.getRelocationModel() == Reloc::Static) {
 861     if (!ST->usingLargeMem()) {
 862       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 863     } else {
 864       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 865       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 866       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 867     }
 868   }
 869
 870   assert(0 &&
 871          "LowerJumpTable: Relocation model other than static not supported.");
 872   return SDValue();
 873 }
 874
 875 static SDValue
 876 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 877   MVT PtrVT = Op.getValueType();
 878   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 879   GlobalValue *GV = GSDN->getGlobal();
 880   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 881   const TargetMachine &TM = DAG.getTarget();
 882   SDValue Zero = DAG.getConstant(0, PtrVT);
 883
 884   if (TM.getRelocationModel() == Reloc::Static) {
 885     if (!ST->usingLargeMem()) {
 886       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 887     } else {
 888       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 889       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 890       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 891     }
 892   } else {
 893     cerr << "LowerGlobalAddress: Relocation model other than static not "
 894          << "supported.\n";
 895     abort();
 896     /*NOTREACHED*/
 897   }
 898
 899   return SDValue();
 900 }
 901
 902 //! Custom lower double precision floating point constants
 903 static SDValue
 904 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 905   MVT VT = Op.getValueType();
 906
 907   if (VT == MVT::f64) {
 908     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 909
 910     assert((FP != 0) &&
 911            "LowerConstantFP: Node is not ConstantFPSDNode");
 912
 913     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 914     SDValue T = DAG.getConstant(dbits, MVT::i64);
 915     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
 916     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
 917                        DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
 918   }
 919
 920   return SDValue();
 921 }
 922
 923 static SDValue
 924 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 925 {
 926   MachineFunction &MF = DAG.getMachineFunction();
 927   MachineFrameInfo *MFI = MF.getFrameInfo();
 928   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 929   SmallVector<SDValue, 48> ArgValues;
 930   SDValue Root = Op.getOperand(0);
 931   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 932   DebugLoc dl = Op.getDebugLoc();
 933
 934   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 935   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 936
 937   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 938   unsigned ArgRegIdx = 0;
 939   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 940
 941   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 942
 943   // Add DAG nodes to load the arguments or copy them out of registers.
 944   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 945        ArgNo != e; ++ArgNo) {
 946     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 947     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 948     SDValue ArgVal;
 949
 950     if (ArgRegIdx < NumArgRegs) {
 951       const TargetRegisterClass *ArgRegClass;
 952
 953       switch (ObjectVT.getSimpleVT()) {
 954       default: {
 955         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 956              << ObjectVT.getMVTString()
 957              << "\n";
 958         abort();
 959       }
 960       case MVT::i8:
 961         ArgRegClass = &SPU::R8CRegClass;
 962         break;
 963       case MVT::i16:
 964         ArgRegClass = &SPU::R16CRegClass;
 965         break;
 966       case MVT::i32:
 967         ArgRegClass = &SPU::R32CRegClass;
 968         break;
 969       case MVT::i64:
 970         ArgRegClass = &SPU::R64CRegClass;
 971         break;
 972       case MVT::i128:
 973         ArgRegClass = &SPU::GPRCRegClass;
 974         break;
 975       case MVT::f32:
 976         ArgRegClass = &SPU::R32FPRegClass;
 977         break;
 978       case MVT::f64:
 979         ArgRegClass = &SPU::R64FPRegClass;
 980         break;
 981       case MVT::v2f64:
 982       case MVT::v4f32:
 983       case MVT::v2i64:
 984       case MVT::v4i32:
 985       case MVT::v8i16:
 986       case MVT::v16i8:
 987         ArgRegClass = &SPU::VECREGRegClass;
 988         break;
 989       }
 990
 991       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
 992       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 993       ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
 994       ++ArgRegIdx;
 995     } else {
 996       // We need to load the argument to a virtual register if we determined
 997       // above that we ran out of physical registers of the appropriate type
 998       // or we're forced to do vararg
 999       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1000       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1001       ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1002       ArgOffset += StackSlotSize;
1003     }
1004
1005     ArgValues.push_back(ArgVal);
1006     // Update the chain
1007     Root = ArgVal.getOperand(0);
1008   }
1009
1010   // vararg handling:
1011   if (isVarArg) {
1012     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1013     // We will spill (79-3)+1 registers to the stack
1014     SmallVector<SDValue, 79-3+1> MemOps;
1015
1016     // Create the frame slot
1017
1018     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1019       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1020       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1021       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1022       SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1023       Root = Store.getOperand(0);
1024       MemOps.push_back(Store);
1025
1026       // Increment address by stack slot size for the next stored argument
1027       ArgOffset += StackSlotSize;
1028     }
1029     if (!MemOps.empty())
1030       Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1031                          &MemOps[0], MemOps.size());
1032   }
1033
1034   ArgValues.push_back(Root);
1035
1036   // Return the new list of results.
1037   return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1038                      &ArgValues[0], ArgValues.size());
1039 }
1040
1041 /// isLSAAddress - Return the immediate to use if the specified
1042 /// value is representable as a LSA address.
1043 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1044   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1045   if (!C) return 0;
1046
1047   int Addr = C->getZExtValue();
1048   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1049       (Addr << 14 >> 14) != Addr)
1050     return 0;  // Top 14 bits have to be sext of immediate.
1051
1052   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1053 }
1054
//! Lower a call node into an SPUISD::CALL sequence.
//!
//! Outgoing arguments are assigned, in order, to the registers returned by
//! SPURegisterInfo::getArgRegs() until those run out; each remaining argument
//! is stored at an offset from R1, one stack slot apart. The call itself is
//! bracketed by CALLSEQ_START/CALLSEQ_END, and any result is copied back out
//! of R3 (R4 then R3 for a pair of i32 results).
//!
//! \param Op   The call node (a CallSDNode) being lowered.
//! \param DAG  Selection DAG that receives the new nodes.
//! \param ST   SPU subtarget, queried for the large-memory setting when the
//!             callee address is formed.
//! \return The chain alone when the call produces no value; otherwise result
//!         number Op.getResNo() of a merge of the result values and chain.
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee    = TheCall->getCallee();
  unsigned NumOps     = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // All three groups of cases below do the same thing: take the next
    // argument register if one is left, otherwise store the argument at
    // PtrOff and advance ArgOffset by one stack slot.
    switch (Arg.getValueType().getSimpleVT()) {
    // NOTE(review): if assert() is compiled out, an unexpected type falls
    // through into the integer cases below.
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  // NOTE(review): the stores in MemOpChains were chained to the incoming
  // chain, not to the CALLSEQ_START node created just above. When there are
  // stack arguments, the TokenFactor below therefore replaces Chain with a
  // node that does not appear to reference CALLSEQ_START at all -- confirm
  // whether that is intended.
  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
        Callee.getValueType());

    // External symbols: A-form address normally, indirect address in
    // large-memory mode.
    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  // Operand order for the call node: chain, callee, argument registers, and
  // finally the flag (when any argument was copied into a register).
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  // Only pick up the flag from CALLSEQ_END when there is a result to copy.
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  // Room for up to two result values plus the chain.
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      // Two i32 results: the first is read from R4, the second from R3.
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                                   InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}
1289
1290 static SDValue
1291 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1292   SmallVector<CCValAssign, 16> RVLocs;
1293   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1294   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1295   DebugLoc dl = Op.getDebugLoc();
1296   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1297   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1298
1299   // If this is the first return lowered for this function, add the regs to the
1300   // liveout set for the function.
1301   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1302     for (unsigned i = 0; i != RVLocs.size(); ++i)
1303       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1304   }
1305
1306   SDValue Chain = Op.getOperand(0);
1307   SDValue Flag;
1308
1309   // Copy the result values into the output registers.
1310   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1311     CCValAssign &VA = RVLocs[i];
1312     assert(VA.isRegLoc() && "Can only return in registers!");
1313     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1314                              Op.getOperand(i*2+1), Flag);
1315     Flag = Chain.getValue(1);
1316   }
1317
1318   if (Flag.getNode())
1319     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1320   else
1321     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1322 }
1323
1324
1325 //===----------------------------------------------------------------------===//
1326 // Vector related lowering:
1327 //===----------------------------------------------------------------------===//
1328
1329 static ConstantSDNode *
1330 getVecImm(SDNode *N) {
1331   SDValue OpVal(0, 0);
1332
1333   // Check to see if this buildvec has a single non-undef value in its elements.
1334   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1335     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1336     if (OpVal.getNode() == 0)
1337       OpVal = N->getOperand(i);
1338     else if (OpVal != N->getOperand(i))
1339       return 0;
1340   }
1341
1342   if (OpVal.getNode() != 0) {
1343     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1344       return CN;
1345     }
1346   }
1347
1348   return 0; // All UNDEF: use implicit def.; not Constant node
1349 }
1350
1351 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1352 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1353 /// constant
1354 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1355                               MVT ValueType) {
1356   if (ConstantSDNode *CN = getVecImm(N)) {
1357     uint64_t Value = CN->getZExtValue();
1358     if (ValueType == MVT::i64) {
1359       uint64_t UValue = CN->getZExtValue();
1360       uint32_t upper = uint32_t(UValue >> 32);
1361       uint32_t lower = uint32_t(UValue);
1362       if (upper != lower)
1363         return SDValue();
1364       Value = Value >> 32;
1365     }
1366     if (Value <= 0x3ffff)
1367       return DAG.getTargetConstant(Value, ValueType);
1368   }
1369
1370   return SDValue();
1371 }
1372
1373 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1374 /// and the value fits into a signed 16-bit constant, and if so, return the
1375 /// constant
1376 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1377                               MVT ValueType) {
1378   if (ConstantSDNode *CN = getVecImm(N)) {
1379     int64_t Value = CN->getSExtValue();
1380     if (ValueType == MVT::i64) {
1381       uint64_t UValue = CN->getZExtValue();
1382       uint32_t upper = uint32_t(UValue >> 32);
1383       uint32_t lower = uint32_t(UValue);
1384       if (upper != lower)
1385         return SDValue();
1386       Value = Value >> 32;
1387     }
1388     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1389       return DAG.getTargetConstant(Value, ValueType);
1390     }
1391   }
1392
1393   return SDValue();
1394 }
1395
1396 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1397 /// and the value fits into a signed 10-bit constant, and if so, return the
1398 /// constant
1399 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1400                               MVT ValueType) {
1401   if (ConstantSDNode *CN = getVecImm(N)) {
1402     int64_t Value = CN->getSExtValue();
1403     if (ValueType == MVT::i64) {
1404       uint64_t UValue = CN->getZExtValue();
1405       uint32_t upper = uint32_t(UValue >> 32);
1406       uint32_t lower = uint32_t(UValue);
1407       if (upper != lower)
1408         return SDValue();
1409       Value = Value >> 32;
1410     }
1411     if (isS10Constant(Value))
1412       return DAG.getTargetConstant(Value, ValueType);
1413   }
1414
1415   return SDValue();
1416 }
1417
1418 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1419 /// and the value fits into a signed 8-bit constant, and if so, return the
1420 /// constant.
1421 ///
1422 /// @note: The incoming vector is v16i8 because that's the only way we can load
1423 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1424 /// same value.
1425 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1426                              MVT ValueType) {
1427   if (ConstantSDNode *CN = getVecImm(N)) {
1428     int Value = (int) CN->getZExtValue();
1429     if (ValueType == MVT::i16
1430         && Value <= 0xffff                 /* truncated from uint64_t */
1431         && ((short) Value >> 8) == ((short) Value & 0xff))
1432       return DAG.getTargetConstant(Value & 0xff, ValueType);
1433     else if (ValueType == MVT::i8
1434              && (Value & 0xff) == Value)
1435       return DAG.getTargetConstant(Value, ValueType);
1436   }
1437
1438   return SDValue();
1439 }
1440
1441 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1442 /// and the value fits into a signed 16-bit constant, and if so, return the
1443 /// constant
1444 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1445                                MVT ValueType) {
1446   if (ConstantSDNode *CN = getVecImm(N)) {
1447     uint64_t Value = CN->getZExtValue();
1448     if ((ValueType == MVT::i32
1449           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1450         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1451       return DAG.getTargetConstant(Value >> 16, ValueType);
1452   }
1453
1454   return SDValue();
1455 }
1456
1457 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1458 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1459   if (ConstantSDNode *CN = getVecImm(N)) {
1460     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1461   }
1462
1463   return SDValue();
1464 }
1465
1466 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1467 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1468   if (ConstantSDNode *CN = getVecImm(N)) {
1469     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1470   }
1471
1472   return SDValue();
1473 }
1474
1475 // If this is a vector of constants or undefs, get the bits.  A bit in
1476 // UndefBits is set if the corresponding element of the vector is an
1477 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1478 // zero.   Return true if this is not an array of constants, false if it is.
1479 //
1480 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1481                                        uint64_t UndefBits[2]) {
1482   // Start with zero'd results.
1483   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1484
1485   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1486   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1487     SDValue OpVal = BV->getOperand(i);
1488
1489     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1490     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1491
1492     uint64_t EltBits = 0;
1493     if (OpVal.getOpcode() == ISD::UNDEF) {
1494       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1495       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1496       continue;
1497     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1498       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1499     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1500       const APFloat &apf = CN->getValueAPF();
1501       EltBits = (CN->getValueType(0) == MVT::f32
1502                  ? FloatToBits(apf.convertToFloat())
1503                  : DoubleToBits(apf.convertToDouble()));
1504     } else {
1505       // Nonconstant element.
1506       return true;
1507     }
1508
1509     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1510   }
1511
1512   //printf("%llx %llx  %llx %llx\n",
1513   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1514   return false;
1515 }
1516
/// Test whether the 128-bit constant described by Bits128/Undef128 is a splat
/// (repetition) of a single value across the whole vector.  For example,
/// "0x01010101010101..." is a splat of 0x01, 0x0101, and 0x01010101.
///
/// MinSplatBits selects the width that is reported: 64 or more gives a 64-bit
/// splat, 32..63 a 32-bit splat, 16..31 a 16-bit splat and anything below 16
/// an 8-bit splat.  The constant has to repeat at exactly that width; if it
/// only repeats at a wider one the function fails rather than reporting the
/// wider splat.
///
/// Undefined bits never prevent a match: wherever one copy of the pattern is
/// undefined, the other copy decides.  The copies are merged with OR, which
/// relies on undefined positions being zero in Bits128.
///
/// \param Bits128       the constant, as two 64-bit halves
/// \param Undef128      mask of undefined bits, same layout as Bits128
/// \param MinSplatBits  requested splat width in bits (see above)
/// \param SplatBits     [out] the repeated value, undefs merged away
/// \param SplatUndef    [out] bits that are undefined in every copy
/// \param SplatSize     [out] width of the splat in bytes (1, 2, 4 or 8)
/// \return true if the constant is a splat of the selected width; the output
///         parameters are only written in that case.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Report the merged halves, exactly like the narrower cases below do.
    // Taking only Bits128[0] would yield 0 for <undef, C>, because the
    // comparison above deliberately lets an undef half match anything.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The top 32-bits must be the same as the lower 32-bits, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The top 16-bits must be the same as the lower 16-bits, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The top 8-bits must be the same as the lower 8-bits, ignoring undefs.
  // (~Undef16 is promoted to int, hence the explicit narrowing before the
  // shift.)
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1581
1582 //! Lower a BUILD_VECTOR instruction creatively:
1583 SDValue
1584 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1585   MVT VT = Op.getValueType();
1586   // If this is a vector of constants or undefs, get the bits.  A bit in
1587   // UndefBits is set if the corresponding element of the vector is an
1588   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1589   // zero.
1590   uint64_t VectorBits[2];
1591   uint64_t UndefBits[2];
1592   uint64_t SplatBits, SplatUndef;
1593   int SplatSize;
1594   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1595       || !isConstantSplat(VectorBits, UndefBits,
1596                           VT.getVectorElementType().getSizeInBits(),
1597                           SplatBits, SplatUndef, SplatSize))
1598     return SDValue();   // Not a constant vector, not a splat.
1599
1600   switch (VT.getSimpleVT()) {
1601   default:
1602     cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1603          << VT.getMVTString()
1604          << "\n";
1605     abort();
1606     /*NOTREACHED*/
1607   case MVT::v4f32: {
1608     uint32_t Value32 = uint32_t(SplatBits);
1609     assert(SplatSize == 4
1610            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1611     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1612     SDValue T = DAG.getConstant(Value32, MVT::i32);
1613     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1614                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1615     break;
1616   }
1617   case MVT::v2f64: {
1618     uint64_t f64val = uint64_t(SplatBits);
1619     assert(SplatSize == 8
1620            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1621     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1622     SDValue T = DAG.getConstant(f64val, MVT::i64);
1623     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1624                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1625     break;
1626   }
1627   case MVT::v16i8: {
1628    // 8-bit constants have to be expanded to 16-bits
1629    unsigned short Value16 = SplatBits | (SplatBits << 8);
1630    SDValue Ops[8];
1631    for (int i = 0; i < 8; ++i)
1632      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1633    return DAG.getNode(ISD::BIT_CONVERT, VT,
1634                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1635   }
1636   case MVT::v8i16: {
1637     unsigned short Value16;
1638     if (SplatSize == 2)
1639       Value16 = (unsigned short) (SplatBits & 0xffff);
1640     else
1641       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1642     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1643     SDValue Ops[8];
1644     for (int i = 0; i < 8; ++i) Ops[i] = T;
1645     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1646   }
1647   case MVT::v4i32: {
1648     unsigned int Value = SplatBits;
1649     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1650     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1651   }
1652   case MVT::v2i32: {
1653     unsigned int Value = SplatBits;
1654     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1655     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
1656   }
1657   case MVT::v2i64: {
1658     return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
1659   }
1660   }
1661
1662   return SDValue();
1663 }
1664
1665 SDValue
1666 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
1667   uint32_t upper = uint32_t(SplatVal >> 32);
1668   uint32_t lower = uint32_t(SplatVal);
1669
1670   if (upper == lower) {
1671     // Magic constant that can be matched by IL, ILA, et. al.
1672     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1673     return DAG.getNode(ISD::BIT_CONVERT, OpVT,
1674                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1675                                    Val, Val, Val, Val));
1676   } else {
1677     SDValue LO32;
1678     SDValue HI32;
1679     SmallVector<SDValue, 16> ShufBytes;
1680     SDValue Result;
1681     bool upper_special, lower_special;
1682
1683     // NOTE: This code creates common-case shuffle masks that can be easily
1684     // detected as common expressions. It is not attempting to create highly
1685     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1686
1687     // Detect if the upper or lower half is a special shuffle mask pattern:
1688     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1689     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1690
1691     // Create lower vector if not a special pattern
1692     if (!lower_special) {
1693       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1694       LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1695                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1696                                      LO32C, LO32C, LO32C, LO32C));
1697     }
1698
1699     // Create upper vector if not a special pattern
1700     if (!upper_special) {
1701       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1702       HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1703                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1704                                      HI32C, HI32C, HI32C, HI32C));
1705     }
1706
1707     // If either upper or lower are special, then the two input operands are
1708     // the same (basically, one of them is a "don't care")
1709     if (lower_special)
1710       LO32 = HI32;
1711     if (upper_special)
1712       HI32 = LO32;
1713     if (lower_special && upper_special) {
1714       // Unhappy situation... both upper and lower are special, so punt with
1715       // a target constant:
1716       SDValue Zero = DAG.getConstant(0, MVT::i32);
1717       HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1718                                 Zero, Zero);
1719     }
1720
1721     for (int i = 0; i < 4; ++i) {
1722       uint64_t val = 0;
1723       for (int j = 0; j < 4; ++j) {
1724         SDValue V;
1725         bool process_upper, process_lower;
1726         val <<= 8;
1727         process_upper = (upper_special && (i & 1) == 0);
1728         process_lower = (lower_special && (i & 1) == 1);
1729
1730         if (process_upper || process_lower) {
1731           if ((process_upper && upper == 0)
1732                   || (process_lower && lower == 0))
1733             val |= 0x80;
1734           else if ((process_upper && upper == 0xffffffff)
1735                   || (process_lower && lower == 0xffffffff))
1736             val |= 0xc0;
1737           else if ((process_upper && upper == 0x80000000)
1738                   || (process_lower && lower == 0x80000000))
1739             val |= (j == 0 ? 0xe0 : 0x80);
1740         } else
1741           val |= i * 4 + j + ((i & 1) * 16);
1742       }
1743
1744       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1745     }
1746
1747     return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
1748                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1749                                    &ShufBytes[0], ShufBytes.size()));
1750   }
1751 }
1752
1753 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1754 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1755 /// permutation vector, V3, is monotonically increasing with one "exception"
1756 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1757 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1758 /// In either case, the net result is going to eventually invoke SHUFB to
1759 /// permute/shuffle the bytes from V1 and V2.
1760 /// \note
1761 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1762 /// control word for byte/halfword/word insertion. This takes care of a single
1763 /// element move from V2 into V1.
1764 /// \note
1765 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1766 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1767   SDValue V1 = Op.getOperand(0);
1768   SDValue V2 = Op.getOperand(1);
1769   SDValue PermMask = Op.getOperand(2);
1770   DebugLoc dl = Op.getDebugLoc();
1771
1772   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1773
1774   // If we have a single element being moved from V1 to V2, this can be handled
1775   // using the C*[DX] compute mask instructions, but the vector elements have
1776   // to be monotonically increasing with one exception element.
1777   MVT VecVT = V1.getValueType();
1778   MVT EltVT = VecVT.getVectorElementType();
1779   unsigned EltsFromV2 = 0;
1780   unsigned V2Elt = 0;
1781   unsigned V2EltIdx0 = 0;
1782   unsigned CurrElt = 0;
1783   unsigned MaxElts = VecVT.getVectorNumElements();
1784   unsigned PrevElt = 0;
1785   unsigned V0Elt = 0;
1786   bool monotonic = true;
1787   bool rotate = true;
1788
1789   if (EltVT == MVT::i8) {
1790     V2EltIdx0 = 16;
1791   } else if (EltVT == MVT::i16) {
1792     V2EltIdx0 = 8;
1793   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1794     V2EltIdx0 = 4;
1795   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1796     V2EltIdx0 = 2;
1797   } else
1798     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1799
1800   for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1801     if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1802       unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1803
1804       if (monotonic) {
1805         if (SrcElt >= V2EltIdx0) {
1806           if (1 >= (++EltsFromV2)) {
1807             V2Elt = (V2EltIdx0 - SrcElt) << 2;
1808           }
1809         } else if (CurrElt != SrcElt) {
1810           monotonic = false;
1811         }
1812
1813         ++CurrElt;
1814       }
1815
1816       if (rotate) {
1817         if (PrevElt > 0 && SrcElt < MaxElts) {
1818           if ((PrevElt == SrcElt - 1)
1819               || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1820             PrevElt = SrcElt;
1821             if (SrcElt == 0)
1822               V0Elt = i;
1823           } else {
1824             rotate = false;
1825           }
1826         } else if (PrevElt == 0) {
1827           // First time through, need to keep track of previous element
1828           PrevElt = SrcElt;
1829         } else {
1830           // This isn't a rotation, takes elements from vector 2
1831           rotate = false;
1832         }
1833       }
1834     }
1835   }
1836
1837   if (EltsFromV2 == 1 && monotonic) {
1838     // Compute mask and shuffle
1839     MachineFunction &MF = DAG.getMachineFunction();
1840     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1841     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1842     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1843     // Initialize temporary register to 0
1844     SDValue InitTempReg =
1845       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1846     // Copy register's contents as index in SHUFFLE_MASK:
1847     SDValue ShufMaskOp =
1848       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1849                   DAG.getTargetConstant(V2Elt, MVT::i32),
1850                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1851     // Use shuffle mask in SHUFB synthetic instruction:
1852     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1853                        ShufMaskOp);
1854   } else if (rotate) {
1855     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1856
1857     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1858                        V1, DAG.getConstant(rotamt, MVT::i16));
1859   } else {
1860    // Convert the SHUFFLE_VECTOR mask's input element units to the
1861    // actual bytes.
1862     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1863
1864     SmallVector<SDValue, 16> ResultMask;
1865     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1866       unsigned SrcElt;
1867       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1868         SrcElt = 0;
1869       else
1870         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1871
1872       for (unsigned j = 0; j < BytesPerElement; ++j) {
1873         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1874                                              MVT::i8));
1875       }
1876     }
1877
1878     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1879                                     &ResultMask[0], ResultMask.size());
1880     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1881   }
1882 }
1883
1884 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1885   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1886
1887   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1888     // For a constant, build the appropriate constant vector, which will
1889     // eventually simplify to a vector register load.
1890
1891     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1892     SmallVector<SDValue, 16> ConstVecValues;
1893     MVT VT;
1894     size_t n_copies;
1895
1896     // Create a constant vector:
1897     switch (Op.getValueType().getSimpleVT()) {
1898     default: assert(0 && "Unexpected constant value type in "
1899                          "LowerSCALAR_TO_VECTOR");
1900     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1901     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1902     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1903     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1904     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1905     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1906     }
1907
1908     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1909     for (size_t j = 0; j < n_copies; ++j)
1910       ConstVecValues.push_back(CValue);
1911
1912     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1913                        &ConstVecValues[0], ConstVecValues.size());
1914   } else {
1915     // Otherwise, copy the value from one register to another:
1916     switch (Op0.getValueType().getSimpleVT()) {
1917     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1918     case MVT::i8:
1919     case MVT::i16:
1920     case MVT::i32:
1921     case MVT::i64:
1922     case MVT::f32:
1923     case MVT::f64:
1924       return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1925     }
1926   }
1927
1928   return SDValue();
1929 }
1930
1931 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1932   MVT VT = Op.getValueType();
1933   SDValue N = Op.getOperand(0);
1934   SDValue Elt = Op.getOperand(1);
1935   SDValue retval;
1936
1937   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1938     // Constant argument:
1939     int EltNo = (int) C->getZExtValue();
1940
1941     // sanity checks:
1942     if (VT == MVT::i8 && EltNo >= 16)
1943       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1944     else if (VT == MVT::i16 && EltNo >= 8)
1945       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1946     else if (VT == MVT::i32 && EltNo >= 4)
1947       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1948     else if (VT == MVT::i64 && EltNo >= 2)
1949       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1950
1951     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1952       // i32 and i64: Element 0 is the preferred slot
1953       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1954     }
1955
1956     // Need to generate shuffle mask and extract:
1957     int prefslot_begin = -1, prefslot_end = -1;
1958     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1959
1960     switch (VT.getSimpleVT()) {
1961     default:
1962       assert(false && "Invalid value type!");
1963     case MVT::i8: {
1964       prefslot_begin = prefslot_end = 3;
1965       break;
1966     }
1967     case MVT::i16: {
1968       prefslot_begin = 2; prefslot_end = 3;
1969       break;
1970     }
1971     case MVT::i32:
1972     case MVT::f32: {
1973       prefslot_begin = 0; prefslot_end = 3;
1974       break;
1975     }
1976     case MVT::i64:
1977     case MVT::f64: {
1978       prefslot_begin = 0; prefslot_end = 7;
1979       break;
1980     }
1981     }
1982
1983     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1984            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1985
1986     unsigned int ShufBytes[16];
1987     for (int i = 0; i < 16; ++i) {
1988       // zero fill uppper part of preferred slot, don't care about the
1989       // other slots:
1990       unsigned int mask_val;
1991       if (i <= prefslot_end) {
1992         mask_val =
1993           ((i < prefslot_begin)
1994            ? 0x80
1995            : elt_byte + (i - prefslot_begin));
1996
1997         ShufBytes[i] = mask_val;
1998       } else
1999         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2000     }
2001
2002     SDValue ShufMask[4];
2003     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2004       unsigned bidx = i * 4;
2005       unsigned int bits = ((ShufBytes[bidx] << 24) |
2006                            (ShufBytes[bidx+1] << 16) |
2007                            (ShufBytes[bidx+2] << 8) |
2008                            ShufBytes[bidx+3]);
2009       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2010     }
2011
2012     SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2013                                       &ShufMask[0],
2014                                       sizeof(ShufMask) / sizeof(ShufMask[0]));
2015
2016     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2017                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2018                                      N, N, ShufMaskVec));
2019   } else {
2020     // Variable index: Rotate the requested element into slot 0, then replicate
2021     // slot 0 across the vector
2022     MVT VecVT = N.getValueType();
2023     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2024       cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2025       abort();
2026     }
2027
2028     // Make life easier by making sure the index is zero-extended to i32
2029     if (Elt.getValueType() != MVT::i32)
2030       Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2031
2032     // Scale the index to a bit/byte shift quantity
2033     APInt scaleFactor =
2034             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2035     unsigned scaleShift = scaleFactor.logBase2();
2036     SDValue vecShift;
2037
2038     if (scaleShift > 0) {
2039       // Scale the shift factor:
2040       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2041                         DAG.getConstant(scaleShift, MVT::i32));
2042     }
2043
2044     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2045
2046     // Replicate the bytes starting at byte 0 across the entire vector (for
2047     // consistency with the notion of a unified register set)
2048     SDValue replicate;
2049
2050     switch (VT.getSimpleVT()) {
2051     default:
2052       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2053       abort();
2054       /*NOTREACHED*/
2055     case MVT::i8: {
2056       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2057       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2058                               factor, factor);
2059       break;
2060     }
2061     case MVT::i16: {
2062       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2063       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2064                               factor, factor);
2065       break;
2066     }
2067     case MVT::i32:
2068     case MVT::f32: {
2069       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2070       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2071                               factor, factor);
2072       break;
2073     }
2074     case MVT::i64:
2075     case MVT::f64: {
2076       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2077       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2078       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2079                               loFactor, hiFactor);
2080       break;
2081     }
2082     }
2083
2084     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2085                          DAG.getNode(SPUISD::SHUFB, VecVT,
2086                                      vecShift, vecShift, replicate));
2087   }
2088
2089   return retval;
2090 }
2091
2092 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2093   SDValue VecOp = Op.getOperand(0);
2094   SDValue ValOp = Op.getOperand(1);
2095   SDValue IdxOp = Op.getOperand(2);
2096   MVT VT = Op.getValueType();
2097
2098   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2099   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2100
2101   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2102   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2103   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2104                                 DAG.getRegister(SPU::R1, PtrVT),
2105                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2106   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2107
2108   SDValue result =
2109     DAG.getNode(SPUISD::SHUFB, VT,
2110                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2111                 VecOp,
2112                 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2113
2114   return result;
2115 }
2116
2117 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2118                            const TargetLowering &TLI)
2119 {
2120   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2121   MVT ShiftVT = TLI.getShiftAmountTy();
2122
2123   assert(Op.getValueType() == MVT::i8);
2124   switch (Opc) {
2125   default:
2126     assert(0 && "Unhandled i8 math operator");
2127     /*NOTREACHED*/
2128     break;
2129   case ISD::ADD: {
2130     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2131     // the result:
2132     SDValue N1 = Op.getOperand(1);
2133     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2134     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2135     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2136                        DAG.getNode(Opc, MVT::i16, N0, N1));
2137
2138   }
2139
2140   case ISD::SUB: {
2141     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2142     // the result:
2143     SDValue N1 = Op.getOperand(1);
2144     N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2145     N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2146     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2147                        DAG.getNode(Opc, MVT::i16, N0, N1));
2148   }
2149   case ISD::ROTR:
2150   case ISD::ROTL: {
2151     SDValue N1 = Op.getOperand(1);
2152     unsigned N1Opc;
2153     N0 = (N0.getOpcode() != ISD::Constant
2154           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2155           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2156                             MVT::i16));
2157     N1Opc = N1.getValueType().bitsLT(ShiftVT)
2158             ? ISD::ZERO_EXTEND
2159             : ISD::TRUNCATE;
2160     N1 = (N1.getOpcode() != ISD::Constant
2161           ? DAG.getNode(N1Opc, ShiftVT, N1)
2162           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2163                             TLI.getShiftAmountTy()));
2164     SDValue ExpandArg =
2165       DAG.getNode(ISD::OR, MVT::i16, N0,
2166                   DAG.getNode(ISD::SHL, MVT::i16,
2167                               N0, DAG.getConstant(8, MVT::i32)));
2168     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2169                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2170   }
2171   case ISD::SRL:
2172   case ISD::SHL: {
2173     SDValue N1 = Op.getOperand(1);
2174     unsigned N1Opc;
2175     N0 = (N0.getOpcode() != ISD::Constant
2176           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2177           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2178                             MVT::i32));
2179     N1Opc = N1.getValueType().bitsLT(ShiftVT)
2180             ? ISD::ZERO_EXTEND
2181             : ISD::TRUNCATE;
2182     N1 = (N1.getOpcode() != ISD::Constant
2183           ? DAG.getNode(N1Opc, ShiftVT, N1)
2184           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2185     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2186                        DAG.getNode(Opc, MVT::i16, N0, N1));
2187   }
2188   case ISD::SRA: {
2189     SDValue N1 = Op.getOperand(1);
2190     unsigned N1Opc;
2191     N0 = (N0.getOpcode() != ISD::Constant
2192           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2193           : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2194                             MVT::i16));
2195     N1Opc = N1.getValueType().bitsLT(ShiftVT)
2196             ? ISD::SIGN_EXTEND
2197             : ISD::TRUNCATE;
2198     N1 = (N1.getOpcode() != ISD::Constant
2199           ? DAG.getNode(N1Opc, ShiftVT, N1)
2200           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2201                             ShiftVT));
2202     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2203                        DAG.getNode(Opc, MVT::i16, N0, N1));
2204   }
2205   case ISD::MUL: {
2206     SDValue N1 = Op.getOperand(1);
2207     unsigned N1Opc;
2208     N0 = (N0.getOpcode() != ISD::Constant
2209           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2210           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2211                             MVT::i16));
2212     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2213     N1 = (N1.getOpcode() != ISD::Constant
2214           ? DAG.getNode(N1Opc, MVT::i16, N1)
2215           : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2216                             MVT::i16));
2217     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2218                        DAG.getNode(Opc, MVT::i16, N0, N1));
2219     break;
2220   }
2221   }
2222
2223   return SDValue();
2224 }
2225
2226 //! Generate the carry-generate shuffle mask.
2227 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2228   SmallVector<SDValue, 16 > ShufBytes;
2229
2230   // Create the shuffle mask for "rotating" the borrow up one register slot
2231   // once the borrow is generated.
2232   ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2233   ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2234   ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2235   ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2236
2237   return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2238                      &ShufBytes[0], ShufBytes.size());
2239 }
2240
2241 //! Generate the borrow-generate shuffle mask
2242 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2243   SmallVector<SDValue, 16 > ShufBytes;
2244
2245   // Create the shuffle mask for "rotating" the borrow up one register slot
2246   // once the borrow is generated.
2247   ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2248   ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2249   ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2250   ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2251
2252   return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2253                      &ShufBytes[0], ShufBytes.size());
2254 }
2255
2256 //! Lower byte immediate operations for v16i8 vectors:
2257 static SDValue
2258 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2259   SDValue ConstVec;
2260   SDValue Arg;
2261   MVT VT = Op.getValueType();
2262
2263   ConstVec = Op.getOperand(0);
2264   Arg = Op.getOperand(1);
2265   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2266     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2267       ConstVec = ConstVec.getOperand(0);
2268     } else {
2269       ConstVec = Op.getOperand(1);
2270       Arg = Op.getOperand(0);
2271       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2272         ConstVec = ConstVec.getOperand(0);
2273       }
2274     }
2275   }
2276
2277   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2278     uint64_t VectorBits[2];
2279     uint64_t UndefBits[2];
2280     uint64_t SplatBits, SplatUndef;
2281     int SplatSize;
2282
2283     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2284         && isConstantSplat(VectorBits, UndefBits,
2285                            VT.getVectorElementType().getSizeInBits(),
2286                            SplatBits, SplatUndef, SplatSize)) {
2287       SDValue tcVec[16];
2288       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2289       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2290
2291       // Turn the BUILD_VECTOR into a set of target constants:
2292       for (size_t i = 0; i < tcVecSize; ++i)
2293         tcVec[i] = tc;
2294
2295       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2296                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2297     }
2298   }
2299
2300   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2301   // lowered.  Return the operation, rather than a null SDValue.
2302   return Op;
2303 }
2304
2305 //! Custom lowering for CTPOP (count population)
2306 /*!
2307   Custom lowering code that counts the number ones in the input
2308   operand. SPU has such an instruction, but it counts the number of
2309   ones per byte, which then have to be accumulated.
2310 */
2311 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2312   MVT VT = Op.getValueType();
2313   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2314   DebugLoc dl = Op.getDebugLoc();
2315
2316   switch (VT.getSimpleVT()) {
2317   default:
2318     assert(false && "Invalid value type!");
2319   case MVT::i8: {
2320     SDValue N = Op.getOperand(0);
2321     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2322
2323     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2324     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2325
2326     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2327   }
2328
2329   case MVT::i16: {
2330     MachineFunction &MF = DAG.getMachineFunction();
2331     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2332
2333     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2334
2335     SDValue N = Op.getOperand(0);
2336     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2337     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2338     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2339
2340     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2341     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2342
2343     // CNTB_result becomes the chain to which all of the virtual registers
2344     // CNTB_reg, SUM1_reg become associated:
2345     SDValue CNTB_result =
2346       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2347
2348     SDValue CNTB_rescopy =
2349       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2350
2351     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2352
2353     return DAG.getNode(ISD::AND, dl, MVT::i16,
2354                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2355                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2356                                                Tmp1, Shift1),
2357                                    Tmp1),
2358                        Mask0);
2359   }
2360
2361   case MVT::i32: {
2362     MachineFunction &MF = DAG.getMachineFunction();
2363     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2364
2365     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2366     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2367
2368     SDValue N = Op.getOperand(0);
2369     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2370     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2371     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2372     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2373
2374     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2375     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2376
2377     // CNTB_result becomes the chain to which all of the virtual registers
2378     // CNTB_reg, SUM1_reg become associated:
2379     SDValue CNTB_result =
2380       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2381
2382     SDValue CNTB_rescopy =
2383       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2384
2385     SDValue Comp1 =
2386       DAG.getNode(ISD::SRL, dl, MVT::i32,
2387                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2388                   Shift1);
2389
2390     SDValue Sum1 =
2391       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2392                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2393
2394     SDValue Sum1_rescopy =
2395       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2396
2397     SDValue Comp2 =
2398       DAG.getNode(ISD::SRL, dl, MVT::i32,
2399                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2400                   Shift2);
2401     SDValue Sum2 =
2402       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2403                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2404
2405     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2406   }
2407
2408   case MVT::i64:
2409     break;
2410   }
2411
2412   return SDValue();
2413 }
2414
2415 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2416 /*!
2417  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2418  All conversions to i64 are expanded to a libcall.
2419  */
2420 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2421                               SPUTargetLowering &TLI) {
2422   MVT OpVT = Op.getValueType();
2423   SDValue Op0 = Op.getOperand(0);
2424   MVT Op0VT = Op0.getValueType();
2425
2426   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2427       || OpVT == MVT::i64) {
2428     // Convert f32 / f64 to i32 / i64 via libcall.
2429     RTLIB::Libcall LC =
2430             (Op.getOpcode() == ISD::FP_TO_SINT)
2431              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2432              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2433     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2434     SDValue Dummy;
2435     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2436   }
2437
2438   return Op;                    // return unmolested, legalized op
2439 }
2440
2441 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2442 /*!
2443  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2444  All conversions from i64 are expanded to a libcall.
2445  */
2446 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2447                               SPUTargetLowering &TLI) {
2448   MVT OpVT = Op.getValueType();
2449   SDValue Op0 = Op.getOperand(0);
2450   MVT Op0VT = Op0.getValueType();
2451
2452   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2453       || Op0VT == MVT::i64) {
2454     // Convert i32, i64 to f64 via libcall:
2455     RTLIB::Libcall LC =
2456             (Op.getOpcode() == ISD::SINT_TO_FP)
2457              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2458              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2459     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2460     SDValue Dummy;
2461     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2462   }
2463
2464   return Op;                    // return unmolested, legalized
2465 }
2466
2467 //! Lower ISD::SETCC
2468 /*!
2469  This handles MVT::f64 (double floating point) condition lowering
2470  */
2471 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2472                           const TargetLowering &TLI) {
2473   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2474   DebugLoc dl = Op.getNode()->getDebugLoc();
2475   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2476
2477   SDValue lhs = Op.getOperand(0);
2478   SDValue rhs = Op.getOperand(1);
2479   MVT lhsVT = lhs.getValueType();
2480   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2481
2482   MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2483   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2484   MVT IntVT(MVT::i64);
2485
2486   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2487   // selected to a NOP:
2488   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2489   SDValue lhsHi32 =
2490           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2491                       DAG.getNode(ISD::SRL, dl, IntVT,
2492                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2493   SDValue lhsHi32abs =
2494           DAG.getNode(ISD::AND, dl, MVT::i32,
2495                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2496   SDValue lhsLo32 =
2497           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2498
2499   // SETO and SETUO only use the lhs operand:
2500   if (CC->get() == ISD::SETO) {
2501     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2502     // SETUO
2503     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2504     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2505                        DAG.getSetCC(dl, ccResultVT,
2506                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2507                                     ISD::SETUO),
2508                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2509   } else if (CC->get() == ISD::SETUO) {
2510     // Evaluates to true if Op0 is [SQ]NaN
2511     return DAG.getNode(ISD::AND, dl, ccResultVT,
2512                        DAG.getSetCC(dl, ccResultVT,
2513                                     lhsHi32abs,
2514                                     DAG.getConstant(0x7ff00000, MVT::i32),
2515                                     ISD::SETGE),
2516                        DAG.getSetCC(dl, ccResultVT,
2517                                     lhsLo32,
2518                                     DAG.getConstant(0, MVT::i32),
2519                                     ISD::SETGT));
2520   }
2521
2522   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
2523   SDValue rhsHi32 =
2524           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2525                       DAG.getNode(ISD::SRL, dl, IntVT,
2526                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2527
2528   // If a value is negative, subtract from the sign magnitude constant:
2529   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2530
2531   // Convert the sign-magnitude representation into 2's complement:
2532   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2533                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2534   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2535   SDValue lhsSelect =
2536           DAG.getNode(ISD::SELECT, dl, IntVT,
2537                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2538
2539   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2540                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2541   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2542   SDValue rhsSelect =
2543           DAG.getNode(ISD::SELECT, dl, IntVT,
2544                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2545
2546   unsigned compareOp;
2547
2548   switch (CC->get()) {
2549   case ISD::SETOEQ:
2550   case ISD::SETUEQ:
2551     compareOp = ISD::SETEQ; break;
2552   case ISD::SETOGT:
2553   case ISD::SETUGT:
2554     compareOp = ISD::SETGT; break;
2555   case ISD::SETOGE:
2556   case ISD::SETUGE:
2557     compareOp = ISD::SETGE; break;
2558   case ISD::SETOLT:
2559   case ISD::SETULT:
2560     compareOp = ISD::SETLT; break;
2561   case ISD::SETOLE:
2562   case ISD::SETULE:
2563     compareOp = ISD::SETLE; break;
2564   case ISD::SETUNE:
2565   case ISD::SETONE:
2566     compareOp = ISD::SETNE; break;
2567   default:
2568     cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2569     abort();
2570     break;
2571   }
2572
2573   SDValue result =
2574           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2575                        (ISD::CondCode) compareOp);
2576
2577   if ((CC->get() & 0x8) == 0) {
2578     // Ordered comparison:
2579     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2580                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2581                                   ISD::SETO);
2582     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2583                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2584                                   ISD::SETO);
2585     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2586
2587     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2588   }
2589
2590   return result;
2591 }
2592
2593 //! Lower ISD::SELECT_CC
2594 /*!
2595   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2596   SELB instruction.
2597
2598   \note Need to revisit this in the future: if the code path through the true
2599   and false value computations is longer than the latency of a branch (6
2600   cycles), then it would be more advantageous to branch and insert a new basic
2601   block and branch on the condition. However, this code does not make that
2602   assumption, given the simplisitc uses so far.
2603  */
2604
2605 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2606                               const TargetLowering &TLI) {
2607   MVT VT = Op.getValueType();
2608   SDValue lhs = Op.getOperand(0);
2609   SDValue rhs = Op.getOperand(1);
2610   SDValue trueval = Op.getOperand(2);
2611   SDValue falseval = Op.getOperand(3);
2612   SDValue condition = Op.getOperand(4);
2613
2614   // NOTE: SELB's arguments: $rA, $rB, $mask
2615   //
2616   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2617   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2618   // condition was true and 0s where the condition was false. Hence, the
2619   // arguments to SELB get reversed.
2620
2621   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2622   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2623   // with another "cannot select select_cc" assert:
2624
2625   SDValue compare = DAG.getNode(ISD::SETCC,
2626                                 TLI.getSetCCResultType(Op.getValueType()),
2627                                 lhs, rhs, condition);
2628   return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2629 }
2630
2631 //! Custom lower ISD::TRUNCATE
2632 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2633 {
2634   MVT VT = Op.getValueType();
2635   MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2636   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2637
2638   SDValue Op0 = Op.getOperand(0);
2639   MVT Op0VT = Op0.getValueType();
2640   MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2641
2642   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2643     // Create shuffle mask, least significant doubleword of quadword
2644     unsigned maskHigh = 0x08090a0b;
2645     unsigned maskLow = 0x0c0d0e0f;
2646     // Use a shuffle to perform the truncation
2647     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2648                                    DAG.getConstant(maskHigh, MVT::i32),
2649                                    DAG.getConstant(maskLow, MVT::i32),
2650                                    DAG.getConstant(maskHigh, MVT::i32),
2651                                    DAG.getConstant(maskLow, MVT::i32));
2652
2653
2654     SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2655
2656     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2657                                        PromoteScalar, PromoteScalar, shufMask);
2658
2659     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2660                        DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2661   }
2662
2663   return SDValue();             // Leave the truncate unmolested
2664 }
2665
2666 //! Custom (target-specific) lowering entry point
2667 /*!
2668   This is where LLVM's DAG selection process calls to do target-specific
2669   lowering of nodes.
2670  */
2671 SDValue
2672 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2673 {
2674   unsigned Opc = (unsigned) Op.getOpcode();
2675   MVT VT = Op.getValueType();
2676
2677   switch (Opc) {
2678   default: {
2679     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2680     cerr << "Op.getOpcode() = " << Opc << "\n";
2681     cerr << "*Op.getNode():\n";
2682     Op.getNode()->dump();
2683     abort();
2684   }
2685   case ISD::LOAD:
2686   case ISD::EXTLOAD:
2687   case ISD::SEXTLOAD:
2688   case ISD::ZEXTLOAD:
2689     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2690   case ISD::STORE:
2691     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2692   case ISD::ConstantPool:
2693     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2694   case ISD::GlobalAddress:
2695     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2696   case ISD::JumpTable:
2697     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2698   case ISD::ConstantFP:
2699     return LowerConstantFP(Op, DAG);
2700   case ISD::FORMAL_ARGUMENTS:
2701     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2702   case ISD::CALL:
2703     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2704   case ISD::RET:
2705     return LowerRET(Op, DAG, getTargetMachine());
2706
2707   // i8, i64 math ops:
2708   case ISD::ADD:
2709   case ISD::SUB:
2710   case ISD::ROTR:
2711   case ISD::ROTL:
2712   case ISD::SRL:
2713   case ISD::SHL:
2714   case ISD::SRA: {
2715     if (VT == MVT::i8)
2716       return LowerI8Math(Op, DAG, Opc, *this);
2717     break;
2718   }
2719
2720   case ISD::FP_TO_SINT:
2721   case ISD::FP_TO_UINT:
2722     return LowerFP_TO_INT(Op, DAG, *this);
2723
2724   case ISD::SINT_TO_FP:
2725   case ISD::UINT_TO_FP:
2726     return LowerINT_TO_FP(Op, DAG, *this);
2727
2728   // Vector-related lowering.
2729   case ISD::BUILD_VECTOR:
2730     return LowerBUILD_VECTOR(Op, DAG);
2731   case ISD::SCALAR_TO_VECTOR:
2732     return LowerSCALAR_TO_VECTOR(Op, DAG);
2733   case ISD::VECTOR_SHUFFLE:
2734     return LowerVECTOR_SHUFFLE(Op, DAG);
2735   case ISD::EXTRACT_VECTOR_ELT:
2736     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2737   case ISD::INSERT_VECTOR_ELT:
2738     return LowerINSERT_VECTOR_ELT(Op, DAG);
2739
2740   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2741   case ISD::AND:
2742   case ISD::OR:
2743   case ISD::XOR:
2744     return LowerByteImmed(Op, DAG);
2745
2746   // Vector and i8 multiply:
2747   case ISD::MUL:
2748     if (VT == MVT::i8)
2749       return LowerI8Math(Op, DAG, Opc, *this);
2750
2751   case ISD::CTPOP:
2752     return LowerCTPOP(Op, DAG);
2753
2754   case ISD::SELECT_CC:
2755     return LowerSELECT_CC(Op, DAG, *this);
2756
2757   case ISD::SETCC:
2758     return LowerSETCC(Op, DAG, *this);
2759
2760   case ISD::TRUNCATE:
2761     return LowerTRUNCATE(Op, DAG);
2762   }
2763
2764   return SDValue();
2765 }
2766
2767 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2768                                            SmallVectorImpl<SDValue>&Results,
2769                                            SelectionDAG &DAG)
2770 {
2771 #if 0
2772   unsigned Opc = (unsigned) N->getOpcode();
2773   MVT OpVT = N->getValueType(0);
2774
2775   switch (Opc) {
2776   default: {
2777     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2778     cerr << "Op.getOpcode() = " << Opc << "\n";
2779     cerr << "*Op.getNode():\n";
2780     N->dump();
2781     abort();
2782     /*NOTREACHED*/
2783   }
2784   }
2785 #endif
2786
2787   /* Otherwise, return unchanged */
2788 }
2789
2790 //===----------------------------------------------------------------------===//
2791 // Target Optimization Hooks
2792 //===----------------------------------------------------------------------===//
2793
2794 SDValue
2795 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2796 {
2797 #if 0
2798   TargetMachine &TM = getTargetMachine();
2799 #endif
2800   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2801   SelectionDAG &DAG = DCI.DAG;
2802   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2803   MVT NodeVT = N->getValueType(0);      // The node's value type
2804   MVT Op0VT = Op0.getValueType();       // The first operand's result
2805   SDValue Result;                       // Initially, empty result
2806
2807   switch (N->getOpcode()) {
2808   default: break;
2809   case ISD::ADD: {
2810     SDValue Op1 = N->getOperand(1);
2811
2812     if (Op0.getOpcode() == SPUISD::IndirectAddr
2813         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2814       // Normalize the operands to reduce repeated code
2815       SDValue IndirectArg = Op0, AddArg = Op1;
2816
2817       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2818         IndirectArg = Op1;
2819         AddArg = Op0;
2820       }
2821
2822       if (isa<ConstantSDNode>(AddArg)) {
2823         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2824         SDValue IndOp1 = IndirectArg.getOperand(1);
2825
2826         if (CN0->isNullValue()) {
2827           // (add (SPUindirect <arg>, <arg>), 0) ->
2828           // (SPUindirect <arg>, <arg>)
2829
2830 #if !defined(NDEBUG)
2831           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2832             cerr << "\n"
2833                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2834                  << "With:    (SPUindirect <arg>, <arg>)\n";
2835           }
2836 #endif
2837
2838           return IndirectArg;
2839         } else if (isa<ConstantSDNode>(IndOp1)) {
2840           // (add (SPUindirect <arg>, <const>), <const>) ->
2841           // (SPUindirect <arg>, <const + const>)
2842           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2843           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2844           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2845
2846 #if !defined(NDEBUG)
2847           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2848             cerr << "\n"
2849                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2850                  << "), " << CN0->getSExtValue() << ")\n"
2851                  << "With:    (SPUindirect <arg>, "
2852                  << combinedConst << ")\n";
2853           }
2854 #endif
2855
2856           return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2857                              IndirectArg, combinedValue);
2858         }
2859       }
2860     }
2861     break;
2862   }
2863   case ISD::SIGN_EXTEND:
2864   case ISD::ZERO_EXTEND:
2865   case ISD::ANY_EXTEND: {
2866     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2867       // (any_extend (SPUextract_elt0 <arg>)) ->
2868       // (SPUextract_elt0 <arg>)
2869       // Types must match, however...
2870 #if !defined(NDEBUG)
2871       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2872         cerr << "\nReplace: ";
2873         N->dump(&DAG);
2874         cerr << "\nWith:    ";
2875         Op0.getNode()->dump(&DAG);
2876         cerr << "\n";
2877       }
2878 #endif
2879
2880       return Op0;
2881     }
2882     break;
2883   }
2884   case SPUISD::IndirectAddr: {
2885     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2886       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2887       if (CN != 0 && CN->getZExtValue() == 0) {
2888         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2889         // (SPUaform <addr>, 0)
2890
2891         DEBUG(cerr << "Replace: ");
2892         DEBUG(N->dump(&DAG));
2893         DEBUG(cerr << "\nWith:    ");
2894         DEBUG(Op0.getNode()->dump(&DAG));
2895         DEBUG(cerr << "\n");
2896
2897         return Op0;
2898       }
2899     } else if (Op0.getOpcode() == ISD::ADD) {
2900       SDValue Op1 = N->getOperand(1);
2901       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2902         // (SPUindirect (add <arg>, <arg>), 0) ->
2903         // (SPUindirect <arg>, <arg>)
2904         if (CN1->isNullValue()) {
2905
2906 #if !defined(NDEBUG)
2907           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2908             cerr << "\n"
2909                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2910                  << "With:    (SPUindirect <arg>, <arg>)\n";
2911           }
2912 #endif
2913
2914           return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2915                              Op0.getOperand(0), Op0.getOperand(1));
2916         }
2917       }
2918     }
2919     break;
2920   }
2921   case SPUISD::SHLQUAD_L_BITS:
2922   case SPUISD::SHLQUAD_L_BYTES:
2923   case SPUISD::VEC_SHL:
2924   case SPUISD::VEC_SRL:
2925   case SPUISD::VEC_SRA:
2926   case SPUISD::ROTBYTES_LEFT: {
2927     SDValue Op1 = N->getOperand(1);
2928
2929     // Kill degenerate vector shifts:
2930     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2931       if (CN->isNullValue()) {
2932         Result = Op0;
2933       }
2934     }
2935     break;
2936   }
2937   case SPUISD::PREFSLOT2VEC: {
2938     switch (Op0.getOpcode()) {
2939     default:
2940       break;
2941     case ISD::ANY_EXTEND:
2942     case ISD::ZERO_EXTEND:
2943     case ISD::SIGN_EXTEND: {
2944       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2945       // <arg>
2946       // but only if the SPUprefslot2vec and <arg> types match.
2947       SDValue Op00 = Op0.getOperand(0);
2948       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2949         SDValue Op000 = Op00.getOperand(0);
2950         if (Op000.getValueType() == NodeVT) {
2951           Result = Op000;
2952         }
2953       }
2954       break;
2955     }
2956     case SPUISD::VEC2PREFSLOT: {
2957       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2958       // <arg>
2959       Result = Op0.getOperand(0);
2960       break;
2961     }
2962     }
2963     break;
2964   }
2965   }
2966
2967   // Otherwise, return unchanged.
2968 #ifndef NDEBUG
2969   if (Result.getNode()) {
2970     DEBUG(cerr << "\nReplace.SPU: ");
2971     DEBUG(N->dump(&DAG));
2972     DEBUG(cerr << "\nWith:        ");
2973     DEBUG(Result.getNode()->dump(&DAG));
2974     DEBUG(cerr << "\n");
2975   }
2976 #endif
2977
2978   return Result;
2979 }
2980
2981 //===----------------------------------------------------------------------===//
2982 // Inline Assembly Support
2983 //===----------------------------------------------------------------------===//
2984
2985 /// getConstraintType - Given a constraint letter, return the type of
2986 /// constraint it is for this target.
2987 SPUTargetLowering::ConstraintType
2988 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2989   if (ConstraintLetter.size() == 1) {
2990     switch (ConstraintLetter[0]) {
2991     default: break;
2992     case 'b':
2993     case 'r':
2994     case 'f':
2995     case 'v':
2996     case 'y':
2997       return C_RegisterClass;
2998     }
2999   }
3000   return TargetLowering::getConstraintType(ConstraintLetter);
3001 }
3002
3003 std::pair<unsigned, const TargetRegisterClass*>
3004 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3005                                                 MVT VT) const
3006 {
3007   if (Constraint.size() == 1) {
3008     // GCC RS6000 Constraint Letters
3009     switch (Constraint[0]) {
3010     case 'b':   // R1-R31
3011     case 'r':   // R0-R31
3012       if (VT == MVT::i64)
3013         return std::make_pair(0U, SPU::R64CRegisterClass);
3014       return std::make_pair(0U, SPU::R32CRegisterClass);
3015     case 'f':
3016       if (VT == MVT::f32)
3017         return std::make_pair(0U, SPU::R32FPRegisterClass);
3018       else if (VT == MVT::f64)
3019         return std::make_pair(0U, SPU::R64FPRegisterClass);
3020       break;
3021     case 'v':
3022       return std::make_pair(0U, SPU::GPRCRegisterClass);
3023     }
3024   }
3025
3026   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3027 }
3028
3029 //! Compute used/known bits for a SPU operand
3030 void
3031 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3032                                                   const APInt &Mask,
3033                                                   APInt &KnownZero,
3034                                                   APInt &KnownOne,
3035                                                   const SelectionDAG &DAG,
3036                                                   unsigned Depth ) const {
3037 #if 0
3038   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3039
3040   switch (Op.getOpcode()) {
3041   default:
3042     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3043     break;
3044   case CALL:
3045   case SHUFB:
3046   case SHUFFLE_MASK:
3047   case CNTB:
3048   case SPUISD::PREFSLOT2VEC:
3049   case SPUISD::LDRESULT:
3050   case SPUISD::VEC2PREFSLOT:
3051   case SPUISD::SHLQUAD_L_BITS:
3052   case SPUISD::SHLQUAD_L_BYTES:
3053   case SPUISD::VEC_SHL:
3054   case SPUISD::VEC_SRL:
3055   case SPUISD::VEC_SRA:
3056   case SPUISD::VEC_ROTL:
3057   case SPUISD::VEC_ROTR:
3058   case SPUISD::ROTBYTES_LEFT:
3059   case SPUISD::SELECT_MASK:
3060   case SPUISD::SELB:
3061   }
3062 #endif
3063 }
3064
3065 unsigned
3066 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3067                                                    unsigned Depth) const {
3068   switch (Op.getOpcode()) {
3069   default:
3070     return 1;
3071
3072   case ISD::SETCC: {
3073     MVT VT = Op.getValueType();
3074
3075     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3076       VT = MVT::i32;
3077     }
3078     return VT.getSizeInBits();
3079   }
3080   }
3081 }
3082
3083 // LowerAsmOperandForConstraint
3084 void
3085 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3086                                                 char ConstraintLetter,
3087                                                 bool hasMemory,
3088                                                 std::vector<SDValue> &Ops,
3089                                                 SelectionDAG &DAG) const {
3090   // Default, for the time being, to the base class handler
3091   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3092                                                Ops, DAG);
3093 }
3094
3095 /// isLegalAddressImmediate - Return true if the integer value can be used
3096 /// as the offset of the target addressing mode.
3097 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3098                                                 const Type *Ty) const {
3099   // SPU's addresses are 256K:
3100   return (V > -(1 << 18) && V < (1 << 18) - 1);
3101 }
3102
3103 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3104   return false;
3105 }
3106
3107 bool
3108 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3109   // The SPU target isn't yet aware of offsets.
3110   return false;
3111 }