1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below.
// Maps SPUISD opcode values to printable names; filled lazily on the first
// call to getTargetNodeName().
std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  // Byte offset of the value's "preferred slot" within a 16-byte SPU
  // register (scalars occupy a fixed slot of the 128-bit register).
  const int prefslot_byte;
// Per-type preferred-slot table; searched linearly by
// getValueTypeMapEntry() below.
const valtype_map_s valtype_map[] = {

// Number of entries in valtype_map (classic sizeof array-length idiom).
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Look up the valtype_map entry for a given value type.
/*!
  Linear scan over valtype_map; yields 0 when VT has no entry (the debug
  diagnostic below reports that case).
 */
const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;
  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;
  // Debug-only diagnostic for a missing table entry:
  cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
//! Configure SPU register classes, operation legality and DAG combines.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
  // Fold away setcc operations if possible.
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);
  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::i1, Promote);
  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  // Zero extension and sign extension for i64 have to be
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);
  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): FP_TO_SINT/i64 and SINT_TO_FP/i64 were already marked
  // Custom above; these two calls are redundant but harmless.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): this overrides the earlier FP_TO_UINT/i32 "Legal" setting.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);
    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);
    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);
    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  // v16i8 logical/multiply ops need custom handling on top of the defaults:
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(SPU::R1);
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);
  computeRegisterProperties();
//! Return the printable name for a target-specific (SPUISD) DAG opcode.
/*!
  Lazily populates the file-level node_names map on first use, then does a
  simple lookup; returns 0 for unknown opcodes.
 */
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
  return ((i != node_names.end()) ? i->second : 0);
//! Pick the value type that setcc should produce for the given operand.
MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
  MVT VT = Op.getValueType();
463 //===----------------------------------------------------------------------===//
464 // Calling convention code:
465 //===----------------------------------------------------------------------===//
467 #include "SPUGenCallingConv.inc"
469 //===----------------------------------------------------------------------===//
470 // LowerOperation implementation
471 //===----------------------------------------------------------------------===//
473 /// Aligned load common code for CellSPU
475 \param[in] Op The SelectionDAG load or store operand
476 \param[in] DAG The selection DAG
477 \param[in] ST CellSPU subtarget information structure
478 \param[in,out] alignment Caller initializes this to the load or store node's
479 value from getAlignment(), may be updated while generating the aligned load
480 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
481 offset (divisible by 16, modulo 16 == 0)
482 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
483 offset of the preferred slot (modulo 16 != 0)
\param[in,out] VT Caller initializes this value type to the load or store
485 node's loaded or stored value type; may be updated if an i1-extended load or
487 \param[out] was16aligned true if the base pointer had 16-byte alignment,
488 otherwise false. Can help to determine if the chunk needs to be rotated.
490 Both load and store lowering load a block of data aligned on a 16-byte
491 boundary. This is the common aligned load code shared between both.
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();
  // base + constant offset: peel the offset off and derive the preferred
  // slot offset from its low 4 bits.
  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);
    if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);
      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);
      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDOperand APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
    prefSlotOffs = -vtm->prefslot_byte;
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    // Frame-index base: offsets are stack-slot relative to SP (R1).
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
    prefSlotOffs = -vtm->prefslot_byte;
  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
571 /// Custom lower loads for CellSPU
573 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
574 within a 16-byte block, we have to rotate to extract the requested element.
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();            // type actually in memory
  MVT OpVT = Op.Val->getValueType(0);    // type the DAG node produces
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // Load the whole 16-byte chunk containing the value:
    AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
      Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      // Non-16-byte-aligned case: rotate by the base pointer plus offset.
      MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      LoadSDNode *LN1 = cast<LoadSDNode>(result);
      Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                           DAG.getConstant(rotamt, PtrVT));
      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT vecVT = MVT::v16i8;
      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
      // Handle the sign and zero-extending loads for i1 and i8:
      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      result = DAG.getNode(NewOpC, OpVT, result);
    // Wrap the value and chain in an LDRESULT node:
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = {
    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
678 /// Custom lower stores for CellSPU
680 All CellSPU stores are aligned to 16-byte boundaries, so for elements
681 within a 16-byte block, we have to generate a shuffle to insert the
682 requested element into its place, then store the resulting block.
685 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
686 StoreSDNode *SN = cast<StoreSDNode>(Op);
687 SDOperand Value = SN->getValue();
688 MVT VT = Value.getValueType();
689 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
690 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
691 unsigned alignment = SN->getAlignment();
693 switch (SN->getAddressingMode()) {
694 case ISD::UNINDEXED: {
695 int chunk_offset, slot_offset;
698 // The vector type we really want to load from the 16-byte chunk, except
699 // in the case of MVT::i1, which has to be v16i8.
700 MVT vecVT, stVecVT = MVT::v16i8;
703 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
704 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
706 SDOperand alignLoadVec =
707 AlignedLoad(Op, DAG, ST, SN, alignment,
708 chunk_offset, slot_offset, VT, was16aligned);
710 if (alignLoadVec.Val == 0)
713 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
714 SDOperand basePtr = LN->getBasePtr();
715 SDOperand the_chain = alignLoadVec.getValue(1);
716 SDOperand theValue = SN->getValue();
720 && (theValue.getOpcode() == ISD::AssertZext
721 || theValue.getOpcode() == ISD::AssertSext)) {
722 // Drill down and get the value for zero- and sign-extended
724 theValue = theValue.getOperand(0);
729 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
730 SDOperand insertEltPtr;
731 SDOperand insertEltOp;
733 // If the base pointer is already a D-form address, then just create
734 // a new D-form address with a slot offset and the orignal base pointer.
735 // Otherwise generate a D-form address with the slot offset relative
736 // to the stack pointer, which is always aligned.
737 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
738 DEBUG(basePtr.Val->dump(&DAG));
741 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
742 (basePtr.getOpcode() == ISD::ADD
743 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
744 insertEltPtr = basePtr;
746 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
749 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
750 result = DAG.getNode(SPUISD::SHUFB, vecVT,
751 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
753 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
755 result = DAG.getStore(the_chain, result, basePtr,
756 LN->getSrcValue(), LN->getSrcValueOffset(),
757 LN->isVolatile(), LN->getAlignment());
766 case ISD::LAST_INDEXED_MODE:
767 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
769 cerr << (unsigned) SN->getAddressingMode() << "\n";
777 /// Generate the address of a constant pool entry.
//! Materialize the address of a constant-pool entry (static reloc only).
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  "LowerConstantPool: Relocation model other than static not supported.");
//! Materialize the address of a jump table (static reloc only).
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Small-memory model: A-form absolute address suffices.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  "LowerJumpTable: Relocation model other than static not supported.");
//! Materialize the address of a global value (static reloc only).
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Small-memory model: A-form absolute address suffices.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  cerr << "LowerGlobalAddress: Relocation model other than static not "
853 //! Custom lower i64 integer constants
855 This code inserts all of the necessary juggling that needs to occur to load
856 a 64-bit constant into a register.
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
  if (VT == MVT::i64) {
    // Splat the constant into a v2i64 and extract element 0: this is how a
    // 64-bit immediate is materialized into a register on SPU.
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  // Any other type is unexpected here:
  cerr << "LowerConstant: unhandled constant type "
878 //! Custom lower double precision floating point constants
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
  "LowerConstantFP: Node is not ConstantFPSDNode");
  if (VT == MVT::f64) {
    // Reuse the i64 constant-lowering path by bit-converting the double's
    // raw bits back to f64.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
896 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
  SDOperand Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    // Promote the condition (i1 -> i32, i8 -> i16) so the SPU branch can
    // consume it; zero-extension preserves the boolean value.
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
  return SDOperand(); // Unchanged
915 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
// Lowers incoming formal arguments: early arguments arrive in SPU argument
// registers (copied into fresh virtual registers of the matching register
// class); later arguments are loaded from fixed stack slots just past the
// minimal frame.  Also spills remaining arg registers for varargs.
917 MachineFunction &MF = DAG.getMachineFunction();
918 MachineFrameInfo *MFI = MF.getFrameInfo();
919 MachineRegisterInfo &RegInfo = MF.getRegInfo();
920 SmallVector<SDOperand, 8> ArgValues;
921 SDOperand Root = Op.getOperand(0);
922 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
924 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
925 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Argument spill area starts just past the minimal frame ([LR]/[SP]).
927 unsigned ArgOffset = SPUFrameInfo::minStackSize();
928 unsigned ArgRegIdx = 0;
929 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
931 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
933 // Add DAG nodes to load the arguments or copy them out of registers.
// The last result of FORMAL_ARGUMENTS is the chain, hence NumValues-1.
934 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
936 bool needsLoad = false;
937 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
938 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
940 switch (ObjectVT.getSimpleVT()) {
942 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
943 << ObjectVT.getMVTString()
// Each case below selects the register class matching the argument type.
// NOTE(review): register passing appears to be skipped entirely for
// varargs functions (!isVarArg) -- confirm this matches the SPU ABI.
948 if (!isVarArg && ArgRegIdx < NumArgRegs) {
949 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
950 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
951 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
958 if (!isVarArg && ArgRegIdx < NumArgRegs) {
959 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
960 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
961 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// All vector argument types land in the unified vector register class.
1013 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1014 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1015 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1016 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1024 // We need to load the argument to a virtual register if we determined above
1025 // that we ran out of physical registers of the appropriate type
1027 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1028 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1029 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1030 ArgOffset += StackSlotSize;
1033 ArgValues.push_back(ArgVal);
1036 // If the function takes variable number of arguments, make a frame index for
1037 // the start of the first vararg value... for expansion of llvm.va_start.
1039 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1041 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1042 // If this function is vararg, store any remaining integer argument regs to
1043 // their spots on the stack so that they may be loaded by deferencing the
1044 // result of va_next.
1045 SmallVector<SDOperand, 8> MemOps;
1046 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1047 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1048 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1049 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1050 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1051 MemOps.push_back(Store);
1052 // Increment the address by four for the next argument to store
1053 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1054 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Chain the vararg spill stores together so they cannot be reordered away.
1056 if (!MemOps.empty())
1057 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1060 ArgValues.push_back(Root);
1062 // Return the new list of results.
1063 return DAG.getMergeValues(Op.Val->getVTList(), &ArgValues[0],
1067 /// isLSAAddress - Return the immediate to use if the specified
1068 /// value is representable as a LSA address.
1069 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1070 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1073 int Addr = C->getValue();
// A valid local-store address must be word-aligned and fit in a 16-bit
// sign-extended word offset (the `Addr << 14 >> 14` round-trip test).
1074 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1075 (Addr << 14 >> 14) != Addr)
1076 return 0; // Top 14 bits have to be sext of immediate.
// Return the word-index form of the address (byte address / 4).
1078 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1083 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Lowers an outgoing call: marshals arguments into SPU argument registers
// and/or stack slots, selects the call addressing form (PC-relative BRSL,
// A-form BRASL, or indirect for large-memory mode), emits the call
// sequence, and copies return values out of R3 (and R4 for expanded i64).
1084 SDOperand Chain = Op.getOperand(0);
1086 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1087 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1089 SDOperand Callee = Op.getOperand(4);
// Arguments come in (value, flag) pairs after the five fixed operands.
1090 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1091 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1092 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1093 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1095 // Handy pointer type
1096 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1098 // Accumulate how many bytes are to be pushed on the stack, including the
1099 // linkage area, and parameter passing area. According to the SPU ABI,
1100 // we minimally need space for [LR] and [SP]
1101 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1103 // Set up a copy of the stack pointer for use loading and storing any
1104 // arguments that may not fit in the registers available for argument
1106 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1108 // Figure out which arguments are going to go in registers, and which in
1110 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1111 unsigned ArgRegIdx = 0;
1113 // Keep track of registers passing arguments
1114 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1115 // And the arguments passed on the stack
1116 SmallVector<SDOperand, 8> MemOpChains;
1118 for (unsigned i = 0; i != NumOps; ++i) {
1119 SDOperand Arg = Op.getOperand(5+2*i);
1121 // PtrOff will be used to store the current argument to the stack if a
1122 // register cannot be found for it.
1123 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1124 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1126 switch (Arg.getValueType().getSimpleVT()) {
1127 default: assert(0 && "Unexpected ValueType for argument!");
// Integer, FP and vector arguments all follow the same pattern: use the
// next available argument register, otherwise spill to a stack slot.
1131 if (ArgRegIdx != NumArgRegs) {
1132 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1134 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1135 ArgOffset += StackSlotSize;
1140 if (ArgRegIdx != NumArgRegs) {
1141 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1143 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1144 ArgOffset += StackSlotSize;
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1161 // Update number of stack bytes actually used, insert a call sequence start
1162 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1163 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1165 if (!MemOpChains.empty()) {
1166 // Adjust the stack pointer for the stack arguments.
1167 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1168 &MemOpChains[0], MemOpChains.size());
1171 // Build a sequence of copy-to-reg nodes chained together with token chain
1172 // and flag operands which copy the outgoing args into the appropriate regs.
1174 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1175 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1177 InFlag = Chain.getValue(1);
1180 SmallVector<SDOperand, 8> Ops;
1181 unsigned CallOpc = SPUISD::CALL;
1183 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1184 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1185 // node so that legalize doesn't hack it.
1186 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1187 GlobalValue *GV = G->getGlobal();
1188 MVT CalleeVT = Callee.getValueType();
1189 SDOperand Zero = DAG.getConstant(0, PtrVT);
1190 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1192 if (!ST->usingLargeMem()) {
1193 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1194 // style calls, otherwise, external symbols are BRASL calls. This assumes
1195 // that declared/defined symbols are in the same compilation unit and can
1196 // be reached through PC-relative jumps.
1199 // This may be an unsafe assumption for JIT and really large compilation
1201 if (GV->isDeclaration()) {
1202 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1204 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1207 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1209 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1211 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1212 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1213 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1214 // If this is an absolute destination address that appears to be a legal
1215 // local store address, use the munged value.
1216 Callee = SDOperand(Dest, 0);
1219 Ops.push_back(Chain);
1220 Ops.push_back(Callee);
1222 // Add argument registers to the end of the list so that they are known live
1224 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1225 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1226 RegsToPass[i].second.getValueType()));
1229 Ops.push_back(InFlag);
1230 // Returns a chain and a flag for retval copy to use.
1231 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1232 &Ops[0], Ops.size());
1233 InFlag = Chain.getValue(1);
1235 Chain = DAG.getCALLSEQ_END(Chain,
1236 DAG.getConstant(NumStackBytes, PtrVT),
1237 DAG.getConstant(0, PtrVT),
1239 if (Op.Val->getValueType(0) != MVT::Other)
1240 InFlag = Chain.getValue(1);
1242 SDOperand ResultVals[3];
1243 unsigned NumResults = 0;
1245 // If the call has results, copy the values out of the ret val registers.
1246 switch (Op.Val->getValueType(0).getSimpleVT()) {
1247 default: assert(0 && "Unexpected ret value!");
1248 case MVT::Other: break;
// Two i32 results means an i64 return expanded into a register pair:
// R4 holds one half, R3 the other, glued so they stay adjacent.
1250 if (Op.Val->getValueType(1) == MVT::i32) {
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1252 ResultVals[0] = Chain.getValue(0);
1253 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1254 Chain.getValue(2)).getValue(1);
1255 ResultVals[1] = Chain.getValue(0);
1258 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
// Scalar FP and vector results are also returned in R3 with their own VT.
1270 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1271 InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1280 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1281 InFlag).getValue(1);
1282 ResultVals[0] = Chain.getValue(0);
1287 // If the function returns void, just return the chain.
1288 if (NumResults == 0)
1291 // Otherwise, merge everything together with a MERGE_VALUES node.
1292 ResultVals[NumResults++] = Chain;
1293 SDOperand Res = DAG.getMergeValues(ResultVals, NumResults);
1294 return Res.getValue(Op.ResNo);
1298 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
// Lowers ISD::RET: runs the return-value calling convention (RetCC_SPU),
// records the return registers as function live-outs, copies each returned
// value into its assigned register, then emits SPUISD::RET_FLAG.
1299 SmallVector<CCValAssign, 16> RVLocs;
1300 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1301 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1302 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1303 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1305 // If this is the first return lowered for this function, add the regs to the
1306 // liveout set for the function.
1307 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1308 for (unsigned i = 0; i != RVLocs.size(); ++i)
1309 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1312 SDOperand Chain = Op.getOperand(0);
1315 // Copy the result values into the output registers.
1316 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1317 CCValAssign &VA = RVLocs[i];
1318 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, flag0, val1, flag1, ...), hence i*2+1.
// Each copy is glued to the next via Flag so they stay contiguous.
1319 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1320 Flag = Chain.getValue(1);
1324 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1326 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1330 //===----------------------------------------------------------------------===//
1331 // Vector related lowering:
1332 //===----------------------------------------------------------------------===//
1334 static ConstantSDNode *
1335 getVecImm(SDNode *N) {
// Returns the ConstantSDNode when build-vector N is a splat of one constant
// value (undef elements are ignored); returns 0 otherwise.
1336 SDOperand OpVal(0, 0);
1338 // Check to see if this buildvec has a single non-undef value in its elements.
1339 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1340 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1342 OpVal = N->getOperand(i);
1343 else if (OpVal != N->getOperand(i))
1347 if (OpVal.Val != 0) {
1348 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1353 return 0; // All UNDEF: use implicit def.; not Constant node
1356 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1357 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1359 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 uint64_t Value = CN->getValue();
1363 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1364 uint64_t UValue = CN->getValue();
1365 uint32_t upper = uint32_t(UValue >> 32);
1366 uint32_t lower = uint32_t(UValue);
1369 Value = Value >> 32;
// ILA-class instructions take an unsigned 18-bit immediate (<= 0x3ffff).
1371 if (Value <= 0x3ffff)
1372 return DAG.getConstant(Value, ValueType);
1378 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1379 /// and the value fits into a signed 16-bit constant, and if so, return the
1381 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1383 if (ConstantSDNode *CN = getVecImm(N)) {
1384 int64_t Value = CN->getSignExtended();
1385 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1386 uint64_t UValue = CN->getValue();
1387 uint32_t upper = uint32_t(UValue >> 32);
1388 uint32_t lower = uint32_t(UValue);
1391 Value = Value >> 32;
// Signed 16-bit immediate range: [-32768, 32767] (IL-class instructions).
1393 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1394 return DAG.getConstant(Value, ValueType);
1401 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1402 /// and the value fits into a signed 10-bit constant, and if so, return the
1404 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1406 if (ConstantSDNode *CN = getVecImm(N)) {
1407 int64_t Value = CN->getSignExtended();
1408 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1409 uint64_t UValue = CN->getValue();
1410 uint32_t upper = uint32_t(UValue >> 32);
1411 uint32_t lower = uint32_t(UValue);
1414 Value = Value >> 32;
// isS10Constant checks the signed 10-bit range used by I10-form opcodes.
1416 if (isS10Constant(Value))
1417 return DAG.getConstant(Value, ValueType);
1423 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1424 /// and the value fits into a signed 8-bit constant, and if so, return the
1427 /// @note: The incoming vector is v16i8 because that's the only way we can load
1428 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1430 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1432 if (ConstantSDNode *CN = getVecImm(N)) {
1433 int Value = (int) CN->getValue();
1434 if (ValueType == MVT::i16
1435 && Value <= 0xffff /* truncated from uint64_t */
// NOTE(review): the (short) shift is arithmetic, so the upper byte is
// sign-extended before being compared against the masked lower byte --
// confirm this is the intended "both bytes equal" test for negatives.
1436 && ((short) Value >> 8) == ((short) Value & 0xff))
1437 return DAG.getConstant(Value & 0xff, ValueType);
1438 else if (ValueType == MVT::i8
1439 && (Value & 0xff) == Value)
1440 return DAG.getConstant(Value, ValueType);
1446 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1447 /// and the value fits into a signed 16-bit constant, and if so, return the
1449 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1451 if (ConstantSDNode *CN = getVecImm(N)) {
1452 uint64_t Value = CN->getValue();
// Matches values whose low 16 bits are zero, i.e. loadable with ILHU
// ("immediate load halfword upper"); returns the upper halfword.
// NOTE(review): the i64 test masks only with 0xffff0000, so any set bits
// above bit 31 reject the match -- confirm that is the intent.
1453 if ((ValueType == MVT::i32
1454 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1455 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1456 return DAG.getConstant(Value >> 16, ValueType);
1462 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1463 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1464 if (ConstantSDNode *CN = getVecImm(N)) {
// Truncation to unsigned is harmless here: only 32 bits are kept anyway.
1465 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1471 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1472 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1473 if (ConstantSDNode *CN = getVecImm(N)) {
1474 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1480 // If this is a vector of constants or undefs, get the bits. A bit in
1481 // UndefBits is set if the corresponding element of the vector is an
1482 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1483 // zero. Return true if this is not an array of constants, false if it is.
1485 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1486 uint64_t UndefBits[2]) {
1487 // Start with zero'd results.
1488 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1490 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1491 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1492 SDOperand OpVal = BV->getOperand(i);
// PartNo selects which uint64_t half of the 128-bit vector this element
// belongs to; SlotNo is its position within that uint64_t.
1494 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1495 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1497 uint64_t EltBits = 0;
1498 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record the whole element as undef in the matching bit positions.
1499 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1500 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1502 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1503 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1504 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw bit pattern.
1505 const APFloat &apf = CN->getValueAPF();
1506 EltBits = (CN->getValueType(0) == MVT::f32
1507 ? FloatToBits(apf.convertToFloat())
1508 : DoubleToBits(apf.convertToDouble()));
1510 // Nonconstant element.
1514 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1517 //printf("%llx %llx %llx %llx\n",
1518 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1522 /// If this is a splat (repetition) of a value across the whole vector, return
1523 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1524 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1525 /// SplatSize = 1 byte.
1526 static bool isConstantSplat(const uint64_t Bits128[2],
1527 const uint64_t Undef128[2],
1529 uint64_t &SplatBits, uint64_t &SplatUndef,
1531 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1532 // the same as the lower 64-bits, ignoring undefs.
// Fold each width in half: OR-merge value bits (undef positions are zero),
// AND-merge undef masks (a bit is undef only if undef in both halves).
1533 uint64_t Bits64 = Bits128[0] | Bits128[1];
1534 uint64_t Undef64 = Undef128[0] & Undef128[1];
1535 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1536 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1537 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1538 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Compare halves with the opposite half's undef mask cleared so that an
// undef lane never causes a mismatch.  Descend only while the caller's
// MinSplatBits permits a narrower splat.
1540 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1541 if (MinSplatBits < 64) {
1543 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1545 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1546 if (MinSplatBits < 32) {
1548 // If the top 16-bits are different than the lower 16-bits, ignoring
1549 // undefs, we have an i32 splat.
1550 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1551 if (MinSplatBits < 16) {
1552 // If the top 8-bits are different than the lower 8-bits, ignoring
1553 // undefs, we have an i16 splat.
1554 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1555 // Otherwise, we have an 8-bit splat.
1556 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1557 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1563 SplatUndef = Undef16;
1570 SplatUndef = Undef32;
1576 SplatBits = Bits128[0];
1577 SplatUndef = Undef128[0];
1583 return false; // Can't be a splat if two pieces don't match.
1586 // If this is a case we can't handle, return null and let the default
1587 // expansion code take care of it. If we CAN select this case, and if it
1588 // selects to a single instruction, return Op. Otherwise, if we can codegen
1589 // this case more efficiently than a constant pool load, lower it to the
1590 // sequence of ops that should be used.
1591 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1592 MVT VT = Op.getValueType();
1593 // If this is a vector of constants or undefs, get the bits. A bit in
1594 // UndefBits is set if the corresponding element of the vector is an
1595 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1597 uint64_t VectorBits[2];
1598 uint64_t UndefBits[2];
1599 uint64_t SplatBits, SplatUndef;
1601 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1602 || !isConstantSplat(VectorBits, UndefBits,
1603 VT.getVectorElementType().getSizeInBits(),
1604 SplatBits, SplatUndef, SplatSize))
1605 return SDOperand(); // Not a constant vector, not a splat.
1607 switch (VT.getSimpleVT()) {
1610 uint32_t Value32 = SplatBits;
1611 assert(SplatSize == 4
1612 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1613 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1614 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1615 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1616 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1620 uint64_t f64val = SplatBits;
1621 assert(SplatSize == 8
1622 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1623 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1624 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1625 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1626 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1630 // 8-bit constants have to be expanded to 16-bits
1631 unsigned short Value16 = SplatBits | (SplatBits << 8);
1633 for (int i = 0; i < 8; ++i)
1634 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1635 return DAG.getNode(ISD::BIT_CONVERT, VT,
1636 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// For v8i16, a byte-sized splat (SplatSize == 1) is widened by
// duplicating the byte into both halves of the halfword.
1639 unsigned short Value16;
1641 Value16 = (unsigned short) (SplatBits & 0xffff);
1643 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1644 SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1646 for (int i = 0; i < 8; ++i) Ops[i] = T;
1647 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1650 unsigned int Value = SplatBits;
1651 SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1652 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1655 uint64_t val = SplatBits;
1656 uint32_t upper = uint32_t(val >> 32);
1657 uint32_t lower = uint32_t(val);
1659 if (upper == lower) {
1660 // Magic constant that can be matched by IL, ILA, et. al.
1661 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1662 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1666 SmallVector<SDOperand, 16> ShufBytes;
1668 bool upper_special, lower_special;
1670 // NOTE: This code creates common-case shuffle masks that can be easily
1671 // detected as common expressions. It is not attempting to create highly
1672 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1674 // Detect if the upper or lower half is a special shuffle mask pattern:
1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1678 // Create lower vector if not a special pattern
1679 if (!lower_special) {
1680 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1681 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1682 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1683 LO32C, LO32C, LO32C, LO32C));
1686 // Create upper vector if not a special pattern
1687 if (!upper_special) {
1688 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1689 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1690 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1691 HI32C, HI32C, HI32C, HI32C));
1694 // If either upper or lower are special, then the two input operands are
1695 // the same (basically, one of them is a "don't care")
1700 if (lower_special && upper_special) {
1701 // Unhappy situation... both upper and lower are special, so punt with
1702 // a target constant:
1703 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1704 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control word, one 32-bit lane at a time.  Even
// lanes (i & 1) == 0 take bytes from the upper half, odd lanes from the
// lower half; special patterns are encoded with shufb's generator bytes.
1708 for (int i = 0; i < 4; ++i) {
1710 for (int j = 0; j < 4; ++j) {
1712 bool process_upper, process_lower;
1714 process_upper = (upper_special && (i & 1) == 0);
1715 process_lower = (lower_special && (i & 1) == 1);
1717 if (process_upper || process_lower) {
1718 if ((process_upper && upper == 0)
1719 || (process_lower && lower == 0))
1721 else if ((process_upper && upper == 0xffffffff)
1722 || (process_lower && lower == 0xffffffff))
1724 else if ((process_upper && upper == 0x80000000)
1725 || (process_lower && lower == 0x80000000))
1726 val |= (j == 0 ? 0xe0 : 0x80);
1728 val |= i * 4 + j + ((i & 1) * 16);
1731 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1734 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1735 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1736 &ShufBytes[0], ShufBytes.size()));
1744 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1745 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1746 /// permutation vector, V3, is monotonically increasing with one "exception"
1747 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1748 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1749 /// In either case, the net result is going to eventually invoke SHUFB to
1750 /// permute/shuffle the bytes from V1 and V2.
1752 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1753 /// control word for byte/halfword/word insertion. This takes care of a single
1754 /// element move from V2 into V1.
1756 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1757 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1758 SDOperand V1 = Op.getOperand(0);
1759 SDOperand V2 = Op.getOperand(1);
1760 SDOperand PermMask = Op.getOperand(2);
1762 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1764 // If we have a single element being moved from V1 to V2, this can be handled
1765 // using the C*[DX] compute mask instructions, but the vector elements have
1766 // to be monotonically increasing with one exception element.
1767 MVT EltVT = V1.getValueType().getVectorElementType();
1768 unsigned EltsFromV2 = 0;
// V2EltIdx0 is the mask index where V2's elements begin (= element count
// of V1): 16 for v16i8, 8 for v8i16, 4 for v4i32.
1770 unsigned V2EltIdx0 = 0;
1771 unsigned CurrElt = 0;
1772 bool monotonic = true;
1773 if (EltVT == MVT::i8)
1775 else if (EltVT == MVT::i16)
1777 else if (EltVT == MVT::i32)
1780 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: bail out of the fast path as soon as more than one
// element comes from V2 or the V1 indices stop increasing monotonically.
1782 for (unsigned i = 0, e = PermMask.getNumOperands();
1783 EltsFromV2 <= 1 && monotonic && i != e;
1786 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1789 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1791 if (SrcElt >= V2EltIdx0) {
// NOTE(review): (V2EltIdx0 - SrcElt) is non-positive when
// SrcElt >= V2EltIdx0 -- this byte offset looks inverted
// (expected (SrcElt - V2EltIdx0) << 2); confirm against the
// C*D control-word semantics before relying on it.
1793 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1794 } else if (CurrElt != SrcElt) {
1801 if (EltsFromV2 == 1 && monotonic) {
1802 // Compute mask and shuffle
1803 MachineFunction &MF = DAG.getMachineFunction();
1804 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1805 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1806 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1807 // Initialize temporary register to 0
1808 SDOperand InitTempReg =
1809 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1810 // Copy register's contents as index in INSERT_MASK:
1811 SDOperand ShufMaskOp =
1812 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1813 DAG.getTargetConstant(V2Elt, MVT::i32),
1814 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1815 // Use shuffle mask in SHUFB synthetic instruction:
1816 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1818 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1819 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1821 SmallVector<SDOperand, 16> ResultMask;
1822 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1824 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1827 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1829 for (unsigned j = 0; j < BytesPerElement; ++j) {
1830 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1835 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1836 &ResultMask[0], ResultMask.size());
1837 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1841 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// Lowers SCALAR_TO_VECTOR: constants become an explicit constant splat
// BUILD_VECTOR (which later simplifies to a vector register load); all
// other scalars are promoted with SPUISD::PROMOTE_SCALAR.
1842 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1844 if (Op0.Val->getOpcode() == ISD::Constant) {
1845 // For a constant, build the appropriate constant vector, which will
1846 // eventually simplify to a vector register load.
1848 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1849 SmallVector<SDOperand, 16> ConstVecValues;
1853 // Create a constant vector:
1854 switch (Op.getValueType().getSimpleVT()) {
1855 default: assert(0 && "Unexpected constant value type in "
1856 "LowerSCALAR_TO_VECTOR");
1857 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1858 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1859 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1860 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1861 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1862 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element of the vector.
1865 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1866 for (size_t j = 0; j < n_copies; ++j)
1867 ConstVecValues.push_back(CValue);
1869 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1870 &ConstVecValues[0], ConstVecValues.size());
1872 // Otherwise, copy the value from one register to another:
1873 switch (Op0.getValueType().getSimpleVT()) {
1874 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1881 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1888 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1889 switch (Op.getValueType().getSimpleVT()) {
1891 cerr << "CellSPU: Unknown vector multiplication, got "
1892 << Op.getValueType().getMVTString()
1898 SDOperand rA = Op.getOperand(0);
1899 SDOperand rB = Op.getOperand(1);
1900 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1901 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1902 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1903 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1905 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1909 // Multiply two v8i16 vectors (pipeline friendly version):
1910 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1911 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1912 // c) Use SELB to select upper and lower halves from the intermediate results
1914 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1915 // dual-issue. This code does manage to do this, even if it's a little on
1918 MachineFunction &MF = DAG.getMachineFunction();
1919 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1920 SDOperand Chain = Op.getOperand(0);
1921 SDOperand rA = Op.getOperand(0);
1922 SDOperand rB = Op.getOperand(1);
1923 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1924 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1927 DAG.getCopyToReg(Chain, FSMBIreg,
1928 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1929 DAG.getConstant(0xcccc, MVT::i16)));
1932 DAG.getCopyToReg(FSMBOp, HiProdReg,
1933 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1935 SDOperand HHProd_v4i32 =
1936 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1937 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1939 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1940 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1941 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1942 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1944 DAG.getConstant(16, MVT::i16))),
1945 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1948 // This M00sE is N@stI! (apologies to Monty Python)
1950 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1951 // is to break it all apart, sign extend, and reassemble the various
1952 // intermediate products.
1954 SDOperand rA = Op.getOperand(0);
1955 SDOperand rB = Op.getOperand(1);
1956 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1957 SDOperand c16 = DAG.getConstant(16, MVT::i32);
1960 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1961 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1964 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1966 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1969 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1970 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1972 SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1973 DAG.getConstant(0x2222, MVT::i16));
1975 SDOperand LoProdParts =
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1977 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1978 LLProd, LHProd, FSMBmask));
1980 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1983 DAG.getNode(ISD::AND, MVT::v4i32,
1985 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1986 LoProdMask, LoProdMask,
1987 LoProdMask, LoProdMask));
1990 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1998 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1999 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2000 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2002 SDOperand HHProd_1 =
2003 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2006 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2007 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2010 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2012 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2016 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2018 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2019 DAG.getNode(ISD::OR, MVT::v4i32,
2027 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2028 MachineFunction &MF = DAG.getMachineFunction();
2029 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2031 SDOperand A = Op.getOperand(0);
2032 SDOperand B = Op.getOperand(1);
2033 MVT VT = Op.getValueType();
2035 unsigned VRegBR, VRegC;
2037 if (VT == MVT::f32) {
2038 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2039 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2041 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2042 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2044 // TODO: make sure we're feeding FPInterp the right arguments
2045 // Right now: fi B, frest(B)
2048 // (Floating Interpolate (FP Reciprocal Estimate B))
2050 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2051 DAG.getNode(SPUISD::FPInterp, VT, B,
2052 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2054 // Computes A * BRcpl and stores in a temporary register
2056 DAG.getCopyToReg(BRcpl, VRegC,
2057 DAG.getNode(ISD::FMUL, VT, A,
2058 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2059 // What's the Chain variable do? It's magic!
2060 // TODO: set Chain = Op(0).getEntryNode()
2062 return DAG.getNode(ISD::FADD, VT,
2063 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2064 DAG.getNode(ISD::FMUL, VT,
2065 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2066 DAG.getNode(ISD::FSUB, VT, A,
2067 DAG.getNode(ISD::FMUL, VT, B,
2068 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2071 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2072 MVT VT = Op.getValueType();
2073 SDOperand N = Op.getOperand(0);
2074 SDOperand Elt = Op.getOperand(1);
2075 SDOperand ShufMask[16];
2076 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2078 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2080 int EltNo = (int) C->getValue();
2083 if (VT == MVT::i8 && EltNo >= 16)
2084 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2085 else if (VT == MVT::i16 && EltNo >= 8)
2086 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2087 else if (VT == MVT::i32 && EltNo >= 4)
2088 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2089 else if (VT == MVT::i64 && EltNo >= 2)
2090 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2092 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2093 // i32 and i64: Element 0 is the preferred slot
2094 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2097 // Need to generate shuffle mask and extract:
2098 int prefslot_begin = -1, prefslot_end = -1;
2099 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2101 switch (VT.getSimpleVT()) {
2103 assert(false && "Invalid value type!");
2105 prefslot_begin = prefslot_end = 3;
2109 prefslot_begin = 2; prefslot_end = 3;
2113 prefslot_begin = 0; prefslot_end = 3;
2117 prefslot_begin = 0; prefslot_end = 7;
2122 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2123 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2125 for (int i = 0; i < 16; ++i) {
2126 // zero fill uppper part of preferred slot, don't care about the
2128 unsigned int mask_val;
2130 if (i <= prefslot_end) {
2132 ((i < prefslot_begin)
2134 : elt_byte + (i - prefslot_begin));
2136 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2138 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2141 SDOperand ShufMaskVec =
2142 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2144 sizeof(ShufMask) / sizeof(ShufMask[0]));
2146 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2147 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2148 N, N, ShufMaskVec));
2152 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2153 SDOperand VecOp = Op.getOperand(0);
2154 SDOperand ValOp = Op.getOperand(1);
2155 SDOperand IdxOp = Op.getOperand(2);
2156 MVT VT = Op.getValueType();
2158 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2159 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2161 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2162 // Use $2 because it's always 16-byte aligned and it's available:
2163 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2166 DAG.getNode(SPUISD::SHUFB, VT,
2167 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2169 DAG.getNode(SPUISD::INSERT_MASK, VT,
2170 DAG.getNode(ISD::ADD, PtrVT,
2172 DAG.getConstant(CN->getValue(),
2178 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2180 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2182 assert(Op.getValueType() == MVT::i8);
2185 assert(0 && "Unhandled i8 math operator");
2189 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2191 SDOperand N1 = Op.getOperand(1);
2192 N0 = (N0.getOpcode() != ISD::Constant
2193 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2194 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2195 N1 = (N1.getOpcode() != ISD::Constant
2196 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2197 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2198 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2199 DAG.getNode(Opc, MVT::i16, N0, N1));
2203 SDOperand N1 = Op.getOperand(1);
2205 N0 = (N0.getOpcode() != ISD::Constant
2206 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2207 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2208 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2209 N1 = (N1.getOpcode() != ISD::Constant
2210 ? DAG.getNode(N1Opc, MVT::i16, N1)
2211 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2212 SDOperand ExpandArg =
2213 DAG.getNode(ISD::OR, MVT::i16, N0,
2214 DAG.getNode(ISD::SHL, MVT::i16,
2215 N0, DAG.getConstant(8, MVT::i16)));
2216 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2217 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2221 SDOperand N1 = Op.getOperand(1);
2223 N0 = (N0.getOpcode() != ISD::Constant
2224 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2225 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2226 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2227 N1 = (N1.getOpcode() != ISD::Constant
2228 ? DAG.getNode(N1Opc, MVT::i16, N1)
2229 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2230 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2231 DAG.getNode(Opc, MVT::i16, N0, N1));
2234 SDOperand N1 = Op.getOperand(1);
2236 N0 = (N0.getOpcode() != ISD::Constant
2237 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2238 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2239 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2240 N1 = (N1.getOpcode() != ISD::Constant
2241 ? DAG.getNode(N1Opc, MVT::i16, N1)
2242 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2243 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2244 DAG.getNode(Opc, MVT::i16, N0, N1));
2247 SDOperand N1 = Op.getOperand(1);
2249 N0 = (N0.getOpcode() != ISD::Constant
2250 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2251 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2252 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2253 N1 = (N1.getOpcode() != ISD::Constant
2254 ? DAG.getNode(N1Opc, MVT::i16, N1)
2255 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2256 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2257 DAG.getNode(Opc, MVT::i16, N0, N1));
2265 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2267 MVT VT = Op.getValueType();
2268 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2270 SDOperand Op0 = Op.getOperand(0);
2273 case ISD::ZERO_EXTEND:
2274 case ISD::SIGN_EXTEND:
2275 case ISD::ANY_EXTEND: {
2276 MVT Op0VT = Op0.getValueType();
2277 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2279 assert(Op0VT == MVT::i32
2280 && "CellSPU: Zero/sign extending something other than i32");
2281 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2283 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2284 ? SPUISD::ROTBYTES_RIGHT_S
2285 : SPUISD::ROTQUAD_RZ_BYTES);
2286 SDOperand PromoteScalar =
2287 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2289 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2290 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2291 DAG.getNode(NewOpc, Op0VecVT,
2293 DAG.getConstant(4, MVT::i32))));
2297 // Turn operands into vectors to satisfy type checking (shufb works on
2300 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2302 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2303 SmallVector<SDOperand, 16> ShufBytes;
2305 // Create the shuffle mask for "rotating" the borrow up one register slot
2306 // once the borrow is generated.
2307 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2308 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2309 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2310 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2312 SDOperand CarryGen =
2313 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2314 SDOperand ShiftedCarry =
2315 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2317 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2318 &ShufBytes[0], ShufBytes.size()));
2320 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2321 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2322 Op0, Op1, ShiftedCarry));
2326 // Turn operands into vectors to satisfy type checking (shufb works on
2329 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2331 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2332 SmallVector<SDOperand, 16> ShufBytes;
2334 // Create the shuffle mask for "rotating" the borrow up one register slot
2335 // once the borrow is generated.
2336 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2337 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2341 SDOperand BorrowGen =
2342 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2343 SDOperand ShiftedBorrow =
2344 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2345 BorrowGen, BorrowGen,
2346 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2347 &ShufBytes[0], ShufBytes.size()));
2349 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2350 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2351 Op0, Op1, ShiftedBorrow));
2355 SDOperand ShiftAmt = Op.getOperand(1);
2356 MVT ShiftAmtVT = ShiftAmt.getValueType();
2357 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2358 SDOperand MaskLower =
2359 DAG.getNode(SPUISD::SELB, VecVT,
2361 DAG.getConstant(0, VecVT),
2362 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2363 DAG.getConstant(0xff00ULL, MVT::i16)));
2364 SDOperand ShiftAmtBytes =
2365 DAG.getNode(ISD::SRL, ShiftAmtVT,
2367 DAG.getConstant(3, ShiftAmtVT));
2368 SDOperand ShiftAmtBits =
2369 DAG.getNode(ISD::AND, ShiftAmtVT,
2371 DAG.getConstant(7, ShiftAmtVT));
2373 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2374 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2375 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2376 MaskLower, ShiftAmtBytes),
2381 MVT VT = Op.getValueType();
2382 SDOperand ShiftAmt = Op.getOperand(1);
2383 MVT ShiftAmtVT = ShiftAmt.getValueType();
2384 SDOperand ShiftAmtBytes =
2385 DAG.getNode(ISD::SRL, ShiftAmtVT,
2387 DAG.getConstant(3, ShiftAmtVT));
2388 SDOperand ShiftAmtBits =
2389 DAG.getNode(ISD::AND, ShiftAmtVT,
2391 DAG.getConstant(7, ShiftAmtVT));
2393 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2394 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2395 Op0, ShiftAmtBytes),
2400 // Promote Op0 to vector
2402 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2403 SDOperand ShiftAmt = Op.getOperand(1);
2404 MVT ShiftVT = ShiftAmt.getValueType();
2406 // Negate variable shift amounts
2407 if (!isa<ConstantSDNode>(ShiftAmt)) {
2408 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2409 DAG.getConstant(0, ShiftVT), ShiftAmt);
2412 SDOperand UpperHalfSign =
2413 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2414 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2415 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2416 Op0, DAG.getConstant(31, MVT::i32))));
2417 SDOperand UpperHalfSignMask =
2418 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2419 SDOperand UpperLowerMask =
2420 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2421 DAG.getConstant(0xff00, MVT::i16));
2422 SDOperand UpperLowerSelect =
2423 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2424 UpperHalfSignMask, Op0, UpperLowerMask);
2425 SDOperand RotateLeftBytes =
2426 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2427 UpperLowerSelect, ShiftAmt);
2428 SDOperand RotateLeftBits =
2429 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2430 RotateLeftBytes, ShiftAmt);
2432 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2440 //! Lower byte immediate operations for v16i8 vectors:
2442 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2445 MVT VT = Op.getValueType();
2447 ConstVec = Op.getOperand(0);
2448 Arg = Op.getOperand(1);
2449 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2450 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2451 ConstVec = ConstVec.getOperand(0);
2453 ConstVec = Op.getOperand(1);
2454 Arg = Op.getOperand(0);
2455 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2456 ConstVec = ConstVec.getOperand(0);
2461 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2462 uint64_t VectorBits[2];
2463 uint64_t UndefBits[2];
2464 uint64_t SplatBits, SplatUndef;
2467 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2468 && isConstantSplat(VectorBits, UndefBits,
2469 VT.getVectorElementType().getSizeInBits(),
2470 SplatBits, SplatUndef, SplatSize)) {
2471 SDOperand tcVec[16];
2472 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2473 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2475 // Turn the BUILD_VECTOR into a set of target constants:
2476 for (size_t i = 0; i < tcVecSize; ++i)
2479 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2480 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2487 //! Lower i32 multiplication
2488 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2490 switch (VT.getSimpleVT()) {
2492 cerr << "CellSPU: Unknown LowerMUL value type, got "
2493 << Op.getValueType().getMVTString()
2499 SDOperand rA = Op.getOperand(0);
2500 SDOperand rB = Op.getOperand(1);
2502 return DAG.getNode(ISD::ADD, MVT::i32,
2503 DAG.getNode(ISD::ADD, MVT::i32,
2504 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2505 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2506 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2513 //! Custom lowering for CTPOP (count population)
2515 Custom lowering code that counts the number ones in the input
2516 operand. SPU has such an instruction, but it counts the number of
2517 ones per byte, which then have to be accumulated.
2519 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2520 MVT VT = Op.getValueType();
2521 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2523 switch (VT.getSimpleVT()) {
2525 assert(false && "Invalid value type!");
2527 SDOperand N = Op.getOperand(0);
2528 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2530 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2531 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2533 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2537 MachineFunction &MF = DAG.getMachineFunction();
2538 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2540 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2542 SDOperand N = Op.getOperand(0);
2543 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2544 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2545 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2547 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2548 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2550 // CNTB_result becomes the chain to which all of the virtual registers
2551 // CNTB_reg, SUM1_reg become associated:
2552 SDOperand CNTB_result =
2553 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2555 SDOperand CNTB_rescopy =
2556 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2558 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2560 return DAG.getNode(ISD::AND, MVT::i16,
2561 DAG.getNode(ISD::ADD, MVT::i16,
2562 DAG.getNode(ISD::SRL, MVT::i16,
2569 MachineFunction &MF = DAG.getMachineFunction();
2570 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2572 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2573 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2575 SDOperand N = Op.getOperand(0);
2576 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2577 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2578 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2579 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2581 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2582 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2584 // CNTB_result becomes the chain to which all of the virtual registers
2585 // CNTB_reg, SUM1_reg become associated:
2586 SDOperand CNTB_result =
2587 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2589 SDOperand CNTB_rescopy =
2590 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2593 DAG.getNode(ISD::SRL, MVT::i32,
2594 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2597 DAG.getNode(ISD::ADD, MVT::i32,
2598 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2600 SDOperand Sum1_rescopy =
2601 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2604 DAG.getNode(ISD::SRL, MVT::i32,
2605 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2608 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2609 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2611 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2621 /// LowerOperation - Provide custom lowering hooks for some operations.
2624 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2626 unsigned Opc = (unsigned) Op.getOpcode();
2627 MVT VT = Op.getValueType();
2631 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2632 cerr << "Op.getOpcode() = " << Opc << "\n";
2633 cerr << "*Op.Val:\n";
2640 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2642 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2643 case ISD::ConstantPool:
2644 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2645 case ISD::GlobalAddress:
2646 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2647 case ISD::JumpTable:
2648 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2650 return LowerConstant(Op, DAG);
2651 case ISD::ConstantFP:
2652 return LowerConstantFP(Op, DAG);
2654 return LowerBRCOND(Op, DAG);
2655 case ISD::FORMAL_ARGUMENTS:
2656 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2658 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2660 return LowerRET(Op, DAG, getTargetMachine());
2663 // i8, i64 math ops:
2664 case ISD::ZERO_EXTEND:
2665 case ISD::SIGN_EXTEND:
2666 case ISD::ANY_EXTEND:
2675 return LowerI8Math(Op, DAG, Opc);
2676 else if (VT == MVT::i64)
2677 return LowerI64Math(Op, DAG, Opc);
2681 // Vector-related lowering.
2682 case ISD::BUILD_VECTOR:
2683 return LowerBUILD_VECTOR(Op, DAG);
2684 case ISD::SCALAR_TO_VECTOR:
2685 return LowerSCALAR_TO_VECTOR(Op, DAG);
2686 case ISD::VECTOR_SHUFFLE:
2687 return LowerVECTOR_SHUFFLE(Op, DAG);
2688 case ISD::EXTRACT_VECTOR_ELT:
2689 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2690 case ISD::INSERT_VECTOR_ELT:
2691 return LowerINSERT_VECTOR_ELT(Op, DAG);
2693 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2697 return LowerByteImmed(Op, DAG);
2699 // Vector and i8 multiply:
2702 return LowerVectorMUL(Op, DAG);
2703 else if (VT == MVT::i8)
2704 return LowerI8Math(Op, DAG, Opc);
2706 return LowerMUL(Op, DAG, VT, Opc);
2709 if (VT == MVT::f32 || VT == MVT::v4f32)
2710 return LowerFDIVf32(Op, DAG);
2711 // else if (Op.getValueType() == MVT::f64)
2712 // return LowerFDIVf64(Op, DAG);
2714 assert(0 && "Calling FDIV on unsupported MVT");
2717 return LowerCTPOP(Op, DAG);
2723 //===----------------------------------------------------------------------===//
2724 // Target Optimization Hooks
2725 //===----------------------------------------------------------------------===//
2728 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2731 TargetMachine &TM = getTargetMachine();
2733 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2734 SelectionDAG &DAG = DCI.DAG;
2735 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2736 SDOperand Result; // Initially, NULL result
2738 switch (N->getOpcode()) {
2741 SDOperand Op1 = N->getOperand(1);
2743 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2744 SDOperand Op01 = Op0.getOperand(1);
2745 if (Op01.getOpcode() == ISD::Constant
2746 || Op01.getOpcode() == ISD::TargetConstant) {
2747 // (add <const>, (SPUindirect <arg>, <const>)) ->
2748 // (SPUindirect <arg>, <const + const>)
2749 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2750 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2751 SDOperand combinedConst =
2752 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2753 Op0.getValueType());
2755 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2756 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2757 DEBUG(cerr << "With: (SPUindirect <arg>, "
2758 << CN0->getValue() + CN1->getValue() << ")\n");
2759 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2760 Op0.getOperand(0), combinedConst);
2762 } else if (isa<ConstantSDNode>(Op0)
2763 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2764 SDOperand Op11 = Op1.getOperand(1);
2765 if (Op11.getOpcode() == ISD::Constant
2766 || Op11.getOpcode() == ISD::TargetConstant) {
2767 // (add (SPUindirect <arg>, <const>), <const>) ->
2768 // (SPUindirect <arg>, <const + const>)
2769 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2770 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2771 SDOperand combinedConst =
2772 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2773 Op0.getValueType());
2775 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2776 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2777 DEBUG(cerr << "With: (SPUindirect <arg>, "
2778 << CN0->getValue() + CN1->getValue() << ")\n");
2780 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2781 Op1.getOperand(0), combinedConst);
2786 case ISD::SIGN_EXTEND:
2787 case ISD::ZERO_EXTEND:
2788 case ISD::ANY_EXTEND: {
2789 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2790 N->getValueType(0) == Op0.getValueType()) {
2791 // (any_extend (SPUextract_elt0 <arg>)) ->
2792 // (SPUextract_elt0 <arg>)
2793 // Types must match, however...
2794 DEBUG(cerr << "Replace: ");
2795 DEBUG(N->dump(&DAG));
2796 DEBUG(cerr << "\nWith: ");
2797 DEBUG(Op0.Val->dump(&DAG));
2798 DEBUG(cerr << "\n");
2804 case SPUISD::IndirectAddr: {
2805 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2806 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2807 if (CN->getValue() == 0) {
2808 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2809 // (SPUaform <addr>, 0)
2811 DEBUG(cerr << "Replace: ");
2812 DEBUG(N->dump(&DAG));
2813 DEBUG(cerr << "\nWith: ");
2814 DEBUG(Op0.Val->dump(&DAG));
2815 DEBUG(cerr << "\n");
2822 case SPUISD::SHLQUAD_L_BITS:
2823 case SPUISD::SHLQUAD_L_BYTES:
2824 case SPUISD::VEC_SHL:
2825 case SPUISD::VEC_SRL:
2826 case SPUISD::VEC_SRA:
2827 case SPUISD::ROTQUAD_RZ_BYTES:
2828 case SPUISD::ROTQUAD_RZ_BITS: {
2829 SDOperand Op1 = N->getOperand(1);
2831 if (isa<ConstantSDNode>(Op1)) {
2832 // Kill degenerate vector shifts:
2833 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2835 if (CN->getValue() == 0) {
2841 case SPUISD::PROMOTE_SCALAR: {
2842 switch (Op0.getOpcode()) {
2845 case ISD::ANY_EXTEND:
2846 case ISD::ZERO_EXTEND:
2847 case ISD::SIGN_EXTEND: {
2848 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2850 // but only if the SPUpromote_scalar and <arg> types match.
2851 SDOperand Op00 = Op0.getOperand(0);
2852 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2853 SDOperand Op000 = Op00.getOperand(0);
2854 if (Op000.getValueType() == N->getValueType(0)) {
2860 case SPUISD::EXTRACT_ELT0: {
2861 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2863 Result = Op0.getOperand(0);
2870 // Otherwise, return unchanged.
2873 DEBUG(cerr << "\nReplace.SPU: ");
2874 DEBUG(N->dump(&DAG));
2875 DEBUG(cerr << "\nWith: ");
2876 DEBUG(Result.Val->dump(&DAG));
2877 DEBUG(cerr << "\n");
2884 //===----------------------------------------------------------------------===//
2885 // Inline Assembly Support
2886 //===----------------------------------------------------------------------===//
2888 /// getConstraintType - Given a constraint letter, return the type of
2889 /// constraint it is for this target.
2890 SPUTargetLowering::ConstraintType
2891 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2892 if (ConstraintLetter.size() == 1) {
2893 switch (ConstraintLetter[0]) {
2900 return C_RegisterClass;
2903 return TargetLowering::getConstraintType(ConstraintLetter);
2906 std::pair<unsigned, const TargetRegisterClass*>
2907 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2910 if (Constraint.size() == 1) {
2911 // GCC RS6000 Constraint Letters
2912 switch (Constraint[0]) {
2916 return std::make_pair(0U, SPU::R64CRegisterClass);
2917 return std::make_pair(0U, SPU::R32CRegisterClass);
2920 return std::make_pair(0U, SPU::R32FPRegisterClass);
2921 else if (VT == MVT::f64)
2922 return std::make_pair(0U, SPU::R64FPRegisterClass);
2925 return std::make_pair(0U, SPU::GPRCRegisterClass);
2929 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2932 //! Compute used/known bits for a SPU operand
2934 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2938 const SelectionDAG &DAG,
2939 unsigned Depth ) const {
2941 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2944 switch (Op.getOpcode()) {
2946 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2956 case SPUISD::PROMOTE_SCALAR: {
2957 SDOperand Op0 = Op.getOperand(0);
2958 MVT Op0VT = Op0.getValueType();
2959 unsigned Op0VTBits = Op0VT.getSizeInBits();
2960 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2961 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2962 KnownOne |= APInt(Op0VTBits, InMask, false);
2966 case SPUISD::LDRESULT:
2967 case SPUISD::EXTRACT_ELT0:
2968 case SPUISD::EXTRACT_ELT0_CHAINED: {
2969 MVT OpVT = Op.getValueType();
2970 unsigned OpVTBits = OpVT.getSizeInBits();
2971 uint64_t InMask = OpVT.getIntegerVTBitMask();
2972 KnownZero |= APInt(OpVTBits, ~InMask, false);
2973 KnownOne |= APInt(OpVTBits, InMask, false);
2978 case EXTRACT_I1_ZEXT:
2979 case EXTRACT_I1_SEXT:
2980 case EXTRACT_I8_ZEXT:
2981 case EXTRACT_I8_SEXT:
2986 case SPUISD::SHLQUAD_L_BITS:
2987 case SPUISD::SHLQUAD_L_BYTES:
2988 case SPUISD::VEC_SHL:
2989 case SPUISD::VEC_SRL:
2990 case SPUISD::VEC_SRA:
2991 case SPUISD::VEC_ROTL:
2992 case SPUISD::VEC_ROTR:
2993 case SPUISD::ROTQUAD_RZ_BYTES:
2994 case SPUISD::ROTQUAD_RZ_BITS:
2995 case SPUISD::ROTBYTES_RIGHT_S:
2996 case SPUISD::ROTBYTES_LEFT:
2997 case SPUISD::ROTBYTES_LEFT_CHAINED:
2998 case SPUISD::SELECT_MASK:
3000 case SPUISD::FPInterp:
3001 case SPUISD::FPRecipEst:
3002 case SPUISD::SEXT32TO64:
3007 // LowerAsmOperandForConstraint
3009 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3010 char ConstraintLetter,
3011 std::vector<SDOperand> &Ops,
3012 SelectionDAG &DAG) const {
3013 // Default, for the time being, to the base class handler
3014 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3017 /// isLegalAddressImmediate - Return true if the integer value can be used
3018 /// as the offset of the target addressing mode.
3019 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3020 // SPU's addresses are 256K:
3021 return (V > -(1 << 18) && V < (1 << 18) - 1);
3024 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {