1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
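//! SPU registers are 128 bits wide, and a scalar value of a given type occupies
//! a fixed "preferred slot" within that quadword (for example, bytes 0-3 for a
//! 32-bit value). prefslot_byte records the byte offset of that slot so the
//! load/store lowering below knows how far to rotate a quadword.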
41 struct valtype_map_s {
42   const MVT valtype;
43   const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
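  // Note: local-store accesses always move full 16-byte quadwords, so the
  // extending loads above are custom lowered as a quadword load followed by a
  // rotate/extract of the addressed element (see LowerLOAD below).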
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the result to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182   // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196   // SPU has no hardware square root instruction, so expand FSQRT
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203   // SPU can do rotate right and left, so legalize them... but custom lower for i8
204   // because no native i8 rotate instructions exist.
205 setOperationAction(ISD::ROTR, MVT::i32, Legal);
206 setOperationAction(ISD::ROTR, MVT::i16, Legal);
207 setOperationAction(ISD::ROTR, MVT::i8, Custom);
208 setOperationAction(ISD::ROTL, MVT::i32, Legal);
209 setOperationAction(ISD::ROTL, MVT::i16, Legal);
210 setOperationAction(ISD::ROTL, MVT::i8, Custom);
211 // SPU has no native version of shift left/right for i8
212 setOperationAction(ISD::SHL, MVT::i8, Custom);
213 setOperationAction(ISD::SRL, MVT::i8, Custom);
214 setOperationAction(ISD::SRA, MVT::i8, Custom);
215 // And SPU needs custom lowering for shift left/right for i64
216 setOperationAction(ISD::SHL, MVT::i64, Custom);
217 setOperationAction(ISD::SRL, MVT::i64, Custom);
218 setOperationAction(ISD::SRA, MVT::i64, Custom);
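  // (The i64 shifts are synthesized from the quadword byte/bit shift nodes,
  // e.g. SPUISD::SHLQUAD_L_BYTES / SHLQUAD_L_BITS, since the SPU shifters
  // operate on word elements and whole quadwords rather than on i64 directly.)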
220 // Custom lower i8, i32 and i64 multiplications
221 setOperationAction(ISD::MUL, MVT::i8, Custom);
222 setOperationAction(ISD::MUL, MVT::i32, Custom);
223 setOperationAction(ISD::MUL, MVT::i64, Custom);
225 // Need to custom handle (some) common i8, i64 math ops
226 setOperationAction(ISD::ADD, MVT::i64, Custom);
227 setOperationAction(ISD::SUB, MVT::i8, Custom);
228 setOperationAction(ISD::SUB, MVT::i64, Custom);
230   // SPU does not have BSWAP, but it does support CTLZ for i32.
231 // CTPOP has to be custom lowered.
232 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
233 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
235 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
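  // (CTPOP is lowered using SPUISD::CNTB, which counts set bits per byte; the
  // per-byte counts are then summed out to the requested width.)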
240 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
241 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
243 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
245 // SPU has a version of select that implements (a&~c)|(b&c), just like
246 // select ought to work:
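  // (SELB computes (a & ~mask) | (b & mask) bitwise, so i8/i16/i32 selects can
  // be emitted directly once the condition is turned into a full-width mask.)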
247 setOperationAction(ISD::SELECT, MVT::i1, Promote);
248 setOperationAction(ISD::SELECT, MVT::i8, Legal);
249 setOperationAction(ISD::SELECT, MVT::i16, Legal);
250 setOperationAction(ISD::SELECT, MVT::i32, Legal);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
253 setOperationAction(ISD::SETCC, MVT::i1, Promote);
254 setOperationAction(ISD::SETCC, MVT::i8, Legal);
255 setOperationAction(ISD::SETCC, MVT::i16, Legal);
256 setOperationAction(ISD::SETCC, MVT::i32, Legal);
257 setOperationAction(ISD::SETCC, MVT::i64, Expand);
259   // Zero extension and sign extension for i64 have to be custom lowered:
261 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
262 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
263 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
265 // SPU has a legal FP -> signed INT instruction
266 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
269 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
271 // FDIV on SPU requires custom lowering
272 setOperationAction(ISD::FDIV, MVT::f32, Custom);
273 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
275 // SPU has [U|S]INT_TO_FP
276 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
277 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
280 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
285 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
286 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
287 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
288 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
290 // We cannot sextinreg(i1). Expand to shifts.
291 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
293 // Support label based line numbers.
294 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
295 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
297 // We want to legalize GlobalAddress and ConstantPool nodes into the
298 // appropriate instructions to materialize the address.
299 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
301 MVT VT = (MVT::SimpleValueType)sctype;
303 setOperationAction(ISD::GlobalAddress, VT, Custom);
304 setOperationAction(ISD::ConstantPool, VT, Custom);
305 setOperationAction(ISD::JumpTable, VT, Custom);
308 // RET must be custom lowered, to meet ABI requirements
309 setOperationAction(ISD::RET, MVT::Other, Custom);
311 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
312 setOperationAction(ISD::VASTART , MVT::Other, Custom);
314 // Use the default implementation.
315 setOperationAction(ISD::VAARG , MVT::Other, Expand);
316 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
317 setOperationAction(ISD::VAEND , MVT::Other, Expand);
318 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
319 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
320 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
321 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
323 // Cell SPU has instructions for converting between i64 and fp.
324 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
325 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
327 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
328 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
330 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
331 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
333 // First set operation action for all vector types to expand. Then we
334 // will selectively turn on ones that can be effectively codegen'd.
335 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
336 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
337 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
338 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
342 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
343 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
344 MVT VT = (MVT::SimpleValueType)i;
346 // add/sub are legal for all supported vector VT's.
347 setOperationAction(ISD::ADD , VT, Legal);
348 setOperationAction(ISD::SUB , VT, Legal);
349 // mul has to be custom lowered.
350 setOperationAction(ISD::MUL , VT, Custom);
352 setOperationAction(ISD::AND , VT, Legal);
353 setOperationAction(ISD::OR , VT, Legal);
354 setOperationAction(ISD::XOR , VT, Legal);
355 setOperationAction(ISD::LOAD , VT, Legal);
356 setOperationAction(ISD::SELECT, VT, Legal);
357 setOperationAction(ISD::STORE, VT, Legal);
359 // These operations need to be expanded:
360 setOperationAction(ISD::SDIV, VT, Expand);
361 setOperationAction(ISD::SREM, VT, Expand);
362 setOperationAction(ISD::UDIV, VT, Expand);
363 setOperationAction(ISD::UREM, VT, Expand);
364 setOperationAction(ISD::FDIV, VT, Custom);
366 // Custom lower build_vector, constant pool spills, insert and
367 // extract vector elements:
368 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
369 setOperationAction(ISD::ConstantPool, VT, Custom);
370 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
371 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
372 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
373 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
376 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
377 setOperationAction(ISD::AND, MVT::v16i8, Custom);
378 setOperationAction(ISD::OR, MVT::v16i8, Custom);
379 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
380 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
382 setShiftAmountType(MVT::i32);
383 setSetCCResultContents(ZeroOrOneSetCCResult);
385 setStackPointerRegisterToSaveRestore(SPU::R1);
387 // We have target-specific dag combine patterns for the following nodes:
388 setTargetDAGCombine(ISD::ADD);
389 setTargetDAGCombine(ISD::ZERO_EXTEND);
390 setTargetDAGCombine(ISD::SIGN_EXTEND);
391 setTargetDAGCombine(ISD::ANY_EXTEND);
393 computeRegisterProperties();
397 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
399 if (node_names.empty()) {
400 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
401 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
402 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
403 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
404 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
405 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
406 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
407 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
408 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
409 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
410 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
411 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
412 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
413 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
414 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
415 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
416 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
417 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
418 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
419 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
420 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
421 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
422 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
423 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
424 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
425 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
426 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
427 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
428 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
429 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
430 "SPUISD::ROTQUAD_RZ_BYTES";
431 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
432 "SPUISD::ROTQUAD_RZ_BITS";
433 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
434 "SPUISD::ROTBYTES_RIGHT_S";
435 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
436 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
437 "SPUISD::ROTBYTES_LEFT_CHAINED";
438 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
439 "SPUISD::ROTBYTES_LEFT_BITS";
440 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
441 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
442 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
443 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
444 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
445 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
446 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
447 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
448 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
451 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
453 return ((i != node_names.end()) ? i->second : 0);
456 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
457 MVT VT = Op.getValueType();
464 //===----------------------------------------------------------------------===//
465 // Calling convention code:
466 //===----------------------------------------------------------------------===//
468 #include "SPUGenCallingConv.inc"
470 //===----------------------------------------------------------------------===//
471 // LowerOperation implementation
472 //===----------------------------------------------------------------------===//
474 /// Aligned load common code for CellSPU
476 \param[in] Op The SelectionDAG load or store operand
477 \param[in] DAG The selection DAG
478 \param[in] ST CellSPU subtarget information structure
479 \param[in,out] alignment Caller initializes this to the load or store node's
480 value from getAlignment(), may be updated while generating the aligned load
481 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
482 offset (divisible by 16, modulo 16 == 0)
483 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
484 offset of the preferred slot (modulo 16 != 0)
485 \param[in,out] VT Caller initializes this value type to the load or store
486 node's loaded or stored value type; may be updated if an i1-extended load or
487 store is encountered.
488 \param[out] was16aligned true if the base pointer had 16-byte alignment,
489 otherwise false. Can help to determine if the chunk needs to be rotated.
491 Both load and store lowering load a block of data aligned on a 16-byte
492 boundary. This is the common aligned load code shared between both.
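
Illustrative example: for a constant byte offset off, the quadword is fetched
from (off & ~0xf) and prefSlotOffs becomes (off & 0xf) - prefslot_byte, i.e.
the number of bytes the loaded quadword must later be rotated so the addressed
value lands in its preferred slot.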
495 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
496             LSBaseSDNode *LSN,
497             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
498 MVT &VT, bool &was16aligned)
500 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
501 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
502 SDValue basePtr = LSN->getBasePtr();
503 SDValue chain = LSN->getChain();
505 if (basePtr.getOpcode() == ISD::ADD) {
506 SDValue Op1 = basePtr.getNode()->getOperand(1);
508 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
509 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
511 alignOffs = (int) CN->getValue();
512 prefSlotOffs = (int) (alignOffs & 0xf);
514 // Adjust the rotation amount to ensure that the final result ends up in
515 // the preferred slot:
516 prefSlotOffs -= vtm->prefslot_byte;
517 basePtr = basePtr.getOperand(0);
519 // Loading from memory, can we adjust alignment?
520 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
521 SDValue APtr = basePtr.getOperand(0);
522 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
523 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
524 alignment = GSDN->getGlobal()->getAlignment();
529 prefSlotOffs = -vtm->prefslot_byte;
531 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
532 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
533 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
534 prefSlotOffs = (int) (alignOffs & 0xf);
535 prefSlotOffs -= vtm->prefslot_byte;
536 basePtr = DAG.getRegister(SPU::R1, VT);
539 prefSlotOffs = -vtm->prefslot_byte;
542 if (alignment == 16) {
543 // Realign the base pointer as a D-Form address:
544 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
545 basePtr = DAG.getNode(ISD::ADD, PtrVT,
547 DAG.getConstant((alignOffs & ~0xf), PtrVT));
550 // Emit the vector load:
552 return DAG.getLoad(MVT::v16i8, chain, basePtr,
553 LSN->getSrcValue(), LSN->getSrcValueOffset(),
554 LSN->isVolatile(), 16);
557 // Unaligned load or we're using the "large memory" model, which means that
558 // we have to be very pessimistic:
559 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
560 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
564 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
565 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 was16aligned = false;
567 return DAG.getLoad(MVT::v16i8, chain, basePtr,
568 LSN->getSrcValue(), LSN->getSrcValueOffset(),
569 LSN->isVolatile(), 16);
572 /// Custom lower loads for CellSPU
574 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
575 within a 16-byte block, we have to rotate to extract the requested element.
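
Roughly, the lowering is: load the containing v16i8 quadword, rotate it left
(SPUISD::ROTBYTES_LEFT_CHAINED) when the value is not already in the preferred
slot or the pointer is not known to be 16-byte aligned, then extract element 0
as the scalar result (SPUISD::EXTRACT_ELT0_CHAINED), sign- or zero-extending
i1/i8 results as required.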
578 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
579 LoadSDNode *LN = cast<LoadSDNode>(Op);
580 SDValue the_chain = LN->getChain();
581 MVT VT = LN->getMemoryVT();
582 MVT OpVT = Op.getNode()->getValueType(0);
583 ISD::LoadExtType ExtType = LN->getExtensionType();
584 unsigned alignment = LN->getAlignment();
587 switch (LN->getAddressingMode()) {
588 case ISD::UNINDEXED: {
592     AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
594 if (result.getNode() == 0)
597 the_chain = result.getValue(1);
598 // Rotate the chunk if necessary
601 if (rotamt != 0 || !was16aligned) {
602 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
607 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
609 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
610 LoadSDNode *LN1 = cast<LoadSDNode>(result);
611 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
612 DAG.getConstant(rotamt, PtrVT));
615 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
616 the_chain = result.getValue(1);
619 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
621 MVT vecVT = MVT::v16i8;
623 // Convert the loaded v16i8 vector to the appropriate vector type
624 // specified by the operand:
627 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
629 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
632 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
633 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
634 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
635 the_chain = result.getValue(1);
637 // Handle the sign and zero-extending loads for i1 and i8:
640 if (ExtType == ISD::SEXTLOAD) {
641 NewOpC = (OpVT == MVT::i1
642 ? SPUISD::EXTRACT_I1_SEXT
643 : SPUISD::EXTRACT_I8_SEXT);
645 assert(ExtType == ISD::ZEXTLOAD);
646 NewOpC = (OpVT == MVT::i1
647 ? SPUISD::EXTRACT_I1_ZEXT
648 : SPUISD::EXTRACT_I8_ZEXT);
651 result = DAG.getNode(NewOpC, OpVT, result);
654 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
655 SDValue retops[2] = {
660 result = DAG.getNode(SPUISD::LDRESULT, retvts,
661 retops, sizeof(retops) / sizeof(retops[0]));
668 case ISD::LAST_INDEXED_MODE:
669 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
671 cerr << (unsigned) LN->getAddressingMode() << "\n";
679 /// Custom lower stores for CellSPU
681 All CellSPU stores are aligned to 16-byte boundaries, so for elements
682 within a 16-byte block, we have to generate a shuffle to insert the
683 requested element into its place, then store the resulting block.
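
Roughly: the containing quadword is re-fetched with AlignedLoad, an insertion
control mask is built with SPUISD::INSERT_MASK (selected to one of the C*D
instructions), SPUISD::SHUFB merges the new element into the quadword, and the
merged quadword is stored back.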
686 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
687 StoreSDNode *SN = cast<StoreSDNode>(Op);
688 SDValue Value = SN->getValue();
689 MVT VT = Value.getValueType();
690 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
691 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
692 unsigned alignment = SN->getAlignment();
694 switch (SN->getAddressingMode()) {
695 case ISD::UNINDEXED: {
696 int chunk_offset, slot_offset;
699 // The vector type we really want to load from the 16-byte chunk, except
700 // in the case of MVT::i1, which has to be v16i8.
701 MVT vecVT, stVecVT = MVT::v16i8;
704 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
705 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
707 SDValue alignLoadVec =
708 AlignedLoad(Op, DAG, ST, SN, alignment,
709 chunk_offset, slot_offset, VT, was16aligned);
711 if (alignLoadVec.getNode() == 0)
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
715 SDValue basePtr = LN->getBasePtr();
716 SDValue the_chain = alignLoadVec.getValue(1);
717 SDValue theValue = SN->getValue();
721 && (theValue.getOpcode() == ISD::AssertZext
722 || theValue.getOpcode() == ISD::AssertSext)) {
723 // Drill down and get the value for zero- and sign-extended
725 theValue = theValue.getOperand(0);
730 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
731 SDValue insertEltPtr;
734 // If the base pointer is already a D-form address, then just create
735   // a new D-form address with a slot offset and the original base pointer.
736 // Otherwise generate a D-form address with the slot offset relative
737 // to the stack pointer, which is always aligned.
738 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
739 DEBUG(basePtr.getNode()->dump(&DAG));
742 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
743 (basePtr.getOpcode() == ISD::ADD
744 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
745 insertEltPtr = basePtr;
747 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
750 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
751 result = DAG.getNode(SPUISD::SHUFB, vecVT,
752 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
754 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
756 result = DAG.getStore(the_chain, result, basePtr,
757 LN->getSrcValue(), LN->getSrcValueOffset(),
758 LN->isVolatile(), LN->getAlignment());
767 case ISD::LAST_INDEXED_MODE:
768     cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
770 cerr << (unsigned) SN->getAddressingMode() << "\n";
778 /// Generate the address of a constant pool entry.
780 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
781 MVT PtrVT = Op.getValueType();
782 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
783 Constant *C = CP->getConstVal();
784 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
785 SDValue Zero = DAG.getConstant(0, PtrVT);
786 const TargetMachine &TM = DAG.getTarget();
788 if (TM.getRelocationModel() == Reloc::Static) {
789 if (!ST->usingLargeMem()) {
790 // Just return the SDValue with the constant pool address in it.
791 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
793 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
794 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
795 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
800 "LowerConstantPool: Relocation model other than static not supported.");
805 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
806 MVT PtrVT = Op.getValueType();
807 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
808 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
809 SDValue Zero = DAG.getConstant(0, PtrVT);
810 const TargetMachine &TM = DAG.getTarget();
812 if (TM.getRelocationModel() == Reloc::Static) {
813 if (!ST->usingLargeMem()) {
814 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
816 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
817 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
818 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
823 "LowerJumpTable: Relocation model other than static not supported.");
828 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
829 MVT PtrVT = Op.getValueType();
830 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
831 GlobalValue *GV = GSDN->getGlobal();
832 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
833 const TargetMachine &TM = DAG.getTarget();
834 SDValue Zero = DAG.getConstant(0, PtrVT);
836 if (TM.getRelocationModel() == Reloc::Static) {
837 if (!ST->usingLargeMem()) {
838 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
840 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
841 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
842 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
845 cerr << "LowerGlobalAddress: Relocation model other than static not "
854 //! Custom lower i64 integer constants
856 This code inserts all of the necessary juggling that needs to occur to load
857 a 64-bit constant into a register.
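
Roughly: the constant is splatted into a v2i64 BUILD_VECTOR and element 0 is
then extracted with SPUISD::EXTRACT_ELT0, so the usual vector constant
materialization patterns (IL, ILA and friends) can do the work.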
860 LowerConstant(SDValue Op, SelectionDAG &DAG) {
861 MVT VT = Op.getValueType();
862 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
864 if (VT == MVT::i64) {
865 SDValue T = DAG.getConstant(CN->getValue(), MVT::i64);
866 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
867 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
869 cerr << "LowerConstant: unhandled constant type "
879 //! Custom lower double precision floating point constants
881 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
882 MVT VT = Op.getValueType();
883 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
886 "LowerConstantFP: Node is not ConstantFPSDNode");
888 if (VT == MVT::f64) {
889 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
890 return DAG.getNode(ISD::BIT_CONVERT, VT,
891 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
897 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
899 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
901 SDValue Cond = Op.getOperand(1);
902 MVT CondVT = Cond.getValueType();
905 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
906 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
907 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
909 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
912 return SDValue(); // Unchanged
916 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
918 MachineFunction &MF = DAG.getMachineFunction();
919 MachineFrameInfo *MFI = MF.getFrameInfo();
920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
921 SmallVector<SDValue, 8> ArgValues;
922 SDValue Root = Op.getOperand(0);
923 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
925 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
926 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
928 unsigned ArgOffset = SPUFrameInfo::minStackSize();
929 unsigned ArgRegIdx = 0;
930 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
932 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
934 // Add DAG nodes to load the arguments or copy them out of registers.
935 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
937 bool needsLoad = false;
938 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
939 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
941 switch (ObjectVT.getSimpleVT()) {
943 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
944 << ObjectVT.getMVTString()
949 if (!isVarArg && ArgRegIdx < NumArgRegs) {
950 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
951 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
952 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
959 if (!isVarArg && ArgRegIdx < NumArgRegs) {
960 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
961 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
962 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
969 if (!isVarArg && ArgRegIdx < NumArgRegs) {
970 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
971 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
972 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
979 if (!isVarArg && ArgRegIdx < NumArgRegs) {
980 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
981 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
982 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
989 if (!isVarArg && ArgRegIdx < NumArgRegs) {
990 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
991 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
992 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1001 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1014 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1015 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1016 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1017 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1025 // We need to load the argument to a virtual register if we determined above
1026 // that we ran out of physical registers of the appropriate type
1028 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1029 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1030 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1031 ArgOffset += StackSlotSize;
1034 ArgValues.push_back(ArgVal);
1037   // If the function takes a variable number of arguments, make a frame index for
1038 // the start of the first vararg value... for expansion of llvm.va_start.
1040 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1042 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1043 // If this function is vararg, store any remaining integer argument regs to
1044   // their spots on the stack so that they may be loaded by dereferencing the
1045 // result of va_next.
1046 SmallVector<SDValue, 8> MemOps;
1047 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1048 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1049 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1050 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1051 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1052 MemOps.push_back(Store);
1053 // Increment the address by four for the next argument to store
1054 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1055 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1057 if (!MemOps.empty())
1058 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1061 ArgValues.push_back(Root);
1063 // Return the new list of results.
1064 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1068 /// isLSAAddress - Return the immediate to use if the specified
1069 /// value is representable as an LSA address.
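/// (The address must be word aligned and fit in an 18-bit signed value, i.e.
/// its top 14 bits must be a sign extension of bit 17; the returned immediate
/// is the address divided by 4.)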
1070 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1071 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1074 int Addr = C->getValue();
1075 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1076 (Addr << 14 >> 14) != Addr)
1077 return 0; // Top 14 bits have to be sext of immediate.
1079 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).getNode();
1084 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1085 SDValue Chain = Op.getOperand(0);
1087 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1088 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1090 SDValue Callee = Op.getOperand(4);
1091 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1092 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1093 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1094 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1096 // Handy pointer type
1097 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1099 // Accumulate how many bytes are to be pushed on the stack, including the
1100 // linkage area, and parameter passing area. According to the SPU ABI,
1101 // we minimally need space for [LR] and [SP]
1102 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1104 // Set up a copy of the stack pointer for use loading and storing any
1105   // arguments that may not fit in the registers available for argument passing.
1107 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1109   // Figure out which arguments are going to go in registers, and which in memory.
1111 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1112 unsigned ArgRegIdx = 0;
1114 // Keep track of registers passing arguments
1115 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1116 // And the arguments passed on the stack
1117 SmallVector<SDValue, 8> MemOpChains;
1119 for (unsigned i = 0; i != NumOps; ++i) {
1120 SDValue Arg = Op.getOperand(5+2*i);
1122 // PtrOff will be used to store the current argument to the stack if a
1123 // register cannot be found for it.
1124 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1125 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1127 switch (Arg.getValueType().getSimpleVT()) {
1128 default: assert(0 && "Unexpected ValueType for argument!");
1132 if (ArgRegIdx != NumArgRegs) {
1133 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1136 ArgOffset += StackSlotSize;
1141 if (ArgRegIdx != NumArgRegs) {
1142 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1144 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1145 ArgOffset += StackSlotSize;
1152 if (ArgRegIdx != NumArgRegs) {
1153 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1155 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156 ArgOffset += StackSlotSize;
1162 // Update number of stack bytes actually used, insert a call sequence start
1163 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1164 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1166 if (!MemOpChains.empty()) {
1167 // Adjust the stack pointer for the stack arguments.
1168 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1169 &MemOpChains[0], MemOpChains.size());
1172 // Build a sequence of copy-to-reg nodes chained together with token chain
1173 // and flag operands which copy the outgoing args into the appropriate regs.
1175 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1176 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1178 InFlag = Chain.getValue(1);
1181 SmallVector<SDValue, 8> Ops;
1182 unsigned CallOpc = SPUISD::CALL;
1184 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1185 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1186 // node so that legalize doesn't hack it.
1187 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1188 GlobalValue *GV = G->getGlobal();
1189 MVT CalleeVT = Callee.getValueType();
1190 SDValue Zero = DAG.getConstant(0, PtrVT);
1191 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1193 if (!ST->usingLargeMem()) {
1194 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1195 // style calls, otherwise, external symbols are BRASL calls. This assumes
1196 // that declared/defined symbols are in the same compilation unit and can
1197 // be reached through PC-relative jumps.
1200       // This may be an unsafe assumption for JIT and really large compilation units.
1202 if (GV->isDeclaration()) {
1203 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1205 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1208       // "Large memory" mode: Turn all calls into indirect calls with an X-form address.
1210 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1212 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1213 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1214 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1215 // If this is an absolute destination address that appears to be a legal
1216 // local store address, use the munged value.
1217 Callee = SDValue(Dest, 0);
1220 Ops.push_back(Chain);
1221 Ops.push_back(Callee);
1223   // Add argument registers to the end of the list so that they are known live into the call.
1225 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1226 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1227 RegsToPass[i].second.getValueType()));
1229 if (InFlag.getNode())
1230 Ops.push_back(InFlag);
1231 // Returns a chain and a flag for retval copy to use.
1232 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1233 &Ops[0], Ops.size());
1234 InFlag = Chain.getValue(1);
1236 Chain = DAG.getCALLSEQ_END(Chain,
1237 DAG.getConstant(NumStackBytes, PtrVT),
1238 DAG.getConstant(0, PtrVT),
1240 if (Op.getNode()->getValueType(0) != MVT::Other)
1241 InFlag = Chain.getValue(1);
1243 SDValue ResultVals[3];
1244 unsigned NumResults = 0;
1246 // If the call has results, copy the values out of the ret val registers.
1247 switch (Op.getNode()->getValueType(0).getSimpleVT()) {
1248 default: assert(0 && "Unexpected ret value!");
1249 case MVT::Other: break;
1251 if (Op.getNode()->getValueType(1) == MVT::i32) {
1252 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1255 Chain.getValue(2)).getValue(1);
1256 ResultVals[1] = Chain.getValue(0);
1259 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1260 ResultVals[0] = Chain.getValue(0);
1265 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1266 ResultVals[0] = Chain.getValue(0);
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1272 InFlag).getValue(1);
1273 ResultVals[0] = Chain.getValue(0);
1281 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1282 InFlag).getValue(1);
1283 ResultVals[0] = Chain.getValue(0);
1288 // If the function returns void, just return the chain.
1289 if (NumResults == 0)
1292 // Otherwise, merge everything together with a MERGE_VALUES node.
1293 ResultVals[NumResults++] = Chain;
1294 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1295 return Res.getValue(Op.getResNo());
1299 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1300 SmallVector<CCValAssign, 16> RVLocs;
1301 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1302 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1303 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1304 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1306 // If this is the first return lowered for this function, add the regs to the
1307 // liveout set for the function.
1308 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1309 for (unsigned i = 0; i != RVLocs.size(); ++i)
1310 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1313 SDValue Chain = Op.getOperand(0);
1316 // Copy the result values into the output registers.
1317 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1318 CCValAssign &VA = RVLocs[i];
1319 assert(VA.isRegLoc() && "Can only return in registers!");
1320 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1321 Flag = Chain.getValue(1);
1325 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1327 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1331 //===----------------------------------------------------------------------===//
1332 // Vector related lowering:
1333 //===----------------------------------------------------------------------===//
1335 static ConstantSDNode *
1336 getVecImm(SDNode *N) {
1337 SDValue OpVal(0, 0);
1339 // Check to see if this buildvec has a single non-undef value in its elements.
1340 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1341 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1342 if (OpVal.getNode() == 0)
1343 OpVal = N->getOperand(i);
1344 else if (OpVal != N->getOperand(i))
1348 if (OpVal.getNode() != 0) {
1349 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1354   return 0; // All UNDEF: use implicit def; not a Constant node
1357 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1358 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1359 /// constant.
1360 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 uint64_t Value = CN->getValue();
1364 if (ValueType == MVT::i64) {
1365 uint64_t UValue = CN->getValue();
1366 uint32_t upper = uint32_t(UValue >> 32);
1367 uint32_t lower = uint32_t(UValue);
1370 Value = Value >> 32;
1372 if (Value <= 0x3ffff)
1373 return DAG.getConstant(Value, ValueType);
1379 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1380 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1382 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1384 if (ConstantSDNode *CN = getVecImm(N)) {
1385 int64_t Value = CN->getSignExtended();
1386 if (ValueType == MVT::i64) {
1387 uint64_t UValue = CN->getValue();
1388 uint32_t upper = uint32_t(UValue >> 32);
1389 uint32_t lower = uint32_t(UValue);
1392 Value = Value >> 32;
1394 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1395 return DAG.getConstant(Value, ValueType);
1402 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1403 /// and the value fits into a signed 10-bit constant, and if so, return the constant.
1405 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 int64_t Value = CN->getSignExtended();
1409 if (ValueType == MVT::i64) {
1410 uint64_t UValue = CN->getValue();
1411 uint32_t upper = uint32_t(UValue >> 32);
1412 uint32_t lower = uint32_t(UValue);
1415 Value = Value >> 32;
1417 if (isS10Constant(Value))
1418 return DAG.getConstant(Value, ValueType);
1424 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1425 /// and the value fits into a signed 8-bit constant, and if so, return the constant.
1428 /// @note: The incoming vector is v16i8 because that's the only way we can load
1429 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same.
1431 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 int Value = (int) CN->getValue();
1435 if (ValueType == MVT::i16
1436 && Value <= 0xffff /* truncated from uint64_t */
1437 && ((short) Value >> 8) == ((short) Value & 0xff))
1438 return DAG.getConstant(Value & 0xff, ValueType);
1439 else if (ValueType == MVT::i8
1440 && (Value & 0xff) == Value)
1441 return DAG.getConstant(Value, ValueType);
1447 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1448 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1450 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1452 if (ConstantSDNode *CN = getVecImm(N)) {
1453 uint64_t Value = CN->getValue();
1454 if ((ValueType == MVT::i32
1455 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1456 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1457 return DAG.getConstant(Value >> 16, ValueType);
1463 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1464 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1465 if (ConstantSDNode *CN = getVecImm(N)) {
1466 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1472 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1473 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1474 if (ConstantSDNode *CN = getVecImm(N)) {
1475     return DAG.getConstant(CN->getValue(), MVT::i64);
1481 // If this is a vector of constants or undefs, get the bits. A bit in
1482 // UndefBits is set if the corresponding element of the vector is an
1483 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1484 // zero. Return true if this is not an array of constants, false if it is.
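//
// Illustrative example: for the v4i32 build_vector <1, 2, 3, 4>, element 0 is
// packed into the high 32 bits of VectorBits[0], yielding
// VectorBits[0] = 0x0000000100000002, VectorBits[1] = 0x0000000300000004,
// and UndefBits[0] = UndefBits[1] = 0.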
1486 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1487 uint64_t UndefBits[2]) {
1488 // Start with zero'd results.
1489 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1491 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1492 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1493 SDValue OpVal = BV->getOperand(i);
1495     unsigned PartNo = i >= e/2; // In the upper 64 bits?
1496 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1498 uint64_t EltBits = 0;
1499 if (OpVal.getOpcode() == ISD::UNDEF) {
1500 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1501 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1503 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1504 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1505 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1506 const APFloat &apf = CN->getValueAPF();
1507 EltBits = (CN->getValueType(0) == MVT::f32
1508 ? FloatToBits(apf.convertToFloat())
1509 : DoubleToBits(apf.convertToDouble()));
1511 // Nonconstant element.
1515 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1518 //printf("%llx %llx %llx %llx\n",
1519 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1523 /// If this is a splat (repetition) of a value across the whole vector, return
1524 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1525 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1526 /// SplatSize = 1 byte.
1527 static bool isConstantSplat(const uint64_t Bits128[2],
1528 const uint64_t Undef128[2],
1530 uint64_t &SplatBits, uint64_t &SplatUndef,
1532 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1533 // the same as the lower 64-bits, ignoring undefs.
1534 uint64_t Bits64 = Bits128[0] | Bits128[1];
1535 uint64_t Undef64 = Undef128[0] & Undef128[1];
1536 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1537 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1538 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1539 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1541 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1542 if (MinSplatBits < 64) {
1544       // Check that the top 32-bits are the same as the lower 32-bits, ignoring undefs.
1546 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1547 if (MinSplatBits < 32) {
1549 // If the top 16-bits are different than the lower 16-bits, ignoring
1550 // undefs, we have an i32 splat.
1551 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1552 if (MinSplatBits < 16) {
1553 // If the top 8-bits are different than the lower 8-bits, ignoring
1554 // undefs, we have an i16 splat.
1555 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1556 // Otherwise, we have an 8-bit splat.
1557 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1558 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1564 SplatUndef = Undef16;
1571 SplatUndef = Undef32;
1577 SplatBits = Bits128[0];
1578 SplatUndef = Undef128[0];
1584 return false; // Can't be a splat if two pieces don't match.
1587 // If this is a case we can't handle, return null and let the default
1588 // expansion code take care of it. If we CAN select this case, and if it
1589 // selects to a single instruction, return Op. Otherwise, if we can codegen
1590 // this case more efficiently than a constant pool load, lower it to the
1591 // sequence of ops that should be used.
1592 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1593 MVT VT = Op.getValueType();
1594 // If this is a vector of constants or undefs, get the bits. A bit in
1595 // UndefBits is set if the corresponding element of the vector is an
1596   // ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1598 uint64_t VectorBits[2];
1599 uint64_t UndefBits[2];
1600 uint64_t SplatBits, SplatUndef;
1602 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1603 || !isConstantSplat(VectorBits, UndefBits,
1604 VT.getVectorElementType().getSizeInBits(),
1605 SplatBits, SplatUndef, SplatSize))
1606 return SDValue(); // Not a constant vector, not a splat.
1608 switch (VT.getSimpleVT()) {
1611 uint32_t Value32 = SplatBits;
1612 assert(SplatSize == 4
1613 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1614 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615 SDValue T = DAG.getConstant(Value32, MVT::i32);
1616 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1617 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1621 uint64_t f64val = SplatBits;
1622 assert(SplatSize == 8
1623 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1624 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1625 SDValue T = DAG.getConstant(f64val, MVT::i64);
1626 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1627 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1631 // 8-bit constants have to be expanded to 16-bits
1632 unsigned short Value16 = SplatBits | (SplatBits << 8);
1634 for (int i = 0; i < 8; ++i)
1635 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1636 return DAG.getNode(ISD::BIT_CONVERT, VT,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1640 unsigned short Value16;
1642 Value16 = (unsigned short) (SplatBits & 0xffff);
1644 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1645 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1647 for (int i = 0; i < 8; ++i) Ops[i] = T;
1648 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1651 unsigned int Value = SplatBits;
1652 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1653 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1656 uint64_t val = SplatBits;
1657 uint32_t upper = uint32_t(val >> 32);
1658 uint32_t lower = uint32_t(val);
1660 if (upper == lower) {
1661       // Magic constant that can be matched by IL, ILA, et al.
1662 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1663 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1667 SmallVector<SDValue, 16> ShufBytes;
1669 bool upper_special, lower_special;
1671 // NOTE: This code creates common-case shuffle masks that can be easily
1672 // detected as common expressions. It is not attempting to create highly
1673 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1675 // Detect if the upper or lower half is a special shuffle mask pattern:
1676 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1677 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1679 // Create lower vector if not a special pattern
1680 if (!lower_special) {
1681 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1682 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1683 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1684 LO32C, LO32C, LO32C, LO32C));
1687 // Create upper vector if not a special pattern
1688 if (!upper_special) {
1689 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1690 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1691 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1692 HI32C, HI32C, HI32C, HI32C));
1695 // If either upper or lower are special, then the two input operands are
1696 // the same (basically, one of them is a "don't care")
1701 if (lower_special && upper_special) {
1702 // Unhappy situation... both upper and lower are special, so punt with
1703 // a target constant:
1704 SDValue Zero = DAG.getConstant(0, MVT::i32);
1705 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1709 for (int i = 0; i < 4; ++i) {
1711 for (int j = 0; j < 4; ++j) {
1713 bool process_upper, process_lower;
1715 process_upper = (upper_special && (i & 1) == 0);
1716 process_lower = (lower_special && (i & 1) == 1);
1718 if (process_upper || process_lower) {
1719 if ((process_upper && upper == 0)
1720 || (process_lower && lower == 0))
1722 else if ((process_upper && upper == 0xffffffff)
1723 || (process_lower && lower == 0xffffffff))
1725 else if ((process_upper && upper == 0x80000000)
1726 || (process_lower && lower == 0x80000000))
1727 val |= (j == 0 ? 0xe0 : 0x80);
1729 val |= i * 4 + j + ((i & 1) * 16);
1732 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1735 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1736 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1737 &ShufBytes[0], ShufBytes.size()));
1745 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1746 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1747 /// permutation vector, V3, is monotonically increasing with one "exception"
1748 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1749 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1750 /// In either case, the net result is going to eventually invoke SHUFB to
1751 /// permute/shuffle the bytes from V1 and V2.
1753 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1754 /// generate the control word for byte/halfword/word insertion. This takes care of a single
1755 /// element move from V2 into V1.
1757 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
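///
/// A qualifying mask for v4i32 would be, e.g., <0, 1, 5, 3>: every element
/// keeps its own slot except slot 2, which takes element 5 (element 1 of V2),
/// so a single insertion control word suffices.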
1758 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1759 SDValue V1 = Op.getOperand(0);
1760 SDValue V2 = Op.getOperand(1);
1761 SDValue PermMask = Op.getOperand(2);
1763 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1765 // If we have a single element being moved from V1 to V2, this can be handled
1766 // using the C*[DX] compute mask instructions, but the vector elements have
1767 // to be monotonically increasing with one exception element.
1768 MVT EltVT = V1.getValueType().getVectorElementType();
1769 unsigned EltsFromV2 = 0;
1771 unsigned V2EltIdx0 = 0;
1772 unsigned CurrElt = 0;
1773 bool monotonic = true;
1774 if (EltVT == MVT::i8)
1776 else if (EltVT == MVT::i16)
1778 else if (EltVT == MVT::i32)
1781 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1783 for (unsigned i = 0, e = PermMask.getNumOperands();
1784 EltsFromV2 <= 1 && monotonic && i != e;
1787 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1790 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1792 if (SrcElt >= V2EltIdx0) {
1794 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1795 } else if (CurrElt != SrcElt) {
1802 if (EltsFromV2 == 1 && monotonic) {
1803 // Compute mask and shuffle
1804 MachineFunction &MF = DAG.getMachineFunction();
1805 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1806 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1807 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1808 // Initialize temporary register to 0
1809 SDValue InitTempReg =
1810 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1811 // Copy register's contents as index in INSERT_MASK:
1812 SDValue ShufMaskOp =
1813 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1814 DAG.getTargetConstant(V2Elt, MVT::i32),
1815 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1816 // Use shuffle mask in SHUFB synthetic instruction:
1817 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1819 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1820 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
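// For v4i32, for instance, each mask element expands to four byte selectors,
// so a source element index of 5 becomes byte indices 20..23 in the v16i8
// shuffle mask handed to SHUFB.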
1822 SmallVector<SDValue, 16> ResultMask;
1823 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1825 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1828 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1830 for (unsigned j = 0; j < BytesPerElement; ++j) {
1831 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1836 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1837 &ResultMask[0], ResultMask.size());
1838 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1842 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1843 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1845 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1846 // For a constant, build the appropriate constant vector, which will
1847 // eventually simplify to a vector register load.
1849 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1850 SmallVector<SDValue, 16> ConstVecValues;
1854 // Create a constant vector:
1855 switch (Op.getValueType().getSimpleVT()) {
1856 default: assert(0 && "Unexpected constant value type in "
1857 "LowerSCALAR_TO_VECTOR");
1858 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1859 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1860 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1861 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1862 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1863 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1866 SDValue CValue = DAG.getConstant(CN->getValue(), VT);
1867 for (size_t j = 0; j < n_copies; ++j)
1868 ConstVecValues.push_back(CValue);
1870 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1871 &ConstVecValues[0], ConstVecValues.size());
1873 // Otherwise, copy the value from one register to another:
1874 switch (Op0.getValueType().getSimpleVT()) {
1875 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1882 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1889 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1890 switch (Op.getValueType().getSimpleVT()) {
1892 cerr << "CellSPU: Unknown vector multiplication, got "
1893 << Op.getValueType().getMVTString()
1899 SDValue rA = Op.getOperand(0);
1900 SDValue rB = Op.getOperand(1);
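// With a = (aH << 16) + aL and b = (bH << 16) + bL per element,
// a*b mod 2^32 = aL*bL + ((aH*bL + bH*aL) << 16): MPYU supplies the low
// 16x16 product and each MPYH supplies one cross product, already shifted
// left by 16 bits.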
1901 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1902 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1903 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1904 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1906 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1910 // Multiply two v8i16 vectors (pipeline friendly version):
1911 // a) multiply lower halves, mask off the upper 16 bits of the 32-bit product
1912 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1913 // c) Use SELB to select upper and lower halves from the intermediate results
1915 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1916 // dual-issue. This code does manage to do this, even if it's a bit convoluted.
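// Roughly: fsmbi 0xcccc expands to the byte mask ff ff 00 00 repeated per
// 32-bit word, so the final SELB can merge the (shifted) upper-half products
// with the lower-half products halfword by halfword.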
1919 MachineFunction &MF = DAG.getMachineFunction();
1920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1921 SDValue Chain = Op.getOperand(0);
1922 SDValue rA = Op.getOperand(0);
1923 SDValue rB = Op.getOperand(1);
1924 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1925 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1928 DAG.getCopyToReg(Chain, FSMBIreg,
1929 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1930 DAG.getConstant(0xcccc, MVT::i16)));
1933 DAG.getCopyToReg(FSMBOp, HiProdReg,
1934 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1936 SDValue HHProd_v4i32 =
1937 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1938 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1940 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1941 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1942 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1943 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1945 DAG.getConstant(16, MVT::i16))),
1946 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1949 // This M00sE is N@stI! (apologies to Monty Python)
1951 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1952 // is to break it all apart, sign extend, and reassemble the various
1953 // intermediate products.
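// Roughly: the products are formed on the v8i16 reinterpretation of the
// operands (each 16-bit lane carrying two byte lanes), shifted into position
// with VEC_SHL/VEC_SRA, and merged back together with SELB byte masks and a
// final OR.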
1955 SDValue rA = Op.getOperand(0);
1956 SDValue rB = Op.getOperand(1);
1957 SDValue c8 = DAG.getConstant(8, MVT::i32);
1958 SDValue c16 = DAG.getConstant(16, MVT::i32);
1961 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1963 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1965 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1967 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1970 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1971 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1973 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1974 DAG.getConstant(0x2222, MVT::i16));
1976 SDValue LoProdParts =
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1978 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1979 LLProd, LHProd, FSMBmask));
1981 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1984 DAG.getNode(ISD::AND, MVT::v4i32,
1986 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1987 LoProdMask, LoProdMask,
1988 LoProdMask, LoProdMask));
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1995 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1999 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2000 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2001 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2004 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2005 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2008 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2011 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2013 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2017 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2019 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2020 DAG.getNode(ISD::OR, MVT::v4i32,
2028 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2029 MachineFunction &MF = DAG.getMachineFunction();
2030 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2032 SDValue A = Op.getOperand(0);
2033 SDValue B = Op.getOperand(1);
2034 MVT VT = Op.getValueType();
2036 unsigned VRegBR, VRegC;
2038 if (VT == MVT::f32) {
2039 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2040 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2042 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2043 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2045 // TODO: make sure we're feeding FPInterp the right arguments
2046 // Right now: fi B, frest(B)
2049 // (Floating Interpolate (FP Reciprocal Estimate B))
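// The sequence below amounts to one Newton-Raphson style refinement of the
// estimated quotient: with y ~= 1/B from FPRecipEst/FPInterp and q0 = A * y,
// the value returned is q0 + y * (A - B * q0).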
2051 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2052 DAG.getNode(SPUISD::FPInterp, VT, B,
2053 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2055 // Computes A * BRcpl and stores in a temporary register
2057 DAG.getCopyToReg(BRcpl, VRegC,
2058 DAG.getNode(ISD::FMUL, VT, A,
2059 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2060 // What does the Chain variable do? It's magic!
2061 // TODO: set Chain = Op(0).getEntryNode()
2063 return DAG.getNode(ISD::FADD, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2065 DAG.getNode(ISD::FMUL, VT,
2066 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2067 DAG.getNode(ISD::FSUB, VT, A,
2068 DAG.getNode(ISD::FMUL, VT, B,
2069 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2072 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2073 MVT VT = Op.getValueType();
2074 SDValue N = Op.getOperand(0);
2075 SDValue Elt = Op.getOperand(1);
2076 SDValue ShufMask[16];
2077 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2079 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2081 int EltNo = (int) C->getValue();
2084 if (VT == MVT::i8 && EltNo >= 16)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2086 else if (VT == MVT::i16 && EltNo >= 8)
2087 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2088 else if (VT == MVT::i32 && EltNo >= 4)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2090 else if (VT == MVT::i64 && EltNo >= 2)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2093 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2094 // i32 and i64: Element 0 is the preferred slot
2095 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2098 // Need to generate shuffle mask and extract:
2099 int prefslot_begin = -1, prefslot_end = -1;
2100 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2102 switch (VT.getSimpleVT()) {
2104 assert(false && "Invalid value type!");
2106 prefslot_begin = prefslot_end = 3;
2110 prefslot_begin = 2; prefslot_end = 3;
2114 prefslot_begin = 0; prefslot_end = 3;
2118 prefslot_begin = 0; prefslot_end = 7;
2123 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2124 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2126 for (int i = 0; i < 16; ++i) {
2127 // zero fill the upper part of the preferred slot, don't care about the rest
2129 unsigned int mask_val;
2131 if (i <= prefslot_end) {
2133 ((i < prefslot_begin)
2135 : elt_byte + (i - prefslot_begin));
2137 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2139 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2142 SDValue ShufMaskVec =
2143 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2145 sizeof(ShufMask) / sizeof(ShufMask[0]));
2147 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2148 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2149 N, N, ShufMaskVec));
2153 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2154 SDValue VecOp = Op.getOperand(0);
2155 SDValue ValOp = Op.getOperand(1);
2156 SDValue IdxOp = Op.getOperand(2);
2157 MVT VT = Op.getValueType();
2159 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2160 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2162 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2163 // Use $2 because it's always 16-byte aligned and it's available:
2164 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
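// INSERT_MASK is eventually selected to one of the C*D (generate controls
// for insertion) instructions, which derive the shufb control word from the
// low bits of an address; adding the element index (scaled to bytes) to the
// always-aligned $2 makes those low bits encode the insertion offset.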
2167 DAG.getNode(SPUISD::SHUFB, VT,
2168 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2170 DAG.getNode(SPUISD::INSERT_MASK, VT,
2171 DAG.getNode(ISD::ADD, PtrVT,
2173 DAG.getConstant(CN->getValue(),
2179 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2181 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2183 assert(Op.getValueType() == MVT::i8);
2186 assert(0 && "Unhandled i8 math operator");
2190 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2192 SDValue N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196 N1 = (N1.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2198 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2199 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2200 DAG.getNode(Opc, MVT::i16, N0, N1));
2204 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2209 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2210 N1 = (N1.getOpcode() != ISD::Constant
2211 ? DAG.getNode(N1Opc, MVT::i16, N1)
2212 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2214 DAG.getNode(ISD::OR, MVT::i16, N0,
2215 DAG.getNode(ISD::SHL, MVT::i16,
2216 N0, DAG.getConstant(8, MVT::i16)));
2217 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2218 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2222 SDValue N1 = Op.getOperand(1);
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232 DAG.getNode(Opc, MVT::i16, N0, N1));
2235 SDValue N1 = Op.getOperand(1);
2237 N0 = (N0.getOpcode() != ISD::Constant
2238 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2241 N1 = (N1.getOpcode() != ISD::Constant
2242 ? DAG.getNode(N1Opc, MVT::i16, N1)
2243 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245 DAG.getNode(Opc, MVT::i16, N0, N1));
2248 SDValue N1 = Op.getOperand(1);
2250 N0 = (N0.getOpcode() != ISD::Constant
2251 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2252 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2253 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2254 N1 = (N1.getOpcode() != ISD::Constant
2255 ? DAG.getNode(N1Opc, MVT::i16, N1)
2256 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2257 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2258 DAG.getNode(Opc, MVT::i16, N0, N1));
2266 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2268 MVT VT = Op.getValueType();
2269 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2271 SDValue Op0 = Op.getOperand(0);
2274 case ISD::ZERO_EXTEND:
2275 case ISD::SIGN_EXTEND:
2276 case ISD::ANY_EXTEND: {
2277 MVT Op0VT = Op0.getValueType();
2278 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2280 assert(Op0VT == MVT::i32
2281 && "CellSPU: Zero/sign extending something other than i32");
2282 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2284 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2285 ? SPUISD::ROTBYTES_RIGHT_S
2286 : SPUISD::ROTQUAD_RZ_BYTES);
2287 SDValue PromoteScalar =
2288 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2290 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2291 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2292 DAG.getNode(NewOpc, Op0VecVT,
2294 DAG.getConstant(4, MVT::i32))));
2298 // Turn operands into vectors to satisfy type checking (shufb works on
2301 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2303 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2304 SmallVector<SDValue, 16> ShufBytes;
2306 // Create the shuffle mask for "rotating" the carry up one register slot
2307 // once the carry is generated.
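// Selector words 0x04050607 and 0x0c0d0e0f move the carry computed in the
// low-word slot of each i64 lane up into the high-word slot, while the
// 0x80808080 words fill the vacated slots with zeros.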
2308 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2309 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2310 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2311 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2314 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2315 SDValue ShiftedCarry =
2316 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2318 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2319 &ShufBytes[0], ShufBytes.size()));
2321 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2322 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2323 Op0, Op1, ShiftedCarry));
2327 // Turn operands into vectors to satisfy type checking (shufb works on
2330 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2332 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2333 SmallVector<SDValue, 16> ShufBytes;
2335 // Create the shuffle mask for "rotating" the borrow up one register slot
2336 // once the borrow is generated.
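// Same rotation as in the ADD case, except the filler selector 0xc0c0c0c0
// expands to 0xff bytes rather than zeros, which presumably matches the
// borrow convention SUB_EXTENDED expects in the low-word slots.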
2337 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2340 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2343 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2344 SDValue ShiftedBorrow =
2345 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2346 BorrowGen, BorrowGen,
2347 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2348 &ShufBytes[0], ShufBytes.size()));
2350 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2351 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2352 Op0, Op1, ShiftedBorrow));
2356 SDValue ShiftAmt = Op.getOperand(1);
2357 MVT ShiftAmtVT = ShiftAmt.getValueType();
2358 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2360 DAG.getNode(SPUISD::SELB, VecVT,
2362 DAG.getConstant(0, VecVT),
2363 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2364 DAG.getConstant(0xff00ULL, MVT::i16)));
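// The variable shift amount is split the way the quadword shifter wants it:
// a byte-granular shift by (amount >> 3) via SHLQUAD_L_BYTES, followed by a
// bit-granular shift by (amount & 7) via SHLQUAD_L_BITS.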
2365 SDValue ShiftAmtBytes =
2366 DAG.getNode(ISD::SRL, ShiftAmtVT,
2368 DAG.getConstant(3, ShiftAmtVT));
2369 SDValue ShiftAmtBits =
2370 DAG.getNode(ISD::AND, ShiftAmtVT,
2372 DAG.getConstant(7, ShiftAmtVT));
2374 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2375 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2376 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2377 MaskLower, ShiftAmtBytes),
2382 MVT VT = Op.getValueType();
2383 SDValue ShiftAmt = Op.getOperand(1);
2384 MVT ShiftAmtVT = ShiftAmt.getValueType();
2385 SDValue ShiftAmtBytes =
2386 DAG.getNode(ISD::SRL, ShiftAmtVT,
2388 DAG.getConstant(3, ShiftAmtVT));
2389 SDValue ShiftAmtBits =
2390 DAG.getNode(ISD::AND, ShiftAmtVT,
2392 DAG.getConstant(7, ShiftAmtVT));
2394 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2395 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2396 Op0, ShiftAmtBytes),
2401 // Promote Op0 to vector
2403 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2404 SDValue ShiftAmt = Op.getOperand(1);
2405 MVT ShiftVT = ShiftAmt.getValueType();
2407 // Negate variable shift amounts
2408 if (!isa<ConstantSDNode>(ShiftAmt)) {
2409 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2410 DAG.getConstant(0, ShiftVT), ShiftAmt);
2413 SDValue UpperHalfSign =
2414 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2415 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2416 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2417 Op0, DAG.getConstant(31, MVT::i32))));
2418 SDValue UpperHalfSignMask =
2419 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2420 SDValue UpperLowerMask =
2421 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2422 DAG.getConstant(0xff00, MVT::i16));
2423 SDValue UpperLowerSelect =
2424 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2425 UpperHalfSignMask, Op0, UpperLowerMask);
2426 SDValue RotateLeftBytes =
2427 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2428 UpperLowerSelect, ShiftAmt);
2429 SDValue RotateLeftBits =
2430 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2431 RotateLeftBytes, ShiftAmt);
2433 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2441 //! Lower byte immediate operations for v16i8 vectors:
2443 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2446 MVT VT = Op.getValueType();
2448 ConstVec = Op.getOperand(0);
2449 Arg = Op.getOperand(1);
2450 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2451 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2452 ConstVec = ConstVec.getOperand(0);
2454 ConstVec = Op.getOperand(1);
2455 Arg = Op.getOperand(0);
2456 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2457 ConstVec = ConstVec.getOperand(0);
2462 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2463 uint64_t VectorBits[2];
2464 uint64_t UndefBits[2];
2465 uint64_t SplatBits, SplatUndef;
2468 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2469 && isConstantSplat(VectorBits, UndefBits,
2470 VT.getVectorElementType().getSizeInBits(),
2471 SplatBits, SplatUndef, SplatSize)) {
2473 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2474 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2476 // Turn the BUILD_VECTOR into a set of target constants:
2477 for (size_t i = 0; i < tcVecSize; ++i)
2480 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2481 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2484 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2485 // lowered. Return the operation, rather than a null SDValue.
2489 //! Lower i32 multiplication
2490 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2492 switch (VT.getSimpleVT()) {
2494 cerr << "CellSPU: Unknown LowerMUL value type, got "
2495 << Op.getValueType().getMVTString()
2501 SDValue rA = Op.getOperand(0);
2502 SDValue rB = Op.getOperand(1);
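// Same MPYH/MPYH/MPYU decomposition as the v4i32 multiply above, applied to
// a single i32: the cross products arrive pre-shifted by 16 bits and the low
// 16x16 product comes from MPYU.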
2504 return DAG.getNode(ISD::ADD, MVT::i32,
2505 DAG.getNode(ISD::ADD, MVT::i32,
2506 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2507 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2508 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2515 //! Custom lowering for CTPOP (count population)
2517 Custom lowering code that counts the number of ones in the input
2518 operand. SPU has such an instruction, but it counts the number of
2519 ones per byte, which then have to be accumulated.
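As a worked example for the i32 path: for N = 0x01020304, CNTB yields the
per-byte counts 0x01010201; adding (x >> 16) gives 0x01010302, adding
(x >> 8) to that gives 0x01020405, and masking with 0xff leaves 5, the
total population count.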
2521 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2522 MVT VT = Op.getValueType();
2523 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2525 switch (VT.getSimpleVT()) {
2527 assert(false && "Invalid value type!");
2529 SDValue N = Op.getOperand(0);
2530 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2532 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2533 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2535 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2539 MachineFunction &MF = DAG.getMachineFunction();
2540 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2542 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2544 SDValue N = Op.getOperand(0);
2545 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2546 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2547 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2549 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2550 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2552 // CNTB_result becomes the chain to which the virtual register
2553 // CNTB_reg becomes associated:
2554 SDValue CNTB_result =
2555 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2557 SDValue CNTB_rescopy =
2558 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2560 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2562 return DAG.getNode(ISD::AND, MVT::i16,
2563 DAG.getNode(ISD::ADD, MVT::i16,
2564 DAG.getNode(ISD::SRL, MVT::i16,
2571 MachineFunction &MF = DAG.getMachineFunction();
2572 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2574 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2575 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2577 SDValue N = Op.getOperand(0);
2578 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2579 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2580 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2581 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2583 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2584 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2586 // CNTB_result becomes the chain to which all of the virtual registers
2587 // CNTB_reg, SUM1_reg become associated:
2588 SDValue CNTB_result =
2589 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2591 SDValue CNTB_rescopy =
2592 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2595 DAG.getNode(ISD::SRL, MVT::i32,
2596 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2599 DAG.getNode(ISD::ADD, MVT::i32,
2600 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2602 SDValue Sum1_rescopy =
2603 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2606 DAG.getNode(ISD::SRL, MVT::i32,
2607 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2610 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2611 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2613 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2623 /// LowerOperation - Provide custom lowering hooks for some operations.
2626 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2628 unsigned Opc = (unsigned) Op.getOpcode();
2629 MVT VT = Op.getValueType();
2633 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2634 cerr << "Op.getOpcode() = " << Opc << "\n";
2635 cerr << "*Op.getNode():\n";
2636 Op.getNode()->dump();
2642 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2644 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2645 case ISD::ConstantPool:
2646 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2647 case ISD::GlobalAddress:
2648 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2649 case ISD::JumpTable:
2650 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2652 return LowerConstant(Op, DAG);
2653 case ISD::ConstantFP:
2654 return LowerConstantFP(Op, DAG);
2656 return LowerBRCOND(Op, DAG);
2657 case ISD::FORMAL_ARGUMENTS:
2658 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2660 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2662 return LowerRET(Op, DAG, getTargetMachine());
2665 // i8, i64 math ops:
2666 case ISD::ZERO_EXTEND:
2667 case ISD::SIGN_EXTEND:
2668 case ISD::ANY_EXTEND:
2677 return LowerI8Math(Op, DAG, Opc);
2678 else if (VT == MVT::i64)
2679 return LowerI64Math(Op, DAG, Opc);
2683 // Vector-related lowering.
2684 case ISD::BUILD_VECTOR:
2685 return LowerBUILD_VECTOR(Op, DAG);
2686 case ISD::SCALAR_TO_VECTOR:
2687 return LowerSCALAR_TO_VECTOR(Op, DAG);
2688 case ISD::VECTOR_SHUFFLE:
2689 return LowerVECTOR_SHUFFLE(Op, DAG);
2690 case ISD::EXTRACT_VECTOR_ELT:
2691 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2692 case ISD::INSERT_VECTOR_ELT:
2693 return LowerINSERT_VECTOR_ELT(Op, DAG);
2695 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2699 return LowerByteImmed(Op, DAG);
2701 // Vector and i8 multiply:
2704 return LowerVectorMUL(Op, DAG);
2705 else if (VT == MVT::i8)
2706 return LowerI8Math(Op, DAG, Opc);
2708 return LowerMUL(Op, DAG, VT, Opc);
2711 if (VT == MVT::f32 || VT == MVT::v4f32)
2712 return LowerFDIVf32(Op, DAG);
2713 // else if (Op.getValueType() == MVT::f64)
2714 // return LowerFDIVf64(Op, DAG);
2716 assert(0 && "Calling FDIV on unsupported MVT");
2719 return LowerCTPOP(Op, DAG);
2725 //===----------------------------------------------------------------------===//
2726 // Target Optimization Hooks
2727 //===----------------------------------------------------------------------===//
2730 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2733 TargetMachine &TM = getTargetMachine();
2735 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2736 SelectionDAG &DAG = DCI.DAG;
2737 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2738 SDValue Result; // Initially, NULL result
2740 switch (N->getOpcode()) {
2743 SDValue Op1 = N->getOperand(1);
2745 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2746 SDValue Op01 = Op0.getOperand(1);
2747 if (Op01.getOpcode() == ISD::Constant
2748 || Op01.getOpcode() == ISD::TargetConstant) {
2749 // (add <const>, (SPUindirect <arg>, <const>)) ->
2750 // (SPUindirect <arg>, <const + const>)
2751 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2752 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2753 SDValue combinedConst =
2754 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2755 Op0.getValueType());
2757 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2758 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2759 DEBUG(cerr << "With: (SPUindirect <arg>, "
2760 << CN0->getValue() + CN1->getValue() << ")\n");
2761 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2762 Op0.getOperand(0), combinedConst);
2764 } else if (isa<ConstantSDNode>(Op0)
2765 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2766 SDValue Op11 = Op1.getOperand(1);
2767 if (Op11.getOpcode() == ISD::Constant
2768 || Op11.getOpcode() == ISD::TargetConstant) {
2769 // (add (SPUindirect <arg>, <const>), <const>) ->
2770 // (SPUindirect <arg>, <const + const>)
2771 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2772 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2773 SDValue combinedConst =
2774 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2775 Op0.getValueType());
2777 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2778 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2779 DEBUG(cerr << "With: (SPUindirect <arg>, "
2780 << CN0->getValue() + CN1->getValue() << ")\n");
2782 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2783 Op1.getOperand(0), combinedConst);
2788 case ISD::SIGN_EXTEND:
2789 case ISD::ZERO_EXTEND:
2790 case ISD::ANY_EXTEND: {
2791 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2792 N->getValueType(0) == Op0.getValueType()) {
2793 // (any_extend (SPUextract_elt0 <arg>)) ->
2794 // (SPUextract_elt0 <arg>)
2795 // Types must match, however...
2796 DEBUG(cerr << "Replace: ");
2797 DEBUG(N->dump(&DAG));
2798 DEBUG(cerr << "\nWith: ");
2799 DEBUG(Op0.getNode()->dump(&DAG));
2800 DEBUG(cerr << "\n");
2806 case SPUISD::IndirectAddr: {
2807 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2808 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2809 if (CN->getValue() == 0) {
2810 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2811 // (SPUaform <addr>, 0)
2813 DEBUG(cerr << "Replace: ");
2814 DEBUG(N->dump(&DAG));
2815 DEBUG(cerr << "\nWith: ");
2816 DEBUG(Op0.getNode()->dump(&DAG));
2817 DEBUG(cerr << "\n");
2824 case SPUISD::SHLQUAD_L_BITS:
2825 case SPUISD::SHLQUAD_L_BYTES:
2826 case SPUISD::VEC_SHL:
2827 case SPUISD::VEC_SRL:
2828 case SPUISD::VEC_SRA:
2829 case SPUISD::ROTQUAD_RZ_BYTES:
2830 case SPUISD::ROTQUAD_RZ_BITS: {
2831 SDValue Op1 = N->getOperand(1);
2833 if (isa<ConstantSDNode>(Op1)) {
2834 // Kill degenerate vector shifts:
2835 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2837 if (CN->getValue() == 0) {
2843 case SPUISD::PROMOTE_SCALAR: {
2844 switch (Op0.getOpcode()) {
2847 case ISD::ANY_EXTEND:
2848 case ISD::ZERO_EXTEND:
2849 case ISD::SIGN_EXTEND: {
2850 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2852 // but only if the SPUpromote_scalar and <arg> types match.
2853 SDValue Op00 = Op0.getOperand(0);
2854 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2855 SDValue Op000 = Op00.getOperand(0);
2856 if (Op000.getValueType() == N->getValueType(0)) {
2862 case SPUISD::EXTRACT_ELT0: {
2863 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2865 Result = Op0.getOperand(0);
2872 // Otherwise, return unchanged.
2874 if (Result.getNode()) {
2875 DEBUG(cerr << "\nReplace.SPU: ");
2876 DEBUG(N->dump(&DAG));
2877 DEBUG(cerr << "\nWith: ");
2878 DEBUG(Result.getNode()->dump(&DAG));
2879 DEBUG(cerr << "\n");
2886 //===----------------------------------------------------------------------===//
2887 // Inline Assembly Support
2888 //===----------------------------------------------------------------------===//
2890 /// getConstraintType - Given a constraint letter, return the type of
2891 /// constraint it is for this target.
2892 SPUTargetLowering::ConstraintType
2893 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2894 if (ConstraintLetter.size() == 1) {
2895 switch (ConstraintLetter[0]) {
2902 return C_RegisterClass;
2905 return TargetLowering::getConstraintType(ConstraintLetter);
2908 std::pair<unsigned, const TargetRegisterClass*>
2909 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2912 if (Constraint.size() == 1) {
2913 // GCC RS6000 Constraint Letters
2914 switch (Constraint[0]) {
2918 return std::make_pair(0U, SPU::R64CRegisterClass);
2919 return std::make_pair(0U, SPU::R32CRegisterClass);
2922 return std::make_pair(0U, SPU::R32FPRegisterClass);
2923 else if (VT == MVT::f64)
2924 return std::make_pair(0U, SPU::R64FPRegisterClass);
2927 return std::make_pair(0U, SPU::GPRCRegisterClass);
2931 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2934 //! Compute used/known bits for a SPU operand
2936 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2940 const SelectionDAG &DAG,
2941 unsigned Depth ) const {
2943 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2946 switch (Op.getOpcode()) {
2948 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2958 case SPUISD::PROMOTE_SCALAR: {
2959 SDValue Op0 = Op.getOperand(0);
2960 MVT Op0VT = Op0.getValueType();
2961 unsigned Op0VTBits = Op0VT.getSizeInBits();
2962 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2963 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2964 KnownOne |= APInt(Op0VTBits, InMask, false);
2968 case SPUISD::LDRESULT:
2969 case SPUISD::EXTRACT_ELT0:
2970 case SPUISD::EXTRACT_ELT0_CHAINED: {
2971 MVT OpVT = Op.getValueType();
2972 unsigned OpVTBits = OpVT.getSizeInBits();
2973 uint64_t InMask = OpVT.getIntegerVTBitMask();
2974 KnownZero |= APInt(OpVTBits, ~InMask, false);
2975 KnownOne |= APInt(OpVTBits, InMask, false);
2980 case EXTRACT_I1_ZEXT:
2981 case EXTRACT_I1_SEXT:
2982 case EXTRACT_I8_ZEXT:
2983 case EXTRACT_I8_SEXT:
2988 case SPUISD::SHLQUAD_L_BITS:
2989 case SPUISD::SHLQUAD_L_BYTES:
2990 case SPUISD::VEC_SHL:
2991 case SPUISD::VEC_SRL:
2992 case SPUISD::VEC_SRA:
2993 case SPUISD::VEC_ROTL:
2994 case SPUISD::VEC_ROTR:
2995 case SPUISD::ROTQUAD_RZ_BYTES:
2996 case SPUISD::ROTQUAD_RZ_BITS:
2997 case SPUISD::ROTBYTES_RIGHT_S:
2998 case SPUISD::ROTBYTES_LEFT:
2999 case SPUISD::ROTBYTES_LEFT_CHAINED:
3000 case SPUISD::SELECT_MASK:
3002 case SPUISD::FPInterp:
3003 case SPUISD::FPRecipEst:
3004 case SPUISD::SEXT32TO64:
3009 // LowerAsmOperandForConstraint
3011 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3012 char ConstraintLetter,
3013 std::vector<SDValue> &Ops,
3014 SelectionDAG &DAG) const {
3015 // Default, for the time being, to the base class handler
3016 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3019 /// isLegalAddressImmediate - Return true if the integer value can be used
3020 /// as the offset of the target addressing mode.
3021 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3022 // SPU's addresses are 256K:
3023 return (V > -(1 << 18) && V < (1 << 18) - 1);
3026 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {