lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // SPU has no sign or zero extended loads for i1, i8, i16:
 134 #if 0
 135   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 136   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 137   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 138   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 139   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 140   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 142   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 143 #else
 144   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 145   setLoadExtAction(ISD::SEXTLOAD,  MVT::i1, Promote);
 146   setTruncStoreAction(MVT::i8, MVT::i1, Promote);
 147 #endif
 148
 149   setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
 150   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
 151   setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 152   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 153   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 154   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 155   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 156   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 157
 158   setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
 159   setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
 160   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 161
 162   // SPU constant load actions are custom lowered:
 163   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 164   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 165   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 166
 167   // SPU's loads and stores have to be custom lowered:
 168   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 169        ++sctype) {
 170     MVT VT = (MVT::SimpleValueType)sctype;
 171
 172     setOperationAction(ISD::LOAD, VT, Custom);
 173     setOperationAction(ISD::STORE, VT, Custom);
 174   }
 175
 176   // Custom lower BRCOND for i1, i8 to "promote" the result to
 177   // i32 and i16, respectively.
 178   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 179
 180   // Expand the jumptable branches
 181   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 182   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 183   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 184
 185   // SPU has no intrinsics for these particular operations:
 186   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 187
 188   // PowerPC has no SREM/UREM instructions
 189   setOperationAction(ISD::SREM, MVT::i32, Expand);
 190   setOperationAction(ISD::UREM, MVT::i32, Expand);
 191   setOperationAction(ISD::SREM, MVT::i64, Expand);
 192   setOperationAction(ISD::UREM, MVT::i64, Expand);
 193
 194   // We don't support sin/cos/sqrt/fmod
 195   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 196   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 197   setOperationAction(ISD::FREM , MVT::f64, Expand);
 198   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 199   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 200   setOperationAction(ISD::FREM , MVT::f32, Expand);
 201
 202   // If we're enabling GP optimizations, use hardware square root
 203   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 204   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 205
 206   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 207   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 208
 209   // SPU can do rotate right and left, so legalize it... but customize for i8
 210   // because instructions don't exist.
 211
 212   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 213   //        .td files.
 214   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 215   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 216   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 217
 218   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 219   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 220   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 221   // SPU has no native version of shift left/right for i8
 222   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 223   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 224   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 225   // And SPU needs custom lowering for shift left/right for i64
 226   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 227   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 228   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 229
 230   // Custom lower i8, i32 and i64 multiplications
 231   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 232   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 233   setOperationAction(ISD::MUL,  MVT::i64,    Custom);
 234
 235   // Need to custom handle (some) common i8, i64 math ops
 236   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 237   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 238   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 239
 240   // SPU does not have BSWAP. It does have i32 support CTLZ.
 241   // CTPOP has to be custom lowered.
 242   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 243   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 244
 245   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 246   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 247   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 248   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 249
 250   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 251   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 252
 253   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 254
 255   // SPU has a version of select that implements (a&~c)|(b&c), just like
 256   // select ought to work:
 257   setOperationAction(ISD::SELECT, MVT::i1,   Promote);
 258   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 259   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 260   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 261   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 262
 263   setOperationAction(ISD::SETCC, MVT::i1,    Promote);
 264   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 265   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 266   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 267   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 268
 269   // Zero extension and sign extension for i64 have to be
 270   // custom legalized
 271   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 272   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 273   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 274
 275   // SPU has a legal FP -> signed INT instruction
 276   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 277   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 278   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 279   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 280
 281   // FDIV on SPU requires custom lowering
 282   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 283   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 284
 285   // SPU has [U|S]INT_TO_FP
 286   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 287   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 288   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 289   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 290   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 291   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 292   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 293   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 294
 295   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 296   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 297   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 298   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 299
 300   // We cannot sextinreg(i1).  Expand to shifts.
 301   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 302
 303   // Support label based line numbers.
 304   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 305   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 306
 307   // We want to legalize GlobalAddress and ConstantPool nodes into the
 308   // appropriate instructions to materialize the address.
 309   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 310        ++sctype) {
 311     MVT VT = (MVT::SimpleValueType)sctype;
 312
 313     setOperationAction(ISD::GlobalAddress, VT, Custom);
 314     setOperationAction(ISD::ConstantPool,  VT, Custom);
 315     setOperationAction(ISD::JumpTable,     VT, Custom);
 316   }
 317
 318   // RET must be custom lowered, to meet ABI requirements
 319   setOperationAction(ISD::RET,           MVT::Other, Custom);
 320
 321   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 322   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 323
 324   // Use the default implementation.
 325   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 326   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 327   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 328   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 329   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 330   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 331   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 332
 333   // Cell SPU has instructions for converting between i64 and fp.
 334   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 335   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 336
 337   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 338   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 339
 340   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 341   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 342
 343   // First set operation action for all vector types to expand. Then we
 344   // will selectively turn on ones that can be effectively codegen'd.
 345   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 346   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 347   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 348   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 349   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 350   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 351
 352   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 353        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 354     MVT VT = (MVT::SimpleValueType)i;
 355
 356     // add/sub are legal for all supported vector VT's.
 357     setOperationAction(ISD::ADD , VT, Legal);
 358     setOperationAction(ISD::SUB , VT, Legal);
 359     // mul has to be custom lowered.
 360     setOperationAction(ISD::MUL , VT, Custom);
 361
 362     setOperationAction(ISD::AND   , VT, Legal);
 363     setOperationAction(ISD::OR    , VT, Legal);
 364     setOperationAction(ISD::XOR   , VT, Legal);
 365     setOperationAction(ISD::LOAD  , VT, Legal);
 366     setOperationAction(ISD::SELECT, VT, Legal);
 367     setOperationAction(ISD::STORE,  VT, Legal);
 368
 369     // These operations need to be expanded:
 370     setOperationAction(ISD::SDIV, VT, Expand);
 371     setOperationAction(ISD::SREM, VT, Expand);
 372     setOperationAction(ISD::UDIV, VT, Expand);
 373     setOperationAction(ISD::UREM, VT, Expand);
 374     setOperationAction(ISD::FDIV, VT, Custom);
 375
 376     // Custom lower build_vector, constant pool spills, insert and
 377     // extract vector elements:
 378     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 379     setOperationAction(ISD::ConstantPool, VT, Custom);
 380     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 381     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 382     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 383     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 384   }
 385
 386   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 387   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 388   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 389   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 390   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 391
 392   setShiftAmountType(MVT::i32);
 393   setSetCCResultContents(ZeroOrOneSetCCResult);
 394
 395   setStackPointerRegisterToSaveRestore(SPU::R1);
 396
 397   // We have target-specific dag combine patterns for the following nodes:
 398   setTargetDAGCombine(ISD::ADD);
 399   setTargetDAGCombine(ISD::ZERO_EXTEND);
 400   setTargetDAGCombine(ISD::SIGN_EXTEND);
 401   setTargetDAGCombine(ISD::ANY_EXTEND);
 402
 403   computeRegisterProperties();
 404 }
 405
 406 const char *
 407 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 408 {
 409   if (node_names.empty()) {
 410     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 411     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 412     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 413     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 414     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 415     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 416     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 417     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 418     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 419     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 420     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 421     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 422     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 423     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
 424                                               = "SPUISD::EXTRACT_ELT0_CHAINED";
 425     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 426     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 427     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 428     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 429     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 430     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 431     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 432     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 433     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 434     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 435     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 436     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 437     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 438     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 439     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 440     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 441       "SPUISD::ROTQUAD_RZ_BYTES";
 442     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 443       "SPUISD::ROTQUAD_RZ_BITS";
 444     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 445       "SPUISD::ROTBYTES_RIGHT_S";
 446     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 447     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 448       "SPUISD::ROTBYTES_LEFT_CHAINED";
 449     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 450       "SPUISD::ROTBYTES_LEFT_BITS";
 451     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 452     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 453     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 454     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 455     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 456     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 457     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 458     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 459     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 460   }
 461
 462   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 463
 464   return ((i != node_names.end()) ? i->second : 0);
 465 }
 466
 467 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 468   MVT VT = Op.getValueType();
 469   return (VT.isInteger() ? VT : MVT(MVT::i32));
 470 }
 471
 472 //===----------------------------------------------------------------------===//
 473 // Calling convention code:
 474 //===----------------------------------------------------------------------===//
 475
 476 #include "SPUGenCallingConv.inc"
 477
 478 //===----------------------------------------------------------------------===//
 479 //  LowerOperation implementation
 480 //===----------------------------------------------------------------------===//
 481
 482 /// Aligned load common code for CellSPU
 483 /*!
 484   \param[in] Op The SelectionDAG load or store operand
 485   \param[in] DAG The selection DAG
 486   \param[in] ST CellSPU subtarget information structure
 487   \param[in,out] alignment Caller initializes this to the load or store node's
 488   value from getAlignment(), may be updated while generating the aligned load
 489   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 490   offset (divisible by 16, modulo 16 == 0)
 491   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 492   offset of the preferred slot (modulo 16 != 0)
 493   \param[in,out] VT Caller initializes this value type to the the load or store
 494   node's loaded or stored value type; may be updated if an i1-extended load or
 495   store.
 496   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 497   otherwise false. Can help to determine if the chunk needs to be rotated.
 498
 499  Both load and store lowering load a block of data aligned on a 16-byte
 500  boundary. This is the common aligned load code shared between both.
 501  */
 502 static SDValue
 503 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 504             LSBaseSDNode *LSN,
 505             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 506             MVT &VT, bool &was16aligned)
 507 {
 508   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 509   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 510   SDValue basePtr = LSN->getBasePtr();
 511   SDValue chain = LSN->getChain();
 512
 513   if (basePtr.getOpcode() == ISD::ADD) {
 514     SDValue Op1 = basePtr.getNode()->getOperand(1);
 515
 516     if (Op1.getOpcode() == ISD::Constant
 517         || Op1.getOpcode() == ISD::TargetConstant) {
 518       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 519
 520       alignOffs = (int) CN->getZExtValue();
 521       prefSlotOffs = (int) (alignOffs & 0xf);
 522
 523       // Adjust the rotation amount to ensure that the final result ends up in
 524       // the preferred slot:
 525       prefSlotOffs -= vtm->prefslot_byte;
 526       basePtr = basePtr.getOperand(0);
 527
 528       // Loading from memory, can we adjust alignment?
 529       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 530         SDValue APtr = basePtr.getOperand(0);
 531         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 532           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 533           alignment = GSDN->getGlobal()->getAlignment();
 534         }
 535       }
 536     } else {
 537       alignOffs = 0;
 538       prefSlotOffs = -vtm->prefslot_byte;
 539     }
 540   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 541     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 542     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 543     prefSlotOffs = (int) (alignOffs & 0xf);
 544     prefSlotOffs -= vtm->prefslot_byte;
 545     basePtr = DAG.getRegister(SPU::R1, VT);
 546   } else {
 547     alignOffs = 0;
 548     prefSlotOffs = -vtm->prefslot_byte;
 549   }
 550
 551   if (alignment == 16) {
 552     // Realign the base pointer as a D-Form address:
 553     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 554       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 555                             basePtr,
 556                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 557     }
 558
 559     // Emit the vector load:
 560     was16aligned = true;
 561     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 562                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 563                        LSN->isVolatile(), 16);
 564   }
 565
 566   // Unaligned load or we're using the "large memory" model, which means that
 567   // we have to be very pessimistic:
 568   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 569     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 570                           DAG.getConstant(0, PtrVT));
 571   }
 572
 573   // Add the offset
 574   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 575                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 576   was16aligned = false;
 577   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 578                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 579                      LSN->isVolatile(), 16);
 580 }
 581
 582 /// Custom lower loads for CellSPU
 583 /*!
 584  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 585  within a 16-byte block, we have to rotate to extract the requested element.
 586  */
 587 static SDValue
 588 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 589   LoadSDNode *LN = cast<LoadSDNode>(Op);
 590   SDValue the_chain = LN->getChain();
 591   MVT VT = LN->getMemoryVT();
 592   MVT OpVT = Op.getNode()->getValueType(0);
 593   ISD::LoadExtType ExtType = LN->getExtensionType();
 594   unsigned alignment = LN->getAlignment();
 595   SDValue Ops[8];
 596
 597   switch (LN->getAddressingMode()) {
 598   case ISD::UNINDEXED: {
 599     int offset, rotamt;
 600     bool was16aligned;
 601     SDValue result =
 602       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 603
 604     if (result.getNode() == 0)
 605       return result;
 606
 607     the_chain = result.getValue(1);
 608     // Rotate the chunk if necessary
 609     if (rotamt < 0)
 610       rotamt += 16;
 611     if (rotamt != 0 || !was16aligned) {
 612       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 613
 614       Ops[0] = the_chain;
 615       Ops[1] = result;
 616       if (was16aligned) {
 617         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 618       } else {
 619         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 620         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 621         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 622                              DAG.getConstant(rotamt, PtrVT));
 623       }
 624
 625       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 626       the_chain = result.getValue(1);
 627     }
 628
 629     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 630       SDVTList scalarvts;
 631       MVT vecVT = MVT::v16i8;
 632
 633       // Convert the loaded v16i8 vector to the appropriate vector type
 634       // specified by the operand:
 635       if (OpVT == VT) {
 636         if (VT != MVT::i1)
 637           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 638       } else
 639         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 640
 641       Ops[0] = the_chain;
 642       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 643       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 644       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 645       the_chain = result.getValue(1);
 646     } else {
 647       // Handle the sign and zero-extending loads for i1 and i8:
 648       unsigned NewOpC;
 649
 650       if (ExtType == ISD::SEXTLOAD) {
 651         NewOpC = (OpVT == MVT::i1
 652                   ? SPUISD::EXTRACT_I1_SEXT
 653                   : SPUISD::EXTRACT_I8_SEXT);
 654       } else {
 655         assert(ExtType == ISD::ZEXTLOAD);
 656         NewOpC = (OpVT == MVT::i1
 657                   ? SPUISD::EXTRACT_I1_ZEXT
 658                   : SPUISD::EXTRACT_I8_ZEXT);
 659       }
 660
 661       result = DAG.getNode(NewOpC, OpVT, result);
 662     }
 663
 664     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 665     SDValue retops[2] = {
 666       result,
 667       the_chain
 668     };
 669
 670     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 671                          retops, sizeof(retops) / sizeof(retops[0]));
 672     return result;
 673   }
 674   case ISD::PRE_INC:
 675   case ISD::PRE_DEC:
 676   case ISD::POST_INC:
 677   case ISD::POST_DEC:
 678   case ISD::LAST_INDEXED_MODE:
 679     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 680             "UNINDEXED\n";
 681     cerr << (unsigned) LN->getAddressingMode() << "\n";
 682     abort();
 683     /*NOTREACHED*/
 684   }
 685
 686   return SDValue();
 687 }
 688
 689 /// Custom lower stores for CellSPU
 690 /*!
 691  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 692  within a 16-byte block, we have to generate a shuffle to insert the
 693  requested element into its place, then store the resulting block.
 694  */
 695 static SDValue
 696 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 697   StoreSDNode *SN = cast<StoreSDNode>(Op);
 698   SDValue Value = SN->getValue();
 699   MVT VT = Value.getValueType();
 700   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 701   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 702   unsigned alignment = SN->getAlignment();
 703
 704   switch (SN->getAddressingMode()) {
 705   case ISD::UNINDEXED: {
 706     int chunk_offset, slot_offset;
 707     bool was16aligned;
 708
 709     // The vector type we really want to load from the 16-byte chunk, except
 710     // in the case of MVT::i1, which has to be v16i8.
 711     MVT vecVT = MVT::v16i8, stVecVT = MVT::v16i8;
 712
 713     if (StVT != MVT::i1) {
 714       stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 715       vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 716     }
 717
 718     SDValue alignLoadVec =
 719       AlignedLoad(Op, DAG, ST, SN, alignment,
 720                   chunk_offset, slot_offset, VT, was16aligned);
 721
 722     if (alignLoadVec.getNode() == 0)
 723       return alignLoadVec;
 724
 725     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 726     SDValue basePtr = LN->getBasePtr();
 727     SDValue the_chain = alignLoadVec.getValue(1);
 728     SDValue theValue = SN->getValue();
 729     SDValue result;
 730
 731     if (StVT != VT
 732         && (theValue.getOpcode() == ISD::AssertZext
 733             || theValue.getOpcode() == ISD::AssertSext)) {
 734       // Drill down and get the value for zero- and sign-extended
 735       // quantities
 736       theValue = theValue.getOperand(0);
 737     }
 738
 739     chunk_offset &= 0xf;
 740
 741     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 742     SDValue insertEltPtr;
 743
 744     // If the base pointer is already a D-form address, then just create
 745     // a new D-form address with a slot offset and the orignal base pointer.
 746     // Otherwise generate a D-form address with the slot offset relative
 747     // to the stack pointer, which is always aligned.
 748     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 749     DEBUG(basePtr.getNode()->dump(&DAG));
 750     DEBUG(cerr << "\n");
 751
 752     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 753         (basePtr.getOpcode() == ISD::ADD
 754          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 755       insertEltPtr = basePtr;
 756     } else {
 757       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 758     }
 759
 760     SDValue insertEltOp =
 761             DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 762     SDValue vectorizeOp;
 763
 764 #if 0
 765     if (VT == MVT::i1 || StVT != VT) {
 766       MVT toVT = (VT != MVT::i1) ? VT : MVT::i8;
 767       if (toVT.bitsGT(VT)) {
 768         vectorizeOp = DAG.getNode(ISD::ANY_EXTEND, toVT, theValue);
 769       } else if (StVT.bitsLT(VT)) {
 770         vectorizeOp = DAG.getNode(ISD::TRUNCATE, toVT, theValue);
 771       }
 772
 773       vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, vectorizeOp);
 774     } else
 775 #endif
 776       vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
 777
 778     result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
 779                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 780
 781     result = DAG.getStore(the_chain, result, basePtr,
 782                           LN->getSrcValue(), LN->getSrcValueOffset(),
 783                           LN->isVolatile(), LN->getAlignment());
 784
 785 #ifndef NDEBUG
 786     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 787       const SDValue &currentRoot = DAG.getRoot();
 788
 789       DAG.setRoot(result);
 790       cerr << "------- CellSPU:LowerStore result:\n";
 791       DAG.dump();
 792       cerr << "-------\n";
 793       DAG.setRoot(currentRoot);
 794     }
 795 #endif
 796
 797     return result;
 798     /*UNREACHED*/
 799   }
 800   case ISD::PRE_INC:
 801   case ISD::PRE_DEC:
 802   case ISD::POST_INC:
 803   case ISD::POST_DEC:
 804   case ISD::LAST_INDEXED_MODE:
 805     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 806             "UNINDEXED\n";
 807     cerr << (unsigned) SN->getAddressingMode() << "\n";
 808     abort();
 809     /*NOTREACHED*/
 810   }
 811
 812   return SDValue();
 813 }
 814
 815 /// Generate the address of a constant pool entry.
 816 static SDValue
 817 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 818   MVT PtrVT = Op.getValueType();
 819   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 820   Constant *C = CP->getConstVal();
 821   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 822   SDValue Zero = DAG.getConstant(0, PtrVT);
 823   const TargetMachine &TM = DAG.getTarget();
 824
 825   if (TM.getRelocationModel() == Reloc::Static) {
 826     if (!ST->usingLargeMem()) {
 827       // Just return the SDValue with the constant pool address in it.
 828       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 829     } else {
 830       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 831       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 832       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 833     }
 834   }
 835
 836   assert(0 &&
 837          "LowerConstantPool: Relocation model other than static"
 838          " not supported.");
 839   return SDValue();
 840 }
 841
 842 static SDValue
 843 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 844   MVT PtrVT = Op.getValueType();
 845   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 846   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 847   SDValue Zero = DAG.getConstant(0, PtrVT);
 848   const TargetMachine &TM = DAG.getTarget();
 849
 850   if (TM.getRelocationModel() == Reloc::Static) {
 851     if (!ST->usingLargeMem()) {
 852       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 853     } else {
 854       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 855       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 856       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 857     }
 858   }
 859
 860   assert(0 &&
 861          "LowerJumpTable: Relocation model other than static not supported.");
 862   return SDValue();
 863 }
 864
 865 static SDValue
 866 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 867   MVT PtrVT = Op.getValueType();
 868   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 869   GlobalValue *GV = GSDN->getGlobal();
 870   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 871   const TargetMachine &TM = DAG.getTarget();
 872   SDValue Zero = DAG.getConstant(0, PtrVT);
 873
 874   if (TM.getRelocationModel() == Reloc::Static) {
 875     if (!ST->usingLargeMem()) {
 876       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 877     } else {
 878       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 879       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 880       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 881     }
 882   } else {
 883     cerr << "LowerGlobalAddress: Relocation model other than static not "
 884          << "supported.\n";
 885     abort();
 886     /*NOTREACHED*/
 887   }
 888
 889   return SDValue();
 890 }
 891
 892 //! Custom lower i64 integer constants
 893 /*!
 894  This code inserts all of the necessary juggling that needs to occur to load
 895  a 64-bit constant into a register.
 896  */
 897 static SDValue
 898 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 899   MVT VT = Op.getValueType();
 900   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 901
 902   if (VT == MVT::i64) {
 903     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 904     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 905                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 906   } else {
 907     cerr << "LowerConstant: unhandled constant type "
 908          << VT.getMVTString()
 909          << "\n";
 910     abort();
 911     /*NOTREACHED*/
 912   }
 913
 914   return SDValue();
 915 }
 916
 917 //! Custom lower double precision floating point constants
 918 static SDValue
 919 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 920   MVT VT = Op.getValueType();
 921   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 922
 923   assert((FP != 0) &&
 924          "LowerConstantFP: Node is not ConstantFPSDNode");
 925
 926   if (VT == MVT::f64) {
 927     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 928     return DAG.getNode(ISD::BIT_CONVERT, VT,
 929                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 930   }
 931
 932   return SDValue();
 933 }
 934
 935 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 936 static SDValue
 937 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 938 {
 939   SDValue Cond = Op.getOperand(1);
 940   MVT CondVT = Cond.getValueType();
 941   MVT CondNVT;
 942
 943   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 944     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 945     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 946                       Op.getOperand(0),
 947                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 948                       Op.getOperand(2));
 949   } else
 950     return SDValue();                // Unchanged
 951 }
 952
 953 static SDValue
 954 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 955 {
 956   MachineFunction &MF = DAG.getMachineFunction();
 957   MachineFrameInfo *MFI = MF.getFrameInfo();
 958   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 959   SmallVector<SDValue, 48> ArgValues;
 960   SDValue Root = Op.getOperand(0);
 961   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 962
 963   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 964   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 965
 966   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 967   unsigned ArgRegIdx = 0;
 968   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 969
 970   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 971
 972   // Add DAG nodes to load the arguments or copy them out of registers.
 973   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 974        ArgNo != e; ++ArgNo) {
 975     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 976     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 977     SDValue ArgVal;
 978
 979     if (ArgRegIdx < NumArgRegs) {
 980       const TargetRegisterClass *ArgRegClass;
 981
 982       switch (ObjectVT.getSimpleVT()) {
 983       default: {
 984         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 985              << ObjectVT.getMVTString()
 986              << "\n";
 987         abort();
 988       }
 989       case MVT::i8:
 990         ArgRegClass = &SPU::R8CRegClass;
 991         break;
 992       case MVT::i16:
 993         ArgRegClass = &SPU::R16CRegClass;
 994         break;
 995       case MVT::i32:
 996         ArgRegClass = &SPU::R32CRegClass;
 997         break;
 998       case MVT::i64:
 999         ArgRegClass = &SPU::R64CRegClass;
1000         break;
1001       case MVT::f32:
1002         ArgRegClass = &SPU::R32FPRegClass;
1003         break;
1004       case MVT::f64:
1005         ArgRegClass = &SPU::R64FPRegClass;
1006         break;
1007       case MVT::v2f64:
1008       case MVT::v4f32:
1009       case MVT::v2i64:
1010       case MVT::v4i32:
1011       case MVT::v8i16:
1012       case MVT::v16i8:
1013         ArgRegClass = &SPU::VECREGRegClass;
1014         break;
1015       }
1016
1017       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1018       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1019       ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1020       ++ArgRegIdx;
1021     } else {
1022       // We need to load the argument to a virtual register if we determined
1023       // above that we ran out of physical registers of the appropriate type
1024       // or we're forced to do vararg
1025       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1026       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1027       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1028       ArgOffset += StackSlotSize;
1029     }
1030
1031     ArgValues.push_back(ArgVal);
1032     // Update the chain
1033     Root = ArgVal.getOperand(0);
1034   }
1035
1036   // vararg handling:
1037   if (isVarArg) {
1038     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1039     // We will spill (79-3)+1 registers to the stack
1040     SmallVector<SDValue, 79-3+1> MemOps;
1041
1042     // Create the frame slot
1043
1044     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1045       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1046       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1047       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1048       SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1049       Root = Store.getOperand(0);
1050       MemOps.push_back(Store);
1051
1052       // Increment address by stack slot size for the next stored argument
1053       ArgOffset += StackSlotSize;
1054     }
1055     if (!MemOps.empty())
1056       Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1057   }
1058
1059   ArgValues.push_back(Root);
1060
1061   // Return the new list of results.
1062   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1063                             ArgValues.size());
1064 }
1065
1066 /// isLSAAddress - Return the immediate to use if the specified
1067 /// value is representable as a LSA address.
1068 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1069   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1070   if (!C) return 0;
1071
1072   int Addr = C->getZExtValue();
1073   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1074       (Addr << 14 >> 14) != Addr)
1075     return 0;  // Top 14 bits have to be sext of immediate.
1076
1077   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1078 }
1079
1080 static
1081 SDValue
1082 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1083   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1084   SDValue Chain = TheCall->getChain();
1085   SDValue Callee    = TheCall->getCallee();
1086   unsigned NumOps     = TheCall->getNumArgs();
1087   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1088   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1089   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1090
1091   // Handy pointer type
1092   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1093
1094   // Accumulate how many bytes are to be pushed on the stack, including the
1095   // linkage area, and parameter passing area.  According to the SPU ABI,
1096   // we minimally need space for [LR] and [SP]
1097   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1098
1099   // Set up a copy of the stack pointer for use loading and storing any
1100   // arguments that may not fit in the registers available for argument
1101   // passing.
1102   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1103
1104   // Figure out which arguments are going to go in registers, and which in
1105   // memory.
1106   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1107   unsigned ArgRegIdx = 0;
1108
1109   // Keep track of registers passing arguments
1110   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1111   // And the arguments passed on the stack
1112   SmallVector<SDValue, 8> MemOpChains;
1113
1114   for (unsigned i = 0; i != NumOps; ++i) {
1115     SDValue Arg = TheCall->getArg(i);
1116
1117     // PtrOff will be used to store the current argument to the stack if a
1118     // register cannot be found for it.
1119     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1120     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1121
1122     switch (Arg.getValueType().getSimpleVT()) {
1123     default: assert(0 && "Unexpected ValueType for argument!");
1124     case MVT::i32:
1125     case MVT::i64:
1126     case MVT::i128:
1127       if (ArgRegIdx != NumArgRegs) {
1128         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1129       } else {
1130         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1131         ArgOffset += StackSlotSize;
1132       }
1133       break;
1134     case MVT::f32:
1135     case MVT::f64:
1136       if (ArgRegIdx != NumArgRegs) {
1137         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1138       } else {
1139         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1140         ArgOffset += StackSlotSize;
1141       }
1142       break;
1143     case MVT::v4f32:
1144     case MVT::v4i32:
1145     case MVT::v8i16:
1146     case MVT::v16i8:
1147       if (ArgRegIdx != NumArgRegs) {
1148         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1149       } else {
1150         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1151         ArgOffset += StackSlotSize;
1152       }
1153       break;
1154     }
1155   }
1156
1157   // Update number of stack bytes actually used, insert a call sequence start
1158   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1159   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1160                                                             true));
1161
1162   if (!MemOpChains.empty()) {
1163     // Adjust the stack pointer for the stack arguments.
1164     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1165                         &MemOpChains[0], MemOpChains.size());
1166   }
1167
1168   // Build a sequence of copy-to-reg nodes chained together with token chain
1169   // and flag operands which copy the outgoing args into the appropriate regs.
1170   SDValue InFlag;
1171   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1172     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1173                              InFlag);
1174     InFlag = Chain.getValue(1);
1175   }
1176
1177   SmallVector<SDValue, 8> Ops;
1178   unsigned CallOpc = SPUISD::CALL;
1179
1180   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1181   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1182   // node so that legalize doesn't hack it.
1183   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1184     GlobalValue *GV = G->getGlobal();
1185     MVT CalleeVT = Callee.getValueType();
1186     SDValue Zero = DAG.getConstant(0, PtrVT);
1187     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1188
1189     if (!ST->usingLargeMem()) {
1190       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1191       // style calls, otherwise, external symbols are BRASL calls. This assumes
1192       // that declared/defined symbols are in the same compilation unit and can
1193       // be reached through PC-relative jumps.
1194       //
1195       // NOTE:
1196       // This may be an unsafe assumption for JIT and really large compilation
1197       // units.
1198       if (GV->isDeclaration()) {
1199         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1200       } else {
1201         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1202       }
1203     } else {
1204       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1205       // address pairs:
1206       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1207     }
1208   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1209     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1210   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1211     // If this is an absolute destination address that appears to be a legal
1212     // local store address, use the munged value.
1213     Callee = SDValue(Dest, 0);
1214   }
1215
1216   Ops.push_back(Chain);
1217   Ops.push_back(Callee);
1218
1219   // Add argument registers to the end of the list so that they are known live
1220   // into the call.
1221   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1222     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1223                                   RegsToPass[i].second.getValueType()));
1224
1225   if (InFlag.getNode())
1226     Ops.push_back(InFlag);
1227   // Returns a chain and a flag for retval copy to use.
1228   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1229                       &Ops[0], Ops.size());
1230   InFlag = Chain.getValue(1);
1231
1232   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1233                              DAG.getIntPtrConstant(0, true), InFlag);
1234   if (TheCall->getValueType(0) != MVT::Other)
1235     InFlag = Chain.getValue(1);
1236
1237   SDValue ResultVals[3];
1238   unsigned NumResults = 0;
1239
1240   // If the call has results, copy the values out of the ret val registers.
1241   switch (TheCall->getValueType(0).getSimpleVT()) {
1242   default: assert(0 && "Unexpected ret value!");
1243   case MVT::Other: break;
1244   case MVT::i32:
1245     if (TheCall->getValueType(1) == MVT::i32) {
1246       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1247       ResultVals[0] = Chain.getValue(0);
1248       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1249                                  Chain.getValue(2)).getValue(1);
1250       ResultVals[1] = Chain.getValue(0);
1251       NumResults = 2;
1252     } else {
1253       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1254       ResultVals[0] = Chain.getValue(0);
1255       NumResults = 1;
1256     }
1257     break;
1258   case MVT::i64:
1259     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1260     ResultVals[0] = Chain.getValue(0);
1261     NumResults = 1;
1262     break;
1263   case MVT::f32:
1264   case MVT::f64:
1265     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1266                                InFlag).getValue(1);
1267     ResultVals[0] = Chain.getValue(0);
1268     NumResults = 1;
1269     break;
1270   case MVT::v2f64:
1271   case MVT::v4f32:
1272   case MVT::v4i32:
1273   case MVT::v8i16:
1274   case MVT::v16i8:
1275     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1276                                    InFlag).getValue(1);
1277     ResultVals[0] = Chain.getValue(0);
1278     NumResults = 1;
1279     break;
1280   }
1281
1282   // If the function returns void, just return the chain.
1283   if (NumResults == 0)
1284     return Chain;
1285
1286   // Otherwise, merge everything together with a MERGE_VALUES node.
1287   ResultVals[NumResults++] = Chain;
1288   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1289   return Res.getValue(Op.getResNo());
1290 }
1291
1292 static SDValue
1293 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1294   SmallVector<CCValAssign, 16> RVLocs;
1295   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1296   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1297   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1298   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1299
1300   // If this is the first return lowered for this function, add the regs to the
1301   // liveout set for the function.
1302   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1303     for (unsigned i = 0; i != RVLocs.size(); ++i)
1304       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1305   }
1306
1307   SDValue Chain = Op.getOperand(0);
1308   SDValue Flag;
1309
1310   // Copy the result values into the output registers.
1311   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1312     CCValAssign &VA = RVLocs[i];
1313     assert(VA.isRegLoc() && "Can only return in registers!");
1314     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1315     Flag = Chain.getValue(1);
1316   }
1317
1318   if (Flag.getNode())
1319     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1320   else
1321     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1322 }
1323
1324
1325 //===----------------------------------------------------------------------===//
1326 // Vector related lowering:
1327 //===----------------------------------------------------------------------===//
1328
1329 static ConstantSDNode *
1330 getVecImm(SDNode *N) {
1331   SDValue OpVal(0, 0);
1332
1333   // Check to see if this buildvec has a single non-undef value in its elements.
1334   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1335     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1336     if (OpVal.getNode() == 0)
1337       OpVal = N->getOperand(i);
1338     else if (OpVal != N->getOperand(i))
1339       return 0;
1340   }
1341
1342   if (OpVal.getNode() != 0) {
1343     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1344       return CN;
1345     }
1346   }
1347
1348   return 0; // All UNDEF: use implicit def.; not Constant node
1349 }
1350
1351 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1352 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1353 /// constant
1354 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1355                               MVT ValueType) {
1356   if (ConstantSDNode *CN = getVecImm(N)) {
1357     uint64_t Value = CN->getZExtValue();
1358     if (ValueType == MVT::i64) {
1359       uint64_t UValue = CN->getZExtValue();
1360       uint32_t upper = uint32_t(UValue >> 32);
1361       uint32_t lower = uint32_t(UValue);
1362       if (upper != lower)
1363         return SDValue();
1364       Value = Value >> 32;
1365     }
1366     if (Value <= 0x3ffff)
1367       return DAG.getTargetConstant(Value, ValueType);
1368   }
1369
1370   return SDValue();
1371 }
1372
1373 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1374 /// and the value fits into a signed 16-bit constant, and if so, return the
1375 /// constant
1376 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1377                               MVT ValueType) {
1378   if (ConstantSDNode *CN = getVecImm(N)) {
1379     int64_t Value = CN->getSExtValue();
1380     if (ValueType == MVT::i64) {
1381       uint64_t UValue = CN->getZExtValue();
1382       uint32_t upper = uint32_t(UValue >> 32);
1383       uint32_t lower = uint32_t(UValue);
1384       if (upper != lower)
1385         return SDValue();
1386       Value = Value >> 32;
1387     }
1388     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1389       return DAG.getTargetConstant(Value, ValueType);
1390     }
1391   }
1392
1393   return SDValue();
1394 }
1395
1396 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1397 /// and the value fits into a signed 10-bit constant, and if so, return the
1398 /// constant
1399 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1400                               MVT ValueType) {
1401   if (ConstantSDNode *CN = getVecImm(N)) {
1402     int64_t Value = CN->getSExtValue();
1403     if (ValueType == MVT::i64) {
1404       uint64_t UValue = CN->getZExtValue();
1405       uint32_t upper = uint32_t(UValue >> 32);
1406       uint32_t lower = uint32_t(UValue);
1407       if (upper != lower)
1408         return SDValue();
1409       Value = Value >> 32;
1410     }
1411     if (isS10Constant(Value))
1412       return DAG.getTargetConstant(Value, ValueType);
1413   }
1414
1415   return SDValue();
1416 }
1417
1418 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1419 /// and the value fits into a signed 8-bit constant, and if so, return the
1420 /// constant.
1421 ///
1422 /// @note: The incoming vector is v16i8 because that's the only way we can load
1423 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1424 /// same value.
1425 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1426                              MVT ValueType) {
1427   if (ConstantSDNode *CN = getVecImm(N)) {
1428     int Value = (int) CN->getZExtValue();
1429     if (ValueType == MVT::i16
1430         && Value <= 0xffff                 /* truncated from uint64_t */
1431         && ((short) Value >> 8) == ((short) Value & 0xff))
1432       return DAG.getTargetConstant(Value & 0xff, ValueType);
1433     else if (ValueType == MVT::i8
1434              && (Value & 0xff) == Value)
1435       return DAG.getTargetConstant(Value, ValueType);
1436   }
1437
1438   return SDValue();
1439 }
1440
1441 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1442 /// and the value fits into a signed 16-bit constant, and if so, return the
1443 /// constant
1444 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1445                                MVT ValueType) {
1446   if (ConstantSDNode *CN = getVecImm(N)) {
1447     uint64_t Value = CN->getZExtValue();
1448     if ((ValueType == MVT::i32
1449           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1450         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1451       return DAG.getTargetConstant(Value >> 16, ValueType);
1452   }
1453
1454   return SDValue();
1455 }
1456
1457 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1458 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1459   if (ConstantSDNode *CN = getVecImm(N)) {
1460     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1461   }
1462
1463   return SDValue();
1464 }
1465
1466 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1467 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1468   if (ConstantSDNode *CN = getVecImm(N)) {
1469     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1470   }
1471
1472   return SDValue();
1473 }
1474
1475 // If this is a vector of constants or undefs, get the bits.  A bit in
1476 // UndefBits is set if the corresponding element of the vector is an
1477 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1478 // zero.   Return true if this is not an array of constants, false if it is.
1479 //
1480 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1481                                        uint64_t UndefBits[2]) {
1482   // Start with zero'd results.
1483   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1484
1485   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1486   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1487     SDValue OpVal = BV->getOperand(i);
1488
1489     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1490     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1491
1492     uint64_t EltBits = 0;
1493     if (OpVal.getOpcode() == ISD::UNDEF) {
1494       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1495       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1496       continue;
1497     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1498       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1499     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1500       const APFloat &apf = CN->getValueAPF();
1501       EltBits = (CN->getValueType(0) == MVT::f32
1502                  ? FloatToBits(apf.convertToFloat())
1503                  : DoubleToBits(apf.convertToDouble()));
1504     } else {
1505       // Nonconstant element.
1506       return true;
1507     }
1508
1509     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1510   }
1511
1512   //printf("%llx %llx  %llx %llx\n",
1513   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1514   return false;
1515 }
1516
1517 /// If this is a splat (repetition) of a value across the whole vector, return
1518 /// the smallest size that splats it.  For example, "0x01010101010101..." is a
1519 /// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1520 /// SplatSize = 1 byte.
1521 static bool isConstantSplat(const uint64_t Bits128[2],
1522                             const uint64_t Undef128[2],
1523                             int MinSplatBits,
1524                             uint64_t &SplatBits, uint64_t &SplatUndef,
1525                             int &SplatSize) {
1526   // Don't let undefs prevent splats from matching.  See if the top 64-bits are
1527   // the same as the lower 64-bits, ignoring undefs.
1528   uint64_t Bits64  = Bits128[0] | Bits128[1];
1529   uint64_t Undef64 = Undef128[0] & Undef128[1];
1530   uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1531   uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1532   uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
1533   uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1534
1535   if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1536     if (MinSplatBits < 64) {
1537
1538       // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1539       // undefs.
1540       if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1541         if (MinSplatBits < 32) {
1542
1543           // If the top 16-bits are different than the lower 16-bits, ignoring
1544           // undefs, we have an i32 splat.
1545           if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1546             if (MinSplatBits < 16) {
1547               // If the top 8-bits are different than the lower 8-bits, ignoring
1548               // undefs, we have an i16 splat.
1549               if ((Bits16 & (uint16_t(~Undef16) >> 8))
1550                   == ((Bits16 >> 8) & ~Undef16)) {
1551                 // Otherwise, we have an 8-bit splat.
1552                 SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1553                 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1554                 SplatSize = 1;
1555                 return true;
1556               }
1557             } else {
1558               SplatBits = Bits16;
1559               SplatUndef = Undef16;
1560               SplatSize = 2;
1561               return true;
1562             }
1563           }
1564         } else {
1565           SplatBits = Bits32;
1566           SplatUndef = Undef32;
1567           SplatSize = 4;
1568           return true;
1569         }
1570       }
1571     } else {
1572       SplatBits = Bits128[0];
1573       SplatUndef = Undef128[0];
1574       SplatSize = 8;
1575       return true;
1576     }
1577   }
1578
1579   return false;  // Can't be a splat if two pieces don't match.
1580 }
1581
1582 // If this is a case we can't handle, return null and let the default
1583 // expansion code take care of it.  If we CAN select this case, and if it
1584 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1585 // this case more efficiently than a constant pool load, lower it to the
1586 // sequence of ops that should be used.
1587 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1588   MVT VT = Op.getValueType();
1589   // If this is a vector of constants or undefs, get the bits.  A bit in
1590   // UndefBits is set if the corresponding element of the vector is an
1591   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1592   // zero.
1593   uint64_t VectorBits[2];
1594   uint64_t UndefBits[2];
1595   uint64_t SplatBits, SplatUndef;
1596   int SplatSize;
1597   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1598       || !isConstantSplat(VectorBits, UndefBits,
1599                           VT.getVectorElementType().getSizeInBits(),
1600                           SplatBits, SplatUndef, SplatSize))
1601     return SDValue();   // Not a constant vector, not a splat.
1602
1603   switch (VT.getSimpleVT()) {
1604   default:
1605   case MVT::v4f32: {
1606     uint32_t Value32 = SplatBits;
1607     assert(SplatSize == 4
1608            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1609     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1610     SDValue T = DAG.getConstant(Value32, MVT::i32);
1611     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1612                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1613     break;
1614   }
1615   case MVT::v2f64: {
1616     uint64_t f64val = SplatBits;
1617     assert(SplatSize == 8
1618            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1619     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1620     SDValue T = DAG.getConstant(f64val, MVT::i64);
1621     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1622                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1623     break;
1624   }
1625   case MVT::v16i8: {
1626    // 8-bit constants have to be expanded to 16-bits
1627    unsigned short Value16 = SplatBits | (SplatBits << 8);
1628    SDValue Ops[8];
1629    for (int i = 0; i < 8; ++i)
1630      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1631    return DAG.getNode(ISD::BIT_CONVERT, VT,
1632                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1633   }
1634   case MVT::v8i16: {
1635     unsigned short Value16;
1636     if (SplatSize == 2)
1637       Value16 = (unsigned short) (SplatBits & 0xffff);
1638     else
1639       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1640     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1641     SDValue Ops[8];
1642     for (int i = 0; i < 8; ++i) Ops[i] = T;
1643     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1644   }
1645   case MVT::v4i32: {
1646     unsigned int Value = SplatBits;
1647     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1648     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1649   }
1650   case MVT::v2i64: {
1651     uint64_t val = SplatBits;
1652     uint32_t upper = uint32_t(val >> 32);
1653     uint32_t lower = uint32_t(val);
1654
1655     if (upper == lower) {
1656       // Magic constant that can be matched by IL, ILA, et. al.
1657       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1658       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1659     } else {
1660       SDValue LO32;
1661       SDValue HI32;
1662       SmallVector<SDValue, 16> ShufBytes;
1663       SDValue Result;
1664       bool upper_special, lower_special;
1665
1666       // NOTE: This code creates common-case shuffle masks that can be easily
1667       // detected as common expressions. It is not attempting to create highly
1668       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1669
1670       // Detect if the upper or lower half is a special shuffle mask pattern:
1671       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1672       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1673
1674       // Create lower vector if not a special pattern
1675       if (!lower_special) {
1676         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1677         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1678                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1679                                        LO32C, LO32C, LO32C, LO32C));
1680       }
1681
1682       // Create upper vector if not a special pattern
1683       if (!upper_special) {
1684         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1685         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1686                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1687                                        HI32C, HI32C, HI32C, HI32C));
1688       }
1689
1690       // If either upper or lower are special, then the two input operands are
1691       // the same (basically, one of them is a "don't care")
1692       if (lower_special)
1693         LO32 = HI32;
1694       if (upper_special)
1695         HI32 = LO32;
1696       if (lower_special && upper_special) {
1697         // Unhappy situation... both upper and lower are special, so punt with
1698         // a target constant:
1699         SDValue Zero = DAG.getConstant(0, MVT::i32);
1700         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1701                                   Zero, Zero);
1702       }
1703
1704       for (int i = 0; i < 4; ++i) {
1705         uint64_t val = 0;
1706         for (int j = 0; j < 4; ++j) {
1707           SDValue V;
1708           bool process_upper, process_lower;
1709           val <<= 8;
1710           process_upper = (upper_special && (i & 1) == 0);
1711           process_lower = (lower_special && (i & 1) == 1);
1712
1713           if (process_upper || process_lower) {
1714             if ((process_upper && upper == 0)
1715                 || (process_lower && lower == 0))
1716               val |= 0x80;
1717             else if ((process_upper && upper == 0xffffffff)
1718                      || (process_lower && lower == 0xffffffff))
1719               val |= 0xc0;
1720             else if ((process_upper && upper == 0x80000000)
1721                      || (process_lower && lower == 0x80000000))
1722               val |= (j == 0 ? 0xe0 : 0x80);
1723           } else
1724             val |= i * 4 + j + ((i & 1) * 16);
1725         }
1726
1727         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1728       }
1729
1730       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1731                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1732                                      &ShufBytes[0], ShufBytes.size()));
1733     }
1734   }
1735   }
1736
1737   return SDValue();
1738 }
1739
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1747 /// \note
1748 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1749 /// control word for byte/halfword/word insertion. This takes care of a single
1750 /// element move from V2 into V1.
1751 /// \note
1752 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1753 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1754   SDValue V1 = Op.getOperand(0);
1755   SDValue V2 = Op.getOperand(1);
1756   SDValue PermMask = Op.getOperand(2);
1757
1758   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1759
1760   // If we have a single element being moved from V1 to V2, this can be handled
1761   // using the C*[DX] compute mask instructions, but the vector elements have
1762   // to be monotonically increasing with one exception element.
1763   MVT EltVT = V1.getValueType().getVectorElementType();
1764   unsigned EltsFromV2 = 0;
1765   unsigned V2Elt = 0;
1766   unsigned V2EltIdx0 = 0;
1767   unsigned CurrElt = 0;
1768   bool monotonic = true;
1769   if (EltVT == MVT::i8)
1770     V2EltIdx0 = 16;
1771   else if (EltVT == MVT::i16)
1772     V2EltIdx0 = 8;
1773   else if (EltVT == MVT::i32)
1774     V2EltIdx0 = 4;
1775   else
1776     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1777
1778   for (unsigned i = 0, e = PermMask.getNumOperands();
1779        EltsFromV2 <= 1 && monotonic && i != e;
1780        ++i) {
1781     unsigned SrcElt;
1782     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1783       SrcElt = 0;
1784     else
1785       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1786
1787     if (SrcElt >= V2EltIdx0) {
1788       ++EltsFromV2;
1789       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790     } else if (CurrElt != SrcElt) {
1791       monotonic = false;
1792     }
1793
1794     ++CurrElt;
1795   }
1796
1797   if (EltsFromV2 == 1 && monotonic) {
1798     // Compute mask and shuffle
1799     MachineFunction &MF = DAG.getMachineFunction();
1800     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1801     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1802     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1803     // Initialize temporary register to 0
1804     SDValue InitTempReg =
1805       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1806     // Copy register's contents as index in INSERT_MASK:
1807     SDValue ShufMaskOp =
1808       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1809                   DAG.getTargetConstant(V2Elt, MVT::i32),
1810                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1811     // Use shuffle mask in SHUFB synthetic instruction:
1812     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1813   } else {
1814    // Convert the SHUFFLE_VECTOR mask's input element units to the
1815    // actual bytes.
1816     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1817
1818     SmallVector<SDValue, 16> ResultMask;
1819     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1820       unsigned SrcElt;
1821       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1822         SrcElt = 0;
1823       else
1824         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1825
1826       for (unsigned j = 0; j < BytesPerElement; ++j) {
1827         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1828                                              MVT::i8));
1829       }
1830     }
1831
1832     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1833                                       &ResultMask[0], ResultMask.size());
1834     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1835   }
1836 }
1837
1838 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1839   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1840
1841   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1842     // For a constant, build the appropriate constant vector, which will
1843     // eventually simplify to a vector register load.
1844
1845     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1846     SmallVector<SDValue, 16> ConstVecValues;
1847     MVT VT;
1848     size_t n_copies;
1849
1850     // Create a constant vector:
1851     switch (Op.getValueType().getSimpleVT()) {
1852     default: assert(0 && "Unexpected constant value type in "
1853                          "LowerSCALAR_TO_VECTOR");
1854     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1855     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1856     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1857     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1858     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1859     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1860     }
1861
1862     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1863     for (size_t j = 0; j < n_copies; ++j)
1864       ConstVecValues.push_back(CValue);
1865
1866     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1867                        &ConstVecValues[0], ConstVecValues.size());
1868   } else {
1869     // Otherwise, copy the value from one register to another:
1870     switch (Op0.getValueType().getSimpleVT()) {
1871     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1872     case MVT::i8:
1873     case MVT::i16:
1874     case MVT::i32:
1875     case MVT::i64:
1876     case MVT::f32:
1877     case MVT::f64:
1878       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1879     }
1880   }
1881
1882   return SDValue();
1883 }
1884
1885 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1886   switch (Op.getValueType().getSimpleVT()) {
1887   default:
1888     cerr << "CellSPU: Unknown vector multiplication, got "
1889          << Op.getValueType().getMVTString()
1890          << "\n";
1891     abort();
1892     /*NOTREACHED*/
1893
1894   case MVT::v4i32: {
1895     SDValue rA = Op.getOperand(0);
1896     SDValue rB = Op.getOperand(1);
1897     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1898     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1899     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1900     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1901
1902     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1903     break;
1904   }
1905
1906   // Multiply two v8i16 vectors (pipeline friendly version):
1907   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1908   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1909   // c) Use SELB to select upper and lower halves from the intermediate results
1910   //
1911   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1912   // dual-issue. This code does manage to do this, even if it's a little on
1913   // the wacky side
1914   case MVT::v8i16: {
1915     MachineFunction &MF = DAG.getMachineFunction();
1916     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1917     SDValue Chain = Op.getOperand(0);
1918     SDValue rA = Op.getOperand(0);
1919     SDValue rB = Op.getOperand(1);
1920     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1921     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1922
1923     SDValue FSMBOp =
1924       DAG.getCopyToReg(Chain, FSMBIreg,
1925                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1926                                    DAG.getConstant(0xcccc, MVT::i16)));
1927
1928     SDValue HHProd =
1929       DAG.getCopyToReg(FSMBOp, HiProdReg,
1930                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1931
1932     SDValue HHProd_v4i32 =
1933       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1934                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1935
1936     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1937                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1938                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1939                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1940                                                HHProd_v4i32,
1941                                                DAG.getConstant(16, MVT::i16))),
1942                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1943   }
1944
1945   // This M00sE is N@stI! (apologies to Monty Python)
1946   //
1947   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1948   // is to break it all apart, sign extend, and reassemble the various
1949   // intermediate products.
1950   case MVT::v16i8: {
1951     SDValue rA = Op.getOperand(0);
1952     SDValue rB = Op.getOperand(1);
1953     SDValue c8 = DAG.getConstant(8, MVT::i32);
1954     SDValue c16 = DAG.getConstant(16, MVT::i32);
1955
1956     SDValue LLProd =
1957       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1958                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1959                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1960
1961     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1962
1963     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1964
1965     SDValue LHProd =
1966       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1967                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1968
1969     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1970                                      DAG.getConstant(0x2222, MVT::i16));
1971
1972     SDValue LoProdParts =
1973       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1974                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1975                               LLProd, LHProd, FSMBmask));
1976
1977     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1978
1979     SDValue LoProd =
1980       DAG.getNode(ISD::AND, MVT::v4i32,
1981                   LoProdParts,
1982                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1983                               LoProdMask, LoProdMask,
1984                               LoProdMask, LoProdMask));
1985
1986     SDValue rAH =
1987       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1988                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1989
1990     SDValue rBH =
1991       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1993
1994     SDValue HLProd =
1995       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1996                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1997                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1998
1999     SDValue HHProd_1 =
2000       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2001                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2002                               DAG.getNode(SPUISD::VEC_SRA,
2003                                           MVT::v4i32, rAH, c8)),
2004                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005                               DAG.getNode(SPUISD::VEC_SRA,
2006                                           MVT::v4i32, rBH, c8)));
2007
2008     SDValue HHProd =
2009       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2010                   HLProd,
2011                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2012                   FSMBmask);
2013
2014     SDValue HiProd =
2015       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2016
2017     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2018                        DAG.getNode(ISD::OR, MVT::v4i32,
2019                                    LoProd, HiProd));
2020   }
2021   }
2022
2023   return SDValue();
2024 }
2025
2026 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2027   MachineFunction &MF = DAG.getMachineFunction();
2028   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2029
2030   SDValue A = Op.getOperand(0);
2031   SDValue B = Op.getOperand(1);
2032   MVT VT = Op.getValueType();
2033
2034   unsigned VRegBR, VRegC;
2035
2036   if (VT == MVT::f32) {
2037     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2038     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2039   } else {
2040     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2041     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2042   }
2043   // TODO: make sure we're feeding FPInterp the right arguments
2044   // Right now: fi B, frest(B)
2045
2046   // Computes BRcpl =
2047   // (Floating Interpolate (FP Reciprocal Estimate B))
2048   SDValue BRcpl =
2049       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2050                        DAG.getNode(SPUISD::FPInterp, VT, B,
2051                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2052
2053   // Computes A * BRcpl and stores in a temporary register
2054   SDValue AxBRcpl =
2055       DAG.getCopyToReg(BRcpl, VRegC,
2056                  DAG.getNode(ISD::FMUL, VT, A,
2057                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2058   // What's the Chain variable do? It's magic!
2059   // TODO: set Chain = Op(0).getEntryNode()
2060
2061   return DAG.getNode(ISD::FADD, VT,
2062                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2063                 DAG.getNode(ISD::FMUL, VT,
2064                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2065                         DAG.getNode(ISD::FSUB, VT, A,
2066                             DAG.getNode(ISD::FMUL, VT, B,
2067                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2068 }
2069
2070 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071   MVT VT = Op.getValueType();
2072   SDValue N = Op.getOperand(0);
2073   SDValue Elt = Op.getOperand(1);
2074   SDValue ShufMask[16];
2075   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2076
2077   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2078
2079   int EltNo = (int) C->getZExtValue();
2080
2081   // sanity checks:
2082   if (VT == MVT::i8 && EltNo >= 16)
2083     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2084   else if (VT == MVT::i16 && EltNo >= 8)
2085     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2086   else if (VT == MVT::i32 && EltNo >= 4)
2087     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2088   else if (VT == MVT::i64 && EltNo >= 2)
2089     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2090
2091   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2092     // i32 and i64: Element 0 is the preferred slot
2093     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2094   }
2095
2096   // Need to generate shuffle mask and extract:
2097   int prefslot_begin = -1, prefslot_end = -1;
2098   int elt_byte = EltNo * VT.getSizeInBits() / 8;
2099
2100   switch (VT.getSimpleVT()) {
2101   default:
2102     assert(false && "Invalid value type!");
2103   case MVT::i8: {
2104     prefslot_begin = prefslot_end = 3;
2105     break;
2106   }
2107   case MVT::i16: {
2108     prefslot_begin = 2; prefslot_end = 3;
2109     break;
2110   }
2111   case MVT::i32:
2112   case MVT::f32: {
2113     prefslot_begin = 0; prefslot_end = 3;
2114     break;
2115   }
2116   case MVT::i64:
2117   case MVT::f64: {
2118     prefslot_begin = 0; prefslot_end = 7;
2119     break;
2120   }
2121   }
2122
2123   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2124          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2125
2126   for (int i = 0; i < 16; ++i) {
2127     // zero fill uppper part of preferred slot, don't care about the
2128     // other slots:
2129     unsigned int mask_val;
2130
2131     if (i <= prefslot_end) {
2132       mask_val =
2133         ((i < prefslot_begin)
2134          ? 0x80
2135          : elt_byte + (i - prefslot_begin));
2136
2137       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2138     } else
2139       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2140   }
2141
2142   SDValue ShufMaskVec =
2143     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2144                 &ShufMask[0],
2145                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2146
2147   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2148                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2149                                  N, N, ShufMaskVec));
2150
2151 }
2152
2153 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2154   SDValue VecOp = Op.getOperand(0);
2155   SDValue ValOp = Op.getOperand(1);
2156   SDValue IdxOp = Op.getOperand(2);
2157   MVT VT = Op.getValueType();
2158
2159   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2160   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2161
2162   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2163   // Use $2 because it's always 16-byte aligned and it's available:
2164   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2165
2166   SDValue result =
2167     DAG.getNode(SPUISD::SHUFB, VT,
2168                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2169                 VecOp,
2170                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2171                             DAG.getNode(ISD::ADD, PtrVT,
2172                                         PtrBase,
2173                                         DAG.getConstant(CN->getZExtValue(),
2174                                                         PtrVT))));
2175
2176   return result;
2177 }
2178
2179 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2180 {
2181   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2182
2183   assert(Op.getValueType() == MVT::i8);
2184   switch (Opc) {
2185   default:
2186     assert(0 && "Unhandled i8 math operator");
2187     /*NOTREACHED*/
2188     break;
2189   case ISD::SUB: {
2190     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2191     // the result:
2192     SDValue N1 = Op.getOperand(1);
2193     N0 = (N0.getOpcode() != ISD::Constant
2194           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2196                             MVT::i16));
2197     N1 = (N1.getOpcode() != ISD::Constant
2198           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2199           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2200                             MVT::i16));
2201     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202                        DAG.getNode(Opc, MVT::i16, N0, N1));
2203   }
2204   case ISD::ROTR:
2205   case ISD::ROTL: {
2206     SDValue N1 = Op.getOperand(1);
2207     unsigned N1Opc;
2208     N0 = (N0.getOpcode() != ISD::Constant
2209           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2211                             MVT::i16));
2212     N1Opc = N1.getValueType().bitsLT(MVT::i32)
2213             ? ISD::ZERO_EXTEND
2214             : ISD::TRUNCATE;
2215     N1 = (N1.getOpcode() != ISD::Constant
2216           ? DAG.getNode(N1Opc, MVT::i32, N1)
2217           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2218                             MVT::i32));
2219     SDValue ExpandArg =
2220       DAG.getNode(ISD::OR, MVT::i16, N0,
2221                   DAG.getNode(ISD::SHL, MVT::i16,
2222                               N0, DAG.getConstant(8, MVT::i32)));
2223     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2225   }
2226   case ISD::SRL:
2227   case ISD::SHL: {
2228     SDValue N1 = Op.getOperand(1);
2229     unsigned N1Opc;
2230     N0 = (N0.getOpcode() != ISD::Constant
2231           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2232           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2233                             MVT::i16));
2234     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2235             ? ISD::ZERO_EXTEND
2236             : ISD::TRUNCATE;
2237     N1 = (N1.getOpcode() != ISD::Constant
2238           ? DAG.getNode(N1Opc, MVT::i16, N1)
2239           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2240                             MVT::i16));
2241     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2242                        DAG.getNode(Opc, MVT::i16, N0, N1));
2243   }
2244   case ISD::SRA: {
2245     SDValue N1 = Op.getOperand(1);
2246     unsigned N1Opc;
2247     N0 = (N0.getOpcode() != ISD::Constant
2248           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2249           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2250                             MVT::i16));
2251     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2252             ? ISD::SIGN_EXTEND
2253             : ISD::TRUNCATE;
2254     N1 = (N1.getOpcode() != ISD::Constant
2255           ? DAG.getNode(N1Opc, MVT::i16, N1)
2256           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2257                             MVT::i16));
2258     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2259                        DAG.getNode(Opc, MVT::i16, N0, N1));
2260   }
2261   case ISD::MUL: {
2262     SDValue N1 = Op.getOperand(1);
2263     unsigned N1Opc;
2264     N0 = (N0.getOpcode() != ISD::Constant
2265           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2266           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2267                             MVT::i16));
2268     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2269     N1 = (N1.getOpcode() != ISD::Constant
2270           ? DAG.getNode(N1Opc, MVT::i16, N1)
2271           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2272                             MVT::i16));
2273     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2274                        DAG.getNode(Opc, MVT::i16, N0, N1));
2275     break;
2276   }
2277   }
2278
2279   return SDValue();
2280 }
2281
2282 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2283 {
2284   MVT VT = Op.getValueType();
2285   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2286
2287   SDValue Op0 = Op.getOperand(0);
2288
2289   switch (Opc) {
2290   case ISD::ZERO_EXTEND:
2291   case ISD::SIGN_EXTEND:
2292   case ISD::ANY_EXTEND: {
2293     MVT Op0VT = Op0.getValueType();
2294     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2295
2296     assert(Op0VT == MVT::i32
2297            && "CellSPU: Zero/sign extending something other than i32");
2298     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2299
2300     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2301                       ? SPUISD::ROTBYTES_RIGHT_S
2302                       : SPUISD::ROTQUAD_RZ_BYTES);
2303     SDValue PromoteScalar =
2304       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2305
2306     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2307                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2308                                    DAG.getNode(NewOpc, Op0VecVT,
2309                                                PromoteScalar,
2310                                                DAG.getConstant(4, MVT::i32))));
2311   }
2312
2313   case ISD::ADD: {
2314     // Turn operands into vectors to satisfy type checking (shufb works on
2315     // vectors)
2316     SDValue Op0 =
2317       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2318     SDValue Op1 =
2319       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2320     SmallVector<SDValue, 16> ShufBytes;
2321
2322     // Create the shuffle mask for "rotating" the borrow up one register slot
2323     // once the borrow is generated.
2324     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2325     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2326     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2327     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2328
2329     SDValue CarryGen =
2330       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2331     SDValue ShiftedCarry =
2332       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2333                   CarryGen, CarryGen,
2334                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2335                               &ShufBytes[0], ShufBytes.size()));
2336
2337     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2338                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2339                                    Op0, Op1, ShiftedCarry));
2340   }
2341
2342   case ISD::SUB: {
2343     // Turn operands into vectors to satisfy type checking (shufb works on
2344     // vectors)
2345     SDValue Op0 =
2346       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2347     SDValue Op1 =
2348       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2349     SmallVector<SDValue, 16> ShufBytes;
2350
2351     // Create the shuffle mask for "rotating" the borrow up one register slot
2352     // once the borrow is generated.
2353     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2354     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2355     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2356     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2357
2358     SDValue BorrowGen =
2359       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2360     SDValue ShiftedBorrow =
2361       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2362                   BorrowGen, BorrowGen,
2363                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2364                               &ShufBytes[0], ShufBytes.size()));
2365
2366     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2367                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2368                                    Op0, Op1, ShiftedBorrow));
2369   }
2370
2371   case ISD::SHL: {
2372     SDValue ShiftAmt = Op.getOperand(1);
2373     MVT ShiftAmtVT = ShiftAmt.getValueType();
2374     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2375     SDValue MaskLower =
2376       DAG.getNode(SPUISD::SELB, VecVT,
2377                   Op0Vec,
2378                   DAG.getConstant(0, VecVT),
2379                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2380                               DAG.getConstant(0xff00ULL, MVT::i16)));
2381     SDValue ShiftAmtBytes =
2382       DAG.getNode(ISD::SRL, ShiftAmtVT,
2383                   ShiftAmt,
2384                   DAG.getConstant(3, ShiftAmtVT));
2385     SDValue ShiftAmtBits =
2386       DAG.getNode(ISD::AND, ShiftAmtVT,
2387                   ShiftAmt,
2388                   DAG.getConstant(7, ShiftAmtVT));
2389
2390     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2391                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2392                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2393                                                MaskLower, ShiftAmtBytes),
2394                                    ShiftAmtBits));
2395   }
2396
2397   case ISD::SRL: {
2398     MVT VT = Op.getValueType();
2399     SDValue ShiftAmt = Op.getOperand(1);
2400     MVT ShiftAmtVT = ShiftAmt.getValueType();
2401     SDValue ShiftAmtBytes =
2402       DAG.getNode(ISD::SRL, ShiftAmtVT,
2403                   ShiftAmt,
2404                   DAG.getConstant(3, ShiftAmtVT));
2405     SDValue ShiftAmtBits =
2406       DAG.getNode(ISD::AND, ShiftAmtVT,
2407                   ShiftAmt,
2408                   DAG.getConstant(7, ShiftAmtVT));
2409
2410     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2411                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2412                                    Op0, ShiftAmtBytes),
2413                        ShiftAmtBits);
2414   }
2415
2416   case ISD::SRA: {
2417     // Promote Op0 to vector
2418     SDValue Op0 =
2419       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2420     SDValue ShiftAmt = Op.getOperand(1);
2421     MVT ShiftVT = ShiftAmt.getValueType();
2422
2423     // Negate variable shift amounts
2424     if (!isa<ConstantSDNode>(ShiftAmt)) {
2425       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2426                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2427     }
2428
2429     SDValue UpperHalfSign =
2430       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2431                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2432                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2433                                           Op0, DAG.getConstant(31, MVT::i32))));
2434     SDValue UpperHalfSignMask =
2435       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2436     SDValue UpperLowerMask =
2437       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2438                   DAG.getConstant(0xff00, MVT::i16));
2439     SDValue UpperLowerSelect =
2440       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2441                   UpperHalfSignMask, Op0, UpperLowerMask);
2442     SDValue RotateLeftBytes =
2443       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2444                   UpperLowerSelect, ShiftAmt);
2445     SDValue RotateLeftBits =
2446       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2447                   RotateLeftBytes, ShiftAmt);
2448
2449     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2450                        RotateLeftBits);
2451   }
2452   }
2453
2454   return SDValue();
2455 }
2456
2457 //! Lower byte immediate operations for v16i8 vectors:
2458 static SDValue
2459 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2460   SDValue ConstVec;
2461   SDValue Arg;
2462   MVT VT = Op.getValueType();
2463
2464   ConstVec = Op.getOperand(0);
2465   Arg = Op.getOperand(1);
2466   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2467     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2468       ConstVec = ConstVec.getOperand(0);
2469     } else {
2470       ConstVec = Op.getOperand(1);
2471       Arg = Op.getOperand(0);
2472       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2473         ConstVec = ConstVec.getOperand(0);
2474       }
2475     }
2476   }
2477
2478   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2479     uint64_t VectorBits[2];
2480     uint64_t UndefBits[2];
2481     uint64_t SplatBits, SplatUndef;
2482     int SplatSize;
2483
2484     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2485         && isConstantSplat(VectorBits, UndefBits,
2486                            VT.getVectorElementType().getSizeInBits(),
2487                            SplatBits, SplatUndef, SplatSize)) {
2488       SDValue tcVec[16];
2489       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2490       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2491
2492       // Turn the BUILD_VECTOR into a set of target constants:
2493       for (size_t i = 0; i < tcVecSize; ++i)
2494         tcVec[i] = tc;
2495
2496       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2497                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2498     }
2499   }
2500   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2501   // lowered.  Return the operation, rather than a null SDValue.
2502   return Op;
2503 }
2504
2505 //! Lower i32 multiplication
2506 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2507                           unsigned Opc) {
2508   switch (VT.getSimpleVT()) {
2509   default:
2510     cerr << "CellSPU: Unknown LowerMUL value type, got "
2511          << Op.getValueType().getMVTString()
2512          << "\n";
2513     abort();
2514     /*NOTREACHED*/
2515
2516   case MVT::i32: {
2517     SDValue rA = Op.getOperand(0);
2518     SDValue rB = Op.getOperand(1);
2519
2520     return DAG.getNode(ISD::ADD, MVT::i32,
2521                        DAG.getNode(ISD::ADD, MVT::i32,
2522                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2523                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2524                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2525   }
2526   }
2527
2528   return SDValue();
2529 }
2530
2531 //! Custom lowering for CTPOP (count population)
2532 /*!
2533   Custom lowering code that counts the number ones in the input
2534   operand. SPU has such an instruction, but it counts the number of
2535   ones per byte, which then have to be accumulated.
2536 */
2537 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2538   MVT VT = Op.getValueType();
2539   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2540
2541   switch (VT.getSimpleVT()) {
2542   default:
2543     assert(false && "Invalid value type!");
2544   case MVT::i8: {
2545     SDValue N = Op.getOperand(0);
2546     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2547
2548     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2549     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2550
2551     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2552   }
2553
2554   case MVT::i16: {
2555     MachineFunction &MF = DAG.getMachineFunction();
2556     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2557
2558     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2559
2560     SDValue N = Op.getOperand(0);
2561     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2562     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2563     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2564
2565     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2566     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2567
2568     // CNTB_result becomes the chain to which all of the virtual registers
2569     // CNTB_reg, SUM1_reg become associated:
2570     SDValue CNTB_result =
2571       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2572
2573     SDValue CNTB_rescopy =
2574       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2575
2576     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2577
2578     return DAG.getNode(ISD::AND, MVT::i16,
2579                        DAG.getNode(ISD::ADD, MVT::i16,
2580                                    DAG.getNode(ISD::SRL, MVT::i16,
2581                                                Tmp1, Shift1),
2582                                    Tmp1),
2583                        Mask0);
2584   }
2585
2586   case MVT::i32: {
2587     MachineFunction &MF = DAG.getMachineFunction();
2588     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2589
2590     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2591     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2592
2593     SDValue N = Op.getOperand(0);
2594     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2595     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2596     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2597     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2598
2599     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2600     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2601
2602     // CNTB_result becomes the chain to which all of the virtual registers
2603     // CNTB_reg, SUM1_reg become associated:
2604     SDValue CNTB_result =
2605       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2606
2607     SDValue CNTB_rescopy =
2608       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2609
2610     SDValue Comp1 =
2611       DAG.getNode(ISD::SRL, MVT::i32,
2612                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2613
2614     SDValue Sum1 =
2615       DAG.getNode(ISD::ADD, MVT::i32,
2616                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2617
2618     SDValue Sum1_rescopy =
2619       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2620
2621     SDValue Comp2 =
2622       DAG.getNode(ISD::SRL, MVT::i32,
2623                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2624                   Shift2);
2625     SDValue Sum2 =
2626       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2627                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2628
2629     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2630   }
2631
2632   case MVT::i64:
2633     break;
2634   }
2635
2636   return SDValue();
2637 }
2638
2639 /// LowerOperation - Provide custom lowering hooks for some operations.
2640 ///
2641 SDValue
2642 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2643 {
2644   unsigned Opc = (unsigned) Op.getOpcode();
2645   MVT VT = Op.getValueType();
2646
2647   switch (Opc) {
2648   default: {
2649     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2650     cerr << "Op.getOpcode() = " << Opc << "\n";
2651     cerr << "*Op.getNode():\n";
2652     Op.getNode()->dump();
2653     abort();
2654   }
2655   case ISD::LOAD:
2656   case ISD::SEXTLOAD:
2657   case ISD::ZEXTLOAD:
2658     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2659   case ISD::STORE:
2660     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2661   case ISD::ConstantPool:
2662     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2663   case ISD::GlobalAddress:
2664     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2665   case ISD::JumpTable:
2666     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2667   case ISD::Constant:
2668     return LowerConstant(Op, DAG);
2669   case ISD::ConstantFP:
2670     return LowerConstantFP(Op, DAG);
2671   case ISD::BRCOND:
2672     return LowerBRCOND(Op, DAG);
2673   case ISD::FORMAL_ARGUMENTS:
2674     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2675   case ISD::CALL:
2676     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2677   case ISD::RET:
2678     return LowerRET(Op, DAG, getTargetMachine());
2679
2680
2681   // i8, i64 math ops:
2682   case ISD::ZERO_EXTEND:
2683   case ISD::SIGN_EXTEND:
2684   case ISD::ANY_EXTEND:
2685   case ISD::ADD:
2686   case ISD::SUB:
2687   case ISD::ROTR:
2688   case ISD::ROTL:
2689   case ISD::SRL:
2690   case ISD::SHL:
2691   case ISD::SRA: {
2692     if (VT == MVT::i8)
2693       return LowerI8Math(Op, DAG, Opc);
2694     else if (VT == MVT::i64)
2695       return LowerI64Math(Op, DAG, Opc);
2696     break;
2697   }
2698
2699   // Vector-related lowering.
2700   case ISD::BUILD_VECTOR:
2701     return LowerBUILD_VECTOR(Op, DAG);
2702   case ISD::SCALAR_TO_VECTOR:
2703     return LowerSCALAR_TO_VECTOR(Op, DAG);
2704   case ISD::VECTOR_SHUFFLE:
2705     return LowerVECTOR_SHUFFLE(Op, DAG);
2706   case ISD::EXTRACT_VECTOR_ELT:
2707     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2708   case ISD::INSERT_VECTOR_ELT:
2709     return LowerINSERT_VECTOR_ELT(Op, DAG);
2710
2711   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2712   case ISD::AND:
2713   case ISD::OR:
2714   case ISD::XOR:
2715     return LowerByteImmed(Op, DAG);
2716
2717   // Vector and i8 multiply:
2718   case ISD::MUL:
2719     if (VT.isVector())
2720       return LowerVectorMUL(Op, DAG);
2721     else if (VT == MVT::i8)
2722       return LowerI8Math(Op, DAG, Opc);
2723     else
2724       return LowerMUL(Op, DAG, VT, Opc);
2725
2726   case ISD::FDIV:
2727     if (VT == MVT::f32 || VT == MVT::v4f32)
2728       return LowerFDIVf32(Op, DAG);
2729 //    else if (Op.getValueType() == MVT::f64)
2730 //      return LowerFDIVf64(Op, DAG);
2731     else
2732       assert(0 && "Calling FDIV on unsupported MVT");
2733
2734   case ISD::CTPOP:
2735     return LowerCTPOP(Op, DAG);
2736   }
2737
2738   return SDValue();
2739 }
2740
2741 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2742 {
2743 #if 0
2744   unsigned Opc = (unsigned) N->getOpcode();
2745   MVT OpVT = N->getValueType(0);
2746
2747   switch (Opc) {
2748   default: {
2749     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2750     cerr << "Op.getOpcode() = " << Opc << "\n";
2751     cerr << "*Op.getNode():\n";
2752     N->dump();
2753     abort();
2754     /*NOTREACHED*/
2755   }
2756   }
2757 #endif
2758
2759   /* Otherwise, return unchanged */
2760   return 0;
2761 }
2762
2763 //===----------------------------------------------------------------------===//
2764 // Target Optimization Hooks
2765 //===----------------------------------------------------------------------===//
2766
2767 SDValue
2768 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2769 {
2770 #if 0
2771   TargetMachine &TM = getTargetMachine();
2772 #endif
2773   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2774   SelectionDAG &DAG = DCI.DAG;
2775   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2776   SDValue Result;                     // Initially, NULL result
2777
2778   switch (N->getOpcode()) {
2779   default: break;
2780   case ISD::ADD: {
2781     SDValue Op1 = N->getOperand(1);
2782
2783     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2784       SDValue Op01 = Op0.getOperand(1);
2785       if (Op01.getOpcode() == ISD::Constant
2786           || Op01.getOpcode() == ISD::TargetConstant) {
2787         // (add <const>, (SPUindirect <arg>, <const>)) ->
2788         // (SPUindirect <arg>, <const + const>)
2789         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2790         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2791         SDValue combinedConst =
2792           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2793                           Op0.getValueType());
2794
2795         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2796                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2797         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2798                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2799         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2800                            Op0.getOperand(0), combinedConst);
2801       }
2802     } else if (isa<ConstantSDNode>(Op0)
2803                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2804       SDValue Op11 = Op1.getOperand(1);
2805       if (Op11.getOpcode() == ISD::Constant
2806           || Op11.getOpcode() == ISD::TargetConstant) {
2807         // (add (SPUindirect <arg>, <const>), <const>) ->
2808         // (SPUindirect <arg>, <const + const>)
2809         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2810         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2811         SDValue combinedConst =
2812           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2813                           Op0.getValueType());
2814
2815         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2816                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2817         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2818                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2819
2820         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2821                            Op1.getOperand(0), combinedConst);
2822       }
2823     }
2824     break;
2825   }
2826   case ISD::SIGN_EXTEND:
2827   case ISD::ZERO_EXTEND:
2828   case ISD::ANY_EXTEND: {
2829     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2830         N->getValueType(0) == Op0.getValueType()) {
2831       // (any_extend (SPUextract_elt0 <arg>)) ->
2832       // (SPUextract_elt0 <arg>)
2833       // Types must match, however...
2834       DEBUG(cerr << "Replace: ");
2835       DEBUG(N->dump(&DAG));
2836       DEBUG(cerr << "\nWith:    ");
2837       DEBUG(Op0.getNode()->dump(&DAG));
2838       DEBUG(cerr << "\n");
2839
2840       return Op0;
2841     }
2842     break;
2843   }
2844   case SPUISD::IndirectAddr: {
2845     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2846       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2847       if (CN->getZExtValue() == 0) {
2848         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2849         // (SPUaform <addr>, 0)
2850
2851         DEBUG(cerr << "Replace: ");
2852         DEBUG(N->dump(&DAG));
2853         DEBUG(cerr << "\nWith:    ");
2854         DEBUG(Op0.getNode()->dump(&DAG));
2855         DEBUG(cerr << "\n");
2856
2857         return Op0;
2858       }
2859     }
2860     break;
2861   }
2862   case SPUISD::SHLQUAD_L_BITS:
2863   case SPUISD::SHLQUAD_L_BYTES:
2864   case SPUISD::VEC_SHL:
2865   case SPUISD::VEC_SRL:
2866   case SPUISD::VEC_SRA:
2867   case SPUISD::ROTQUAD_RZ_BYTES:
2868   case SPUISD::ROTQUAD_RZ_BITS: {
2869     SDValue Op1 = N->getOperand(1);
2870
2871     if (isa<ConstantSDNode>(Op1)) {
2872       // Kill degenerate vector shifts:
2873       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2874
2875       if (CN->getZExtValue() == 0) {
2876         Result = Op0;
2877       }
2878     }
2879     break;
2880   }
2881   case SPUISD::PROMOTE_SCALAR: {
2882     switch (Op0.getOpcode()) {
2883     default:
2884       break;
2885     case ISD::ANY_EXTEND:
2886     case ISD::ZERO_EXTEND:
2887     case ISD::SIGN_EXTEND: {
2888       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2889       // <arg>
2890       // but only if the SPUpromote_scalar and <arg> types match.
2891       SDValue Op00 = Op0.getOperand(0);
2892       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2893         SDValue Op000 = Op00.getOperand(0);
2894         if (Op000.getValueType() == N->getValueType(0)) {
2895           Result = Op000;
2896         }
2897       }
2898       break;
2899     }
2900     case SPUISD::EXTRACT_ELT0: {
2901       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2902       // <arg>
2903       Result = Op0.getOperand(0);
2904       break;
2905     }
2906     }
2907     break;
2908   }
2909   }
2910   // Otherwise, return unchanged.
2911 #if 1
2912   if (Result.getNode()) {
2913     DEBUG(cerr << "\nReplace.SPU: ");
2914     DEBUG(N->dump(&DAG));
2915     DEBUG(cerr << "\nWith:        ");
2916     DEBUG(Result.getNode()->dump(&DAG));
2917     DEBUG(cerr << "\n");
2918   }
2919 #endif
2920
2921   return Result;
2922 }
2923
2924 //===----------------------------------------------------------------------===//
2925 // Inline Assembly Support
2926 //===----------------------------------------------------------------------===//
2927
2928 /// getConstraintType - Given a constraint letter, return the type of
2929 /// constraint it is for this target.
2930 SPUTargetLowering::ConstraintType
2931 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2932   if (ConstraintLetter.size() == 1) {
2933     switch (ConstraintLetter[0]) {
2934     default: break;
2935     case 'b':
2936     case 'r':
2937     case 'f':
2938     case 'v':
2939     case 'y':
2940       return C_RegisterClass;
2941     }
2942   }
2943   return TargetLowering::getConstraintType(ConstraintLetter);
2944 }
2945
2946 std::pair<unsigned, const TargetRegisterClass*>
2947 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2948                                                 MVT VT) const
2949 {
2950   if (Constraint.size() == 1) {
2951     // GCC RS6000 Constraint Letters
2952     switch (Constraint[0]) {
2953     case 'b':   // R1-R31
2954     case 'r':   // R0-R31
2955       if (VT == MVT::i64)
2956         return std::make_pair(0U, SPU::R64CRegisterClass);
2957       return std::make_pair(0U, SPU::R32CRegisterClass);
2958     case 'f':
2959       if (VT == MVT::f32)
2960         return std::make_pair(0U, SPU::R32FPRegisterClass);
2961       else if (VT == MVT::f64)
2962         return std::make_pair(0U, SPU::R64FPRegisterClass);
2963       break;
2964     case 'v':
2965       return std::make_pair(0U, SPU::GPRCRegisterClass);
2966     }
2967   }
2968
2969   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2970 }
2971
2972 //! Compute used/known bits for a SPU operand
2973 void
2974 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2975                                                   const APInt &Mask,
2976                                                   APInt &KnownZero,
2977                                                   APInt &KnownOne,
2978                                                   const SelectionDAG &DAG,
2979                                                   unsigned Depth ) const {
2980 #if 0
2981   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2982 #endif
2983
2984   switch (Op.getOpcode()) {
2985   default:
2986     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2987     break;
2988
2989 #if 0
2990   case CALL:
2991   case SHUFB:
2992   case INSERT_MASK:
2993   case CNTB:
2994 #endif
2995
2996   case SPUISD::PROMOTE_SCALAR: {
2997     SDValue Op0 = Op.getOperand(0);
2998     MVT Op0VT = Op0.getValueType();
2999     unsigned Op0VTBits = Op0VT.getSizeInBits();
3000     uint64_t InMask = Op0VT.getIntegerVTBitMask();
3001     KnownZero |= APInt(Op0VTBits, ~InMask, false);
3002     KnownOne |= APInt(Op0VTBits, InMask, false);
3003     break;
3004   }
3005
3006   case SPUISD::LDRESULT:
3007   case SPUISD::EXTRACT_ELT0:
3008   case SPUISD::EXTRACT_ELT0_CHAINED: {
3009     MVT OpVT = Op.getValueType();
3010     unsigned OpVTBits = OpVT.getSizeInBits();
3011     uint64_t InMask = OpVT.getIntegerVTBitMask();
3012     KnownZero |= APInt(OpVTBits, ~InMask, false);
3013     KnownOne |= APInt(OpVTBits, InMask, false);
3014     break;
3015   }
3016
3017 #if 0
3018   case EXTRACT_I1_ZEXT:
3019   case EXTRACT_I1_SEXT:
3020   case EXTRACT_I8_ZEXT:
3021   case EXTRACT_I8_SEXT:
3022   case MPY:
3023   case MPYU:
3024   case MPYH:
3025   case MPYHH:
3026   case SPUISD::SHLQUAD_L_BITS:
3027   case SPUISD::SHLQUAD_L_BYTES:
3028   case SPUISD::VEC_SHL:
3029   case SPUISD::VEC_SRL:
3030   case SPUISD::VEC_SRA:
3031   case SPUISD::VEC_ROTL:
3032   case SPUISD::VEC_ROTR:
3033   case SPUISD::ROTQUAD_RZ_BYTES:
3034   case SPUISD::ROTQUAD_RZ_BITS:
3035   case SPUISD::ROTBYTES_RIGHT_S:
3036   case SPUISD::ROTBYTES_LEFT:
3037   case SPUISD::ROTBYTES_LEFT_CHAINED:
3038   case SPUISD::SELECT_MASK:
3039   case SPUISD::SELB:
3040   case SPUISD::FPInterp:
3041   case SPUISD::FPRecipEst:
3042   case SPUISD::SEXT32TO64:
3043 #endif
3044   }
3045 }
3046
3047 // LowerAsmOperandForConstraint
3048 void
3049 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3050                                                 char ConstraintLetter,
3051                                                 bool hasMemory,
3052                                                 std::vector<SDValue> &Ops,
3053                                                 SelectionDAG &DAG) const {
3054   // Default, for the time being, to the base class handler
3055   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3056                                                Ops, DAG);
3057 }
3058
3059 /// isLegalAddressImmediate - Return true if the integer value can be used
3060 /// as the offset of the target addressing mode.
3061 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3062                                                 const Type *Ty) const {
3063   // SPU's addresses are 256K:
3064   return (V > -(1 << 18) && V < (1 << 18) - 1);
3065 }
3066
3067 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3068   return false;
3069 }
3070
3071 bool
3072 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3073   // The SPU target isn't yet aware of offsets.
3074   return false;
3075 }