lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  20 #include "llvm/CodeGen/CallingConvLower.h"
  21 #include "llvm/CodeGen/MachineFrameInfo.h"
  22 #include "llvm/CodeGen/MachineFunction.h"
  23 #include "llvm/CodeGen/MachineInstrBuilder.h"
  24 #include "llvm/CodeGen/MachineRegisterInfo.h"
  25 #include "llvm/CodeGen/SelectionDAG.h"
  26 #include "llvm/Constants.h"
  27 #include "llvm/Function.h"
  28 #include "llvm/Intrinsics.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/MathExtras.h"
  31 #include "llvm/Target/TargetOptions.h"
  32
  33 #include <map>
  34
  35 using namespace llvm;
  36
  37 // Used in getTargetNodeName() below
  38 namespace {
  39   std::map<unsigned, const char *> node_names;
  40
  41   //! MVT mapping to useful data for Cell SPU
  42   struct valtype_map_s {
  43     const MVT        valtype;
  44     const int                   prefslot_byte;
  45   };
  46
  47   const valtype_map_s valtype_map[] = {
  48     { MVT::i1,   3 },
  49     { MVT::i8,   3 },
  50     { MVT::i16,  2 },
  51     { MVT::i32,  0 },
  52     { MVT::f32,  0 },
  53     { MVT::i64,  0 },
  54     { MVT::f64,  0 },
  55     { MVT::i128, 0 }
  56   };
  57
  58   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  59
  60   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  61     const valtype_map_s *retval = 0;
  62
  63     for (size_t i = 0; i < n_valtype_map; ++i) {
  64       if (valtype_map[i].valtype == VT) {
  65         retval = valtype_map + i;
  66         break;
  67       }
  68     }
  69
  70 #ifndef NDEBUG
  71     if (retval == 0) {
  72       cerr << "getValueTypeMapEntry returns NULL for "
  73            << VT.getMVTString()
  74            << "\n";
  75       abort();
  76     }
  77 #endif
  78
  79     return retval;
  80   }
  81
  82   //! Predicate that returns true if operand is a memory target
  83   /*!
  84     \arg Op Operand to test
  85     \return true if the operand is a memory target (i.e., global
  86     address, external symbol, constant pool) or an A-form
  87     address.
  88    */
  89   bool isMemoryOperand(const SDOperand &Op)
  90   {
  91     const unsigned Opc = Op.getOpcode();
  92     return (Opc == ISD::GlobalAddress
  93             || Opc == ISD::GlobalTLSAddress
  94             || Opc == ISD::JumpTable
  95             || Opc == ISD::ConstantPool
  96             || Opc == ISD::ExternalSymbol
  97             || Opc == ISD::TargetGlobalAddress
  98             || Opc == ISD::TargetGlobalTLSAddress
  99             || Opc == ISD::TargetJumpTable
 100             || Opc == ISD::TargetConstantPool
 101             || Opc == ISD::TargetExternalSymbol
 102             || Opc == SPUISD::AFormAddr);
 103   }
 104
 105   //! Predicate that returns true if the operand is an indirect target
 106   bool isIndirectOperand(const SDOperand &Op)
 107   {
 108     const unsigned Opc = Op.getOpcode();
 109     return (Opc == ISD::Register
 110             || Opc == SPUISD::LDRESULT);
 111   }
 112 }
 113
 114 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 115   : TargetLowering(TM),
 116     SPUTM(TM)
 117 {
 118   // Fold away setcc operations if possible.
 119   setPow2DivIsCheap();
 120
 121   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 122   setUseUnderscoreSetJmp(true);
 123   setUseUnderscoreLongJmp(true);
 124
 125   // Set up the SPU's register classes:
 126   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 127   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 128   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 129   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 130   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 131   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 132   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 133
 134   // SPU has no sign or zero extended loads for i1, i8, i16:
 135   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 136   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 137   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 138   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 139   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 140   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 142   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 143
 144   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 145   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 146   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 151   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 152
 153   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 154   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 155   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 156
 157   // SPU constant load actions are custom lowered:
 158   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 159   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 160   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 161
 162   // SPU's loads and stores have to be custom lowered:
 163   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 164        ++sctype) {
 165     MVT VT = (MVT::SimpleValueType)sctype;
 166
 167     setOperationAction(ISD::LOAD, VT, Custom);
 168     setOperationAction(ISD::STORE, VT, Custom);
 169   }
 170
 171   // Custom lower BRCOND for i1, i8 to "promote" the result to
 172   // i32 and i16, respectively.
 173   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 174
 175   // Expand the jumptable branches
 176   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 177   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 178   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 179
 180   // SPU has no intrinsics for these particular operations:
 181   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 182
 183   // PowerPC has no SREM/UREM instructions
 184   setOperationAction(ISD::SREM, MVT::i32, Expand);
 185   setOperationAction(ISD::UREM, MVT::i32, Expand);
 186   setOperationAction(ISD::SREM, MVT::i64, Expand);
 187   setOperationAction(ISD::UREM, MVT::i64, Expand);
 188
 189   // We don't support sin/cos/sqrt/fmod
 190   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 191   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 192   setOperationAction(ISD::FREM , MVT::f64, Expand);
 193   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 194   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 195   setOperationAction(ISD::FREM , MVT::f32, Expand);
 196
 197   // If we're enabling GP optimizations, use hardware square root
 198   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 199   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 200
 201   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 202   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 203
 204   // SPU can do rotate right and left, so legalize it... but customize for i8
 205   // because instructions don't exist.
 206   setOperationAction(ISD::ROTR, MVT::i32,    Legal);
 207   setOperationAction(ISD::ROTR, MVT::i16,    Legal);
 208   setOperationAction(ISD::ROTR, MVT::i8,     Custom);
 209   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 210   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 211   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 212   // SPU has no native version of shift left/right for i8
 213   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 214   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 215   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 216   // And SPU needs custom lowering for shift left/right for i64
 217   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 218   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 219   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 220
 221   // Custom lower i32 multiplications
 222   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 223
 224   // Need to custom handle (some) common i8, i64 math ops
 225   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 226   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 227   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 228   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 229
 230   // SPU does not have BSWAP. It does have i32 support CTLZ.
 231   // CTPOP has to be custom lowered.
 232   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 233   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 234
 235   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 236   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 237   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 238   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 239
 240   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 241   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 242
 243   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 244
 245   // SPU has a version of select that implements (a&~c)|(b&c), just like
 246   // select ought to work:
 247   setOperationAction(ISD::SELECT, MVT::i1,   Promote);
 248   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 249   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 250   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 251   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 252
 253   setOperationAction(ISD::SETCC, MVT::i1,    Promote);
 254   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 255   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 256   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 257   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 258
 259   // Zero extension and sign extension for i64 have to be
 260   // custom legalized
 261   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 262   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 263   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 264
 265   // SPU has a legal FP -> signed INT instruction
 266   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 267   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 268   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 269   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 270
 271   // FDIV on SPU requires custom lowering
 272   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 273   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 274
 275   // SPU has [U|S]INT_TO_FP
 276   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 277   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 278   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 279   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 280   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 281   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 282   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 283   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 284
 285   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 286   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 287   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 288   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 289
 290   // We cannot sextinreg(i1).  Expand to shifts.
 291   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 292
 293   // Support label based line numbers.
 294   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 295   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 296
 297   // We want to legalize GlobalAddress and ConstantPool nodes into the
 298   // appropriate instructions to materialize the address.
 299   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 300        ++sctype) {
 301     MVT VT = (MVT::SimpleValueType)sctype;
 302
 303     setOperationAction(ISD::GlobalAddress, VT, Custom);
 304     setOperationAction(ISD::ConstantPool,  VT, Custom);
 305     setOperationAction(ISD::JumpTable,     VT, Custom);
 306   }
 307
 308   // RET must be custom lowered, to meet ABI requirements
 309   setOperationAction(ISD::RET,           MVT::Other, Custom);
 310
 311   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 312   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 313
 314   // Use the default implementation.
 315   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 316   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 317   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 318   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 319   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 320   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 321   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 322
 323   // Cell SPU has instructions for converting between i64 and fp.
 324   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 325   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 326
 327   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 328   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 329
 330   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 331   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 332
 333   // First set operation action for all vector types to expand. Then we
 334   // will selectively turn on ones that can be effectively codegen'd.
 335   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 336   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 337   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 338   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 339   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 340   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 341
 342   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 343        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 344     MVT VT = (MVT::SimpleValueType)i;
 345
 346     // add/sub are legal for all supported vector VT's.
 347     setOperationAction(ISD::ADD , VT, Legal);
 348     setOperationAction(ISD::SUB , VT, Legal);
 349     // mul has to be custom lowered.
 350     setOperationAction(ISD::MUL , VT, Custom);
 351
 352     setOperationAction(ISD::AND   , VT, Legal);
 353     setOperationAction(ISD::OR    , VT, Legal);
 354     setOperationAction(ISD::XOR   , VT, Legal);
 355     setOperationAction(ISD::LOAD  , VT, Legal);
 356     setOperationAction(ISD::SELECT, VT, Legal);
 357     setOperationAction(ISD::STORE,  VT, Legal);
 358
 359     // These operations need to be expanded:
 360     setOperationAction(ISD::SDIV, VT, Expand);
 361     setOperationAction(ISD::SREM, VT, Expand);
 362     setOperationAction(ISD::UDIV, VT, Expand);
 363     setOperationAction(ISD::UREM, VT, Expand);
 364     setOperationAction(ISD::FDIV, VT, Custom);
 365
 366     // Custom lower build_vector, constant pool spills, insert and
 367     // extract vector elements:
 368     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 369     setOperationAction(ISD::ConstantPool, VT, Custom);
 370     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 371     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 372     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 373     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 374   }
 375
 376   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 377   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 378   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 379   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 380   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 381
 382   setShiftAmountType(MVT::i32);
 383   setSetCCResultContents(ZeroOrOneSetCCResult);
 384
 385   setStackPointerRegisterToSaveRestore(SPU::R1);
 386
 387   // We have target-specific dag combine patterns for the following nodes:
 388   setTargetDAGCombine(ISD::ADD);
 389   setTargetDAGCombine(ISD::ZERO_EXTEND);
 390   setTargetDAGCombine(ISD::SIGN_EXTEND);
 391   setTargetDAGCombine(ISD::ANY_EXTEND);
 392
 393   computeRegisterProperties();
 394 }
 395
 396 const char *
 397 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 398 {
 399   if (node_names.empty()) {
 400     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 401     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 402     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 403     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 404     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 405     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 406     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 407     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 408     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 409     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 410     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 411     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 412     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 413     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
 414     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 415     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 416     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 417     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 418     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 419     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 420     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 421     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 422     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 423     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 424     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 425     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 426     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 427     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 428     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 429     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 430       "SPUISD::ROTQUAD_RZ_BYTES";
 431     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 432       "SPUISD::ROTQUAD_RZ_BITS";
 433     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 434       "SPUISD::ROTBYTES_RIGHT_S";
 435     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 436     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 437       "SPUISD::ROTBYTES_LEFT_CHAINED";
 438     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 439       "SPUISD::ROTBYTES_LEFT_BITS";
 440     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 441     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 442     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 443     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 444     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 445     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 446     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 447     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 448     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 449   }
 450
 451   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 452
 453   return ((i != node_names.end()) ? i->second : 0);
 454 }
 455
 456 MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
 457   MVT VT = Op.getValueType();
 458   if (VT.isInteger())
 459     return VT;
 460   else
 461     return MVT::i32;
 462 }
 463
 464 //===----------------------------------------------------------------------===//
 465 // Calling convention code:
 466 //===----------------------------------------------------------------------===//
 467
 468 #include "SPUGenCallingConv.inc"
 469
 470 //===----------------------------------------------------------------------===//
 471 //  LowerOperation implementation
 472 //===----------------------------------------------------------------------===//
 473
 474 /// Aligned load common code for CellSPU
 475 /*!
 476   \param[in] Op The SelectionDAG load or store operand
 477   \param[in] DAG The selection DAG
 478   \param[in] ST CellSPU subtarget information structure
 479   \param[in,out] alignment Caller initializes this to the load or store node's
 480   value from getAlignment(), may be updated while generating the aligned load
 481   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 482   offset (divisible by 16, modulo 16 == 0)
 483   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 484   offset of the preferred slot (modulo 16 != 0)
 485   \param[in,out] VT Caller initializes this value type to the the load or store
 486   node's loaded or stored value type; may be updated if an i1-extended load or
 487   store.
 488   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 489   otherwise false. Can help to determine if the chunk needs to be rotated.
 490
 491  Both load and store lowering load a block of data aligned on a 16-byte
 492  boundary. This is the common aligned load code shared between both.
 493  */
 494 static SDOperand
 495 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 496             LSBaseSDNode *LSN,
 497             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 498             MVT &VT, bool &was16aligned)
 499 {
 500   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 501   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 502   SDOperand basePtr = LSN->getBasePtr();
 503   SDOperand chain = LSN->getChain();
 504
 505   if (basePtr.getOpcode() == ISD::ADD) {
 506     SDOperand Op1 = basePtr.Val->getOperand(1);
 507
 508     if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
 509       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 510
 511       alignOffs = (int) CN->getValue();
 512       prefSlotOffs = (int) (alignOffs & 0xf);
 513
 514       // Adjust the rotation amount to ensure that the final result ends up in
 515       // the preferred slot:
 516       prefSlotOffs -= vtm->prefslot_byte;
 517       basePtr = basePtr.getOperand(0);
 518
 519       // Loading from memory, can we adjust alignment?
 520       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 521         SDOperand APtr = basePtr.getOperand(0);
 522         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 523           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 524           alignment = GSDN->getGlobal()->getAlignment();
 525         }
 526       }
 527     } else {
 528       alignOffs = 0;
 529       prefSlotOffs = -vtm->prefslot_byte;
 530     }
 531   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 532     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 533     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 534     prefSlotOffs = (int) (alignOffs & 0xf);
 535     prefSlotOffs -= vtm->prefslot_byte;
 536     basePtr = DAG.getRegister(SPU::R1, VT);
 537   } else {
 538     alignOffs = 0;
 539     prefSlotOffs = -vtm->prefslot_byte;
 540   }
 541
 542   if (alignment == 16) {
 543     // Realign the base pointer as a D-Form address:
 544     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 545       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 546                             basePtr,
 547                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 548     }
 549
 550     // Emit the vector load:
 551     was16aligned = true;
 552     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 553                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 554                        LSN->isVolatile(), 16);
 555   }
 556
 557   // Unaligned load or we're using the "large memory" model, which means that
 558   // we have to be very pessimistic:
 559   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 560     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
 561   }
 562
 563   // Add the offset
 564   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 565                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 566   was16aligned = false;
 567   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 568                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 569                      LSN->isVolatile(), 16);
 570 }
 571
 572 /// Custom lower loads for CellSPU
 573 /*!
 574  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 575  within a 16-byte block, we have to rotate to extract the requested element.
 576  */
 577 static SDOperand
 578 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 579   LoadSDNode *LN = cast<LoadSDNode>(Op);
 580   SDOperand the_chain = LN->getChain();
 581   MVT VT = LN->getMemoryVT();
 582   MVT OpVT = Op.Val->getValueType(0);
 583   ISD::LoadExtType ExtType = LN->getExtensionType();
 584   unsigned alignment = LN->getAlignment();
 585   SDOperand Ops[8];
 586
 587   switch (LN->getAddressingMode()) {
 588   case ISD::UNINDEXED: {
 589     int offset, rotamt;
 590     bool was16aligned;
 591     SDOperand result =
 592       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 593
 594     if (result.Val == 0)
 595       return result;
 596
 597     the_chain = result.getValue(1);
 598     // Rotate the chunk if necessary
 599     if (rotamt < 0)
 600       rotamt += 16;
 601     if (rotamt != 0 || !was16aligned) {
 602       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 603
 604       Ops[0] = the_chain;
 605       Ops[1] = result;
 606       if (was16aligned) {
 607         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 608       } else {
 609         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 610         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 611         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 612                              DAG.getConstant(rotamt, PtrVT));
 613       }
 614
 615       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 616       the_chain = result.getValue(1);
 617     }
 618
 619     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 620       SDVTList scalarvts;
 621       MVT vecVT = MVT::v16i8;
 622
 623       // Convert the loaded v16i8 vector to the appropriate vector type
 624       // specified by the operand:
 625       if (OpVT == VT) {
 626         if (VT != MVT::i1)
 627           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 628       } else
 629         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 630
 631       Ops[0] = the_chain;
 632       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 633       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 634       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 635       the_chain = result.getValue(1);
 636     } else {
 637       // Handle the sign and zero-extending loads for i1 and i8:
 638       unsigned NewOpC;
 639
 640       if (ExtType == ISD::SEXTLOAD) {
 641         NewOpC = (OpVT == MVT::i1
 642                   ? SPUISD::EXTRACT_I1_SEXT
 643                   : SPUISD::EXTRACT_I8_SEXT);
 644       } else {
 645         assert(ExtType == ISD::ZEXTLOAD);
 646         NewOpC = (OpVT == MVT::i1
 647                   ? SPUISD::EXTRACT_I1_ZEXT
 648                   : SPUISD::EXTRACT_I8_ZEXT);
 649       }
 650
 651       result = DAG.getNode(NewOpC, OpVT, result);
 652     }
 653
 654     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 655     SDOperand retops[2] = {
 656       result,
 657       the_chain
 658     };
 659
 660     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 661                          retops, sizeof(retops) / sizeof(retops[0]));
 662     return result;
 663   }
 664   case ISD::PRE_INC:
 665   case ISD::PRE_DEC:
 666   case ISD::POST_INC:
 667   case ISD::POST_DEC:
 668   case ISD::LAST_INDEXED_MODE:
 669     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 670             "UNINDEXED\n";
 671     cerr << (unsigned) LN->getAddressingMode() << "\n";
 672     abort();
 673     /*NOTREACHED*/
 674   }
 675
 676   return SDOperand();
 677 }
 678
 679 /// Custom lower stores for CellSPU
 680 /*!
 681  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 682  within a 16-byte block, we have to generate a shuffle to insert the
 683  requested element into its place, then store the resulting block.
 684  */
 685 static SDOperand
 686 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 687   StoreSDNode *SN = cast<StoreSDNode>(Op);
 688   SDOperand Value = SN->getValue();
 689   MVT VT = Value.getValueType();
 690   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 691   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 692   unsigned alignment = SN->getAlignment();
 693
 694   switch (SN->getAddressingMode()) {
 695   case ISD::UNINDEXED: {
 696     int chunk_offset, slot_offset;
 697     bool was16aligned;
 698
 699     // The vector type we really want to load from the 16-byte chunk, except
 700     // in the case of MVT::i1, which has to be v16i8.
 701     MVT vecVT, stVecVT = MVT::v16i8;
 702
 703     if (StVT != MVT::i1)
 704       stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 705     vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 706
 707     SDOperand alignLoadVec =
 708       AlignedLoad(Op, DAG, ST, SN, alignment,
 709                   chunk_offset, slot_offset, VT, was16aligned);
 710
 711     if (alignLoadVec.Val == 0)
 712       return alignLoadVec;
 713
 714     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 715     SDOperand basePtr = LN->getBasePtr();
 716     SDOperand the_chain = alignLoadVec.getValue(1);
 717     SDOperand theValue = SN->getValue();
 718     SDOperand result;
 719
 720     if (StVT != VT
 721         && (theValue.getOpcode() == ISD::AssertZext
 722             || theValue.getOpcode() == ISD::AssertSext)) {
 723       // Drill down and get the value for zero- and sign-extended
 724       // quantities
 725       theValue = theValue.getOperand(0);
 726     }
 727
 728     chunk_offset &= 0xf;
 729
 730     SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 731     SDOperand insertEltPtr;
 732     SDOperand insertEltOp;
 733
 734     // If the base pointer is already a D-form address, then just create
 735     // a new D-form address with a slot offset and the orignal base pointer.
 736     // Otherwise generate a D-form address with the slot offset relative
 737     // to the stack pointer, which is always aligned.
 738     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 739     DEBUG(basePtr.Val->dump(&DAG));
 740     DEBUG(cerr << "\n");
 741
 742     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 743         (basePtr.getOpcode() == ISD::ADD
 744          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 745       insertEltPtr = basePtr;
 746     } else {
 747       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 748     }
 749
 750     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 751     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 752                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 753                          alignLoadVec,
 754                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 755
 756     result = DAG.getStore(the_chain, result, basePtr,
 757                           LN->getSrcValue(), LN->getSrcValueOffset(),
 758                           LN->isVolatile(), LN->getAlignment());
 759
 760     return result;
 761     /*UNREACHED*/
 762   }
 763   case ISD::PRE_INC:
 764   case ISD::PRE_DEC:
 765   case ISD::POST_INC:
 766   case ISD::POST_DEC:
 767   case ISD::LAST_INDEXED_MODE:
 768     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 769             "UNINDEXED\n";
 770     cerr << (unsigned) SN->getAddressingMode() << "\n";
 771     abort();
 772     /*NOTREACHED*/
 773   }
 774
 775   return SDOperand();
 776 }
 777
 778 /// Generate the address of a constant pool entry.
 779 static SDOperand
 780 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 781   MVT PtrVT = Op.getValueType();
 782   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 783   Constant *C = CP->getConstVal();
 784   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 785   SDOperand Zero = DAG.getConstant(0, PtrVT);
 786   const TargetMachine &TM = DAG.getTarget();
 787
 788   if (TM.getRelocationModel() == Reloc::Static) {
 789     if (!ST->usingLargeMem()) {
 790       // Just return the SDOperand with the constant pool address in it.
 791       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 792     } else {
 793       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 794       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 795       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 796     }
 797   }
 798
 799   assert(0 &&
 800          "LowerConstantPool: Relocation model other than static not supported.");
 801   return SDOperand();
 802 }
 803
 804 static SDOperand
 805 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 806   MVT PtrVT = Op.getValueType();
 807   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 808   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 809   SDOperand Zero = DAG.getConstant(0, PtrVT);
 810   const TargetMachine &TM = DAG.getTarget();
 811
 812   if (TM.getRelocationModel() == Reloc::Static) {
 813     if (!ST->usingLargeMem()) {
 814       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 815     } else {
 816       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 817       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 818       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 819     }
 820   }
 821
 822   assert(0 &&
 823          "LowerJumpTable: Relocation model other than static not supported.");
 824   return SDOperand();
 825 }
 826
 827 static SDOperand
 828 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 829   MVT PtrVT = Op.getValueType();
 830   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 831   GlobalValue *GV = GSDN->getGlobal();
 832   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 833   const TargetMachine &TM = DAG.getTarget();
 834   SDOperand Zero = DAG.getConstant(0, PtrVT);
 835
 836   if (TM.getRelocationModel() == Reloc::Static) {
 837     if (!ST->usingLargeMem()) {
 838       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 839     } else {
 840       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 841       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 842       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 843     }
 844   } else {
 845     cerr << "LowerGlobalAddress: Relocation model other than static not "
 846          << "supported.\n";
 847     abort();
 848     /*NOTREACHED*/
 849   }
 850
 851   return SDOperand();
 852 }
 853
 854 //! Custom lower i64 integer constants
 855 /*!
 856  This code inserts all of the necessary juggling that needs to occur to load
 857  a 64-bit constant into a register.
 858  */
 859 static SDOperand
 860 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
 861   MVT VT = Op.getValueType();
 862   ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
 863
 864   if (VT == MVT::i64) {
 865     SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
 866     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 867                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 868   } else {
 869     cerr << "LowerConstant: unhandled constant type "
 870          << VT.getMVTString()
 871          << "\n";
 872     abort();
 873     /*NOTREACHED*/
 874   }
 875
 876   return SDOperand();
 877 }
 878
 879 //! Custom lower double precision floating point constants
 880 static SDOperand
 881 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
 882   MVT VT = Op.getValueType();
 883   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
 884
 885   assert((FP != 0) &&
 886          "LowerConstantFP: Node is not ConstantFPSDNode");
 887
 888   if (VT == MVT::f64) {
 889     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 890     return DAG.getNode(ISD::BIT_CONVERT, VT,
 891                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 892   }
 893
 894   return SDOperand();
 895 }
 896
 897 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 898 static SDOperand
 899 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
 900 {
 901   SDOperand Cond = Op.getOperand(1);
 902   MVT CondVT = Cond.getValueType();
 903   MVT CondNVT;
 904
 905   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 906     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 907     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 908                       Op.getOperand(0),
 909                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 910                       Op.getOperand(2));
 911   } else
 912     return SDOperand();                // Unchanged
 913 }
 914
 915 static SDOperand
 916 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 917 {
 918   MachineFunction &MF = DAG.getMachineFunction();
 919   MachineFrameInfo *MFI = MF.getFrameInfo();
 920   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 921   SmallVector<SDOperand, 8> ArgValues;
 922   SDOperand Root = Op.getOperand(0);
 923   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 924
 925   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 926   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 927
 928   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 929   unsigned ArgRegIdx = 0;
 930   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 931
 932   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 933
 934   // Add DAG nodes to load the arguments or copy them out of registers.
 935   for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 936     SDOperand ArgVal;
 937     bool needsLoad = false;
 938     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 939     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 940
 941     switch (ObjectVT.getSimpleVT()) {
 942     default: {
 943       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 944            << ObjectVT.getMVTString()
 945            << "\n";
 946       abort();
 947     }
 948     case MVT::i8:
 949       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 950         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 951         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 952         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 953         ++ArgRegIdx;
 954       } else {
 955         needsLoad = true;
 956       }
 957       break;
 958     case MVT::i16:
 959       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 960         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 961         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 962         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 963         ++ArgRegIdx;
 964       } else {
 965         needsLoad = true;
 966       }
 967       break;
 968     case MVT::i32:
 969       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 970         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 971         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 972         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 973         ++ArgRegIdx;
 974       } else {
 975         needsLoad = true;
 976       }
 977       break;
 978     case MVT::i64:
 979       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 980         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
 981         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 982         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 983         ++ArgRegIdx;
 984       } else {
 985         needsLoad = true;
 986       }
 987       break;
 988     case MVT::f32:
 989       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 990         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
 991         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 992         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
 993         ++ArgRegIdx;
 994       } else {
 995         needsLoad = true;
 996       }
 997       break;
 998     case MVT::f64:
 999       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1001         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1003         ++ArgRegIdx;
1004       } else {
1005         needsLoad = true;
1006       }
1007       break;
1008     case MVT::v2f64:
1009     case MVT::v4f32:
1010     case MVT::v2i64:
1011     case MVT::v4i32:
1012     case MVT::v8i16:
1013     case MVT::v16i8:
1014       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1015         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1016         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1017         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1018         ++ArgRegIdx;
1019       } else {
1020         needsLoad = true;
1021       }
1022       break;
1023     }
1024
1025     // We need to load the argument to a virtual register if we determined above
1026     // that we ran out of physical registers of the appropriate type
1027     if (needsLoad) {
1028       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1029       SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1030       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1031       ArgOffset += StackSlotSize;
1032     }
1033
1034     ArgValues.push_back(ArgVal);
1035   }
1036
1037   // If the function takes variable number of arguments, make a frame index for
1038   // the start of the first vararg value... for expansion of llvm.va_start.
1039   if (isVarArg) {
1040     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1041                                                ArgOffset);
1042     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1043     // If this function is vararg, store any remaining integer argument regs to
1044     // their spots on the stack so that they may be loaded by deferencing the
1045     // result of va_next.
1046     SmallVector<SDOperand, 8> MemOps;
1047     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1048       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1049       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1050       SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1051       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1052       MemOps.push_back(Store);
1053       // Increment the address by four for the next argument to store
1054       SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1055       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1056     }
1057     if (!MemOps.empty())
1058       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1059   }
1060
1061   ArgValues.push_back(Root);
1062
1063   // Return the new list of results.
1064   std::vector<MVT> RetVT(Op.Val->value_begin(),
1065                                     Op.Val->value_end());
1066   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1067 }
1068
1069 /// isLSAAddress - Return the immediate to use if the specified
1070 /// value is representable as a LSA address.
1071 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1072   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1073   if (!C) return 0;
1074
1075   int Addr = C->getValue();
1076   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1077       (Addr << 14 >> 14) != Addr)
1078     return 0;  // Top 14 bits have to be sext of immediate.
1079
1080   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1081 }
1082
1083 static
1084 SDOperand
1085 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1086   SDOperand Chain = Op.getOperand(0);
1087 #if 0
1088   bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1089   bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1090 #endif
1091   SDOperand Callee    = Op.getOperand(4);
1092   unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1093   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1094   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1095   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1096
1097   // Handy pointer type
1098   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1099
1100   // Accumulate how many bytes are to be pushed on the stack, including the
1101   // linkage area, and parameter passing area.  According to the SPU ABI,
1102   // we minimally need space for [LR] and [SP]
1103   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1104
1105   // Set up a copy of the stack pointer for use loading and storing any
1106   // arguments that may not fit in the registers available for argument
1107   // passing.
1108   SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1109
1110   // Figure out which arguments are going to go in registers, and which in
1111   // memory.
1112   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1113   unsigned ArgRegIdx = 0;
1114
1115   // Keep track of registers passing arguments
1116   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1117   // And the arguments passed on the stack
1118   SmallVector<SDOperand, 8> MemOpChains;
1119
1120   for (unsigned i = 0; i != NumOps; ++i) {
1121     SDOperand Arg = Op.getOperand(5+2*i);
1122
1123     // PtrOff will be used to store the current argument to the stack if a
1124     // register cannot be found for it.
1125     SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1126     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1127
1128     switch (Arg.getValueType().getSimpleVT()) {
1129     default: assert(0 && "Unexpected ValueType for argument!");
1130     case MVT::i32:
1131     case MVT::i64:
1132     case MVT::i128:
1133       if (ArgRegIdx != NumArgRegs) {
1134         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135       } else {
1136         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137         ArgOffset += StackSlotSize;
1138       }
1139       break;
1140     case MVT::f32:
1141     case MVT::f64:
1142       if (ArgRegIdx != NumArgRegs) {
1143         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1144       } else {
1145         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1146         ArgOffset += StackSlotSize;
1147       }
1148       break;
1149     case MVT::v4f32:
1150     case MVT::v4i32:
1151     case MVT::v8i16:
1152     case MVT::v16i8:
1153       if (ArgRegIdx != NumArgRegs) {
1154         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1155       } else {
1156         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1157         ArgOffset += StackSlotSize;
1158       }
1159       break;
1160     }
1161   }
1162
1163   // Update number of stack bytes actually used, insert a call sequence start
1164   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1165   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1166
1167   if (!MemOpChains.empty()) {
1168     // Adjust the stack pointer for the stack arguments.
1169     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1170                         &MemOpChains[0], MemOpChains.size());
1171   }
1172
1173   // Build a sequence of copy-to-reg nodes chained together with token chain
1174   // and flag operands which copy the outgoing args into the appropriate regs.
1175   SDOperand InFlag;
1176   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1177     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1178                              InFlag);
1179     InFlag = Chain.getValue(1);
1180   }
1181
1182   std::vector<MVT> NodeTys;
1183   NodeTys.push_back(MVT::Other);   // Returns a chain
1184   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1185
1186   SmallVector<SDOperand, 8> Ops;
1187   unsigned CallOpc = SPUISD::CALL;
1188
1189   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1190   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1191   // node so that legalize doesn't hack it.
1192   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1193     GlobalValue *GV = G->getGlobal();
1194     MVT CalleeVT = Callee.getValueType();
1195     SDOperand Zero = DAG.getConstant(0, PtrVT);
1196     SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1197
1198     if (!ST->usingLargeMem()) {
1199       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1200       // style calls, otherwise, external symbols are BRASL calls. This assumes
1201       // that declared/defined symbols are in the same compilation unit and can
1202       // be reached through PC-relative jumps.
1203       //
1204       // NOTE:
1205       // This may be an unsafe assumption for JIT and really large compilation
1206       // units.
1207       if (GV->isDeclaration()) {
1208         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1209       } else {
1210         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1211       }
1212     } else {
1213       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1214       // address pairs:
1215       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1216     }
1217   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1218     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1219   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1220     // If this is an absolute destination address that appears to be a legal
1221     // local store address, use the munged value.
1222     Callee = SDOperand(Dest, 0);
1223   }
1224
1225   Ops.push_back(Chain);
1226   Ops.push_back(Callee);
1227
1228   // Add argument registers to the end of the list so that they are known live
1229   // into the call.
1230   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1231     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1232                                   RegsToPass[i].second.getValueType()));
1233
1234   if (InFlag.Val)
1235     Ops.push_back(InFlag);
1236   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1237   InFlag = Chain.getValue(1);
1238
1239   Chain = DAG.getCALLSEQ_END(Chain,
1240                              DAG.getConstant(NumStackBytes, PtrVT),
1241                              DAG.getConstant(0, PtrVT),
1242                              InFlag);
1243   if (Op.Val->getValueType(0) != MVT::Other)
1244     InFlag = Chain.getValue(1);
1245
1246   SDOperand ResultVals[3];
1247   unsigned NumResults = 0;
1248   NodeTys.clear();
1249
1250   // If the call has results, copy the values out of the ret val registers.
1251   switch (Op.Val->getValueType(0).getSimpleVT()) {
1252   default: assert(0 && "Unexpected ret value!");
1253   case MVT::Other: break;
1254   case MVT::i32:
1255     if (Op.Val->getValueType(1) == MVT::i32) {
1256       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1257       ResultVals[0] = Chain.getValue(0);
1258       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1259                                  Chain.getValue(2)).getValue(1);
1260       ResultVals[1] = Chain.getValue(0);
1261       NumResults = 2;
1262       NodeTys.push_back(MVT::i32);
1263     } else {
1264       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1265       ResultVals[0] = Chain.getValue(0);
1266       NumResults = 1;
1267     }
1268     NodeTys.push_back(MVT::i32);
1269     break;
1270   case MVT::i64:
1271     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1272     ResultVals[0] = Chain.getValue(0);
1273     NumResults = 1;
1274     NodeTys.push_back(MVT::i64);
1275     break;
1276   case MVT::f32:
1277   case MVT::f64:
1278     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1279                                InFlag).getValue(1);
1280     ResultVals[0] = Chain.getValue(0);
1281     NumResults = 1;
1282     NodeTys.push_back(Op.Val->getValueType(0));
1283     break;
1284   case MVT::v2f64:
1285   case MVT::v4f32:
1286   case MVT::v4i32:
1287   case MVT::v8i16:
1288   case MVT::v16i8:
1289     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1290                                    InFlag).getValue(1);
1291     ResultVals[0] = Chain.getValue(0);
1292     NumResults = 1;
1293     NodeTys.push_back(Op.Val->getValueType(0));
1294     break;
1295   }
1296
1297   NodeTys.push_back(MVT::Other);
1298
1299   // If the function returns void, just return the chain.
1300   if (NumResults == 0)
1301     return Chain;
1302
1303   // Otherwise, merge everything together with a MERGE_VALUES node.
1304   ResultVals[NumResults++] = Chain;
1305   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1306                               ResultVals, NumResults);
1307   return Res.getValue(Op.ResNo);
1308 }
1309
1310 static SDOperand
1311 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1312   SmallVector<CCValAssign, 16> RVLocs;
1313   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1314   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1315   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1316   CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1317
1318   // If this is the first return lowered for this function, add the regs to the
1319   // liveout set for the function.
1320   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1321     for (unsigned i = 0; i != RVLocs.size(); ++i)
1322       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1323   }
1324
1325   SDOperand Chain = Op.getOperand(0);
1326   SDOperand Flag;
1327
1328   // Copy the result values into the output registers.
1329   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1330     CCValAssign &VA = RVLocs[i];
1331     assert(VA.isRegLoc() && "Can only return in registers!");
1332     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1333     Flag = Chain.getValue(1);
1334   }
1335
1336   if (Flag.Val)
1337     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1338   else
1339     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1340 }
1341
1342
1343 //===----------------------------------------------------------------------===//
1344 // Vector related lowering:
1345 //===----------------------------------------------------------------------===//
1346
1347 static ConstantSDNode *
1348 getVecImm(SDNode *N) {
1349   SDOperand OpVal(0, 0);
1350
1351   // Check to see if this buildvec has a single non-undef value in its elements.
1352   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1353     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1354     if (OpVal.Val == 0)
1355       OpVal = N->getOperand(i);
1356     else if (OpVal != N->getOperand(i))
1357       return 0;
1358   }
1359
1360   if (OpVal.Val != 0) {
1361     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1362       return CN;
1363     }
1364   }
1365
1366   return 0; // All UNDEF: use implicit def.; not Constant node
1367 }
1368
1369 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1370 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1371 /// constant
1372 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1373                               MVT ValueType) {
1374   if (ConstantSDNode *CN = getVecImm(N)) {
1375     uint64_t Value = CN->getValue();
1376     if (ValueType == MVT::i64) {
1377       uint64_t UValue = CN->getValue();
1378       uint32_t upper = uint32_t(UValue >> 32);
1379       uint32_t lower = uint32_t(UValue);
1380       if (upper != lower)
1381         return SDOperand();
1382       Value = Value >> 32;
1383     }
1384     if (Value <= 0x3ffff)
1385       return DAG.getConstant(Value, ValueType);
1386   }
1387
1388   return SDOperand();
1389 }
1390
1391 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1392 /// and the value fits into a signed 16-bit constant, and if so, return the
1393 /// constant
1394 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1395                               MVT ValueType) {
1396   if (ConstantSDNode *CN = getVecImm(N)) {
1397     int64_t Value = CN->getSignExtended();
1398     if (ValueType == MVT::i64) {
1399       uint64_t UValue = CN->getValue();
1400       uint32_t upper = uint32_t(UValue >> 32);
1401       uint32_t lower = uint32_t(UValue);
1402       if (upper != lower)
1403         return SDOperand();
1404       Value = Value >> 32;
1405     }
1406     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1407       return DAG.getConstant(Value, ValueType);
1408     }
1409   }
1410
1411   return SDOperand();
1412 }
1413
1414 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1415 /// and the value fits into a signed 10-bit constant, and if so, return the
1416 /// constant
1417 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1418                               MVT ValueType) {
1419   if (ConstantSDNode *CN = getVecImm(N)) {
1420     int64_t Value = CN->getSignExtended();
1421     if (ValueType == MVT::i64) {
1422       uint64_t UValue = CN->getValue();
1423       uint32_t upper = uint32_t(UValue >> 32);
1424       uint32_t lower = uint32_t(UValue);
1425       if (upper != lower)
1426         return SDOperand();
1427       Value = Value >> 32;
1428     }
1429     if (isS10Constant(Value))
1430       return DAG.getConstant(Value, ValueType);
1431   }
1432
1433   return SDOperand();
1434 }
1435
1436 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1437 /// and the value fits into a signed 8-bit constant, and if so, return the
1438 /// constant.
1439 ///
1440 /// @note: The incoming vector is v16i8 because that's the only way we can load
1441 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1442 /// same value.
1443 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1444                              MVT ValueType) {
1445   if (ConstantSDNode *CN = getVecImm(N)) {
1446     int Value = (int) CN->getValue();
1447     if (ValueType == MVT::i16
1448         && Value <= 0xffff                 /* truncated from uint64_t */
1449         && ((short) Value >> 8) == ((short) Value & 0xff))
1450       return DAG.getConstant(Value & 0xff, ValueType);
1451     else if (ValueType == MVT::i8
1452              && (Value & 0xff) == Value)
1453       return DAG.getConstant(Value, ValueType);
1454   }
1455
1456   return SDOperand();
1457 }
1458
1459 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1460 /// and the value fits into a signed 16-bit constant, and if so, return the
1461 /// constant
1462 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1463                                MVT ValueType) {
1464   if (ConstantSDNode *CN = getVecImm(N)) {
1465     uint64_t Value = CN->getValue();
1466     if ((ValueType == MVT::i32
1467           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1468         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1469       return DAG.getConstant(Value >> 16, ValueType);
1470   }
1471
1472   return SDOperand();
1473 }
1474
1475 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1476 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1477   if (ConstantSDNode *CN = getVecImm(N)) {
1478     return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1479   }
1480
1481   return SDOperand();
1482 }
1483
1484 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1485 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1486   if (ConstantSDNode *CN = getVecImm(N)) {
1487     return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1488   }
1489
1490   return SDOperand();
1491 }
1492
1493 // If this is a vector of constants or undefs, get the bits.  A bit in
1494 // UndefBits is set if the corresponding element of the vector is an
1495 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1496 // zero.   Return true if this is not an array of constants, false if it is.
1497 //
1498 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1499                                        uint64_t UndefBits[2]) {
1500   // Start with zero'd results.
1501   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1502
1503   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1504   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1505     SDOperand OpVal = BV->getOperand(i);
1506
1507     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1508     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1509
1510     uint64_t EltBits = 0;
1511     if (OpVal.getOpcode() == ISD::UNDEF) {
1512       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1513       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1514       continue;
1515     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1516       EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1517     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1518       const APFloat &apf = CN->getValueAPF();
1519       EltBits = (CN->getValueType(0) == MVT::f32
1520                  ? FloatToBits(apf.convertToFloat())
1521                  : DoubleToBits(apf.convertToDouble()));
1522     } else {
1523       // Nonconstant element.
1524       return true;
1525     }
1526
1527     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1528   }
1529
1530   //printf("%llx %llx  %llx %llx\n",
1531   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1532   return false;
1533 }
1534
/// isConstantSplat - Test whether a 128-bit constant (as packed by
/// GetConstantBuildVectorBits) is a repetition of one value.
///
/// The splat width is chosen from \p MinSplatBits: values below 16 ask for an
/// 8-bit splat, below 32 for a 16-bit splat, below 64 for a 32-bit splat, and
/// anything else for a 64-bit splat.  The vector must repeat at exactly that
/// width (and therefore at every wider width as well); there is no fallback
/// to a wider splat when the requested width does not repeat.  For example,
/// "0x01010101010101..." with MinSplatBits = 8 yields SplatBits = 0x01 and
/// SplatSize = 1 byte, while with MinSplatBits = 16 it yields 0x0101 and 2.
///
/// Undefined bits never prevent a match: two pieces are compared only where
/// both are defined.
///
/// \param Bits128      Value bits; undefined bits are expected to be zero.
/// \param Undef128     Mask of undefined bits.
/// \param MinSplatBits Requested splat width in bits (see above).
/// \param SplatBits    [out] The repeated value: the OR of all pieces.
/// \param SplatUndef   [out] Bits undefined in every piece (AND of the masks).
/// \param SplatSize    [out] Splat width in bytes: 1, 2, 4 or 8.
/// \returns true and fills the outputs on success; returns false and leaves
///          the outputs untouched otherwise.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // The upper and lower 64 bits must agree wherever both are defined.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  // Merge the halves: a bit is set if either half sets it and undefined only
  // if both halves leave it undefined.
  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Use the merged value rather than Bits128[0] alone: when the first half
    // is undefined its value bits are zero and the real splat value lives in
    // the second half.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The two 32-bit halves of the merged value must agree, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The two 16-bit halves must agree, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The two bytes must agree, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1598
1599 // If this is a case we can't handle, return null and let the default
1600 // expansion code take care of it.  If we CAN select this case, and if it
1601 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1602 // this case more efficiently than a constant pool load, lower it to the
1603 // sequence of ops that should be used.
1604 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1605   MVT VT = Op.getValueType();
1606   // If this is a vector of constants or undefs, get the bits.  A bit in
1607   // UndefBits is set if the corresponding element of the vector is an
1608   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1609   // zero.
1610   uint64_t VectorBits[2];
1611   uint64_t UndefBits[2];
1612   uint64_t SplatBits, SplatUndef;
1613   int SplatSize;
1614   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1615       || !isConstantSplat(VectorBits, UndefBits,
1616                           VT.getVectorElementType().getSizeInBits(),
1617                           SplatBits, SplatUndef, SplatSize))
1618     return SDOperand();   // Not a constant vector, not a splat.
1619
1620   switch (VT.getSimpleVT()) {
1621   default:
1622   case MVT::v4f32: {
1623     uint32_t Value32 = SplatBits;
1624     assert(SplatSize == 4
1625            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1626     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1627     SDOperand T = DAG.getConstant(Value32, MVT::i32);
1628     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1629                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1630     break;
1631   }
1632   case MVT::v2f64: {
1633     uint64_t f64val = SplatBits;
1634     assert(SplatSize == 8
1635            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1636     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1637     SDOperand T = DAG.getConstant(f64val, MVT::i64);
1638     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1639                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1640     break;
1641   }
1642   case MVT::v16i8: {
1643    // 8-bit constants have to be expanded to 16-bits
1644    unsigned short Value16 = SplatBits | (SplatBits << 8);
1645    SDOperand Ops[8];
1646    for (int i = 0; i < 8; ++i)
1647      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1648    return DAG.getNode(ISD::BIT_CONVERT, VT,
1649                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1650   }
1651   case MVT::v8i16: {
1652     unsigned short Value16;
1653     if (SplatSize == 2)
1654       Value16 = (unsigned short) (SplatBits & 0xffff);
1655     else
1656       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1657     SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1658     SDOperand Ops[8];
1659     for (int i = 0; i < 8; ++i) Ops[i] = T;
1660     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1661   }
1662   case MVT::v4i32: {
1663     unsigned int Value = SplatBits;
1664     SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1665     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1666   }
1667   case MVT::v2i64: {
1668     uint64_t val = SplatBits;
1669     uint32_t upper = uint32_t(val >> 32);
1670     uint32_t lower = uint32_t(val);
1671
1672     if (upper == lower) {
1673       // Magic constant that can be matched by IL, ILA, et. al.
1674       SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1675       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1676     } else {
1677       SDOperand LO32;
1678       SDOperand HI32;
1679       SmallVector<SDOperand, 16> ShufBytes;
1680       SDOperand Result;
1681       bool upper_special, lower_special;
1682
1683       // NOTE: This code creates common-case shuffle masks that can be easily
1684       // detected as common expressions. It is not attempting to create highly
1685       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1686
1687       // Detect if the upper or lower half is a special shuffle mask pattern:
1688       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1689       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1690
1691       // Create lower vector if not a special pattern
1692       if (!lower_special) {
1693         SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1694         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1695                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1696                                        LO32C, LO32C, LO32C, LO32C));
1697       }
1698
1699       // Create upper vector if not a special pattern
1700       if (!upper_special) {
1701         SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1702         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1703                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1704                                        HI32C, HI32C, HI32C, HI32C));
1705       }
1706
1707       // If either upper or lower are special, then the two input operands are
1708       // the same (basically, one of them is a "don't care")
1709       if (lower_special)
1710         LO32 = HI32;
1711       if (upper_special)
1712         HI32 = LO32;
1713       if (lower_special && upper_special) {
1714         // Unhappy situation... both upper and lower are special, so punt with
1715         // a target constant:
1716         SDOperand Zero = DAG.getConstant(0, MVT::i32);
1717         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1718                                   Zero, Zero);
1719       }
1720
1721       for (int i = 0; i < 4; ++i) {
1722         uint64_t val = 0;
1723         for (int j = 0; j < 4; ++j) {
1724           SDOperand V;
1725           bool process_upper, process_lower;
1726           val <<= 8;
1727           process_upper = (upper_special && (i & 1) == 0);
1728           process_lower = (lower_special && (i & 1) == 1);
1729
1730           if (process_upper || process_lower) {
1731             if ((process_upper && upper == 0)
1732                 || (process_lower && lower == 0))
1733               val |= 0x80;
1734             else if ((process_upper && upper == 0xffffffff)
1735                      || (process_lower && lower == 0xffffffff))
1736               val |= 0xc0;
1737             else if ((process_upper && upper == 0x80000000)
1738                      || (process_lower && lower == 0x80000000))
1739               val |= (j == 0 ? 0xe0 : 0x80);
1740           } else
1741             val |= i * 4 + j + ((i & 1) * 16);
1742         }
1743
1744         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1745       }
1746
1747       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1748                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1749                                      &ShufBytes[0], ShufBytes.size()));
1750     }
1751   }
1752   }
1753
1754   return SDOperand();
1755 }
1756
1757 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1758 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1759 /// permutation vector, V3, is monotonically increasing with one "exception"
1760 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1761 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1762 /// In either case, the net result is going to eventually invoke SHUFB to
1763 /// permute/shuffle the bytes from V1 and V2.
1764 /// \note
1765 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1766 /// control word for byte/halfword/word insertion. This takes care of a single
1767 /// element move from V2 into V1.
1768 /// \note
1769 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1770 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1771   SDOperand V1 = Op.getOperand(0);
1772   SDOperand V2 = Op.getOperand(1);
1773   SDOperand PermMask = Op.getOperand(2);
1774
1775   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1776
1777   // If we have a single element being moved from V1 to V2, this can be handled
1778   // using the C*[DX] compute mask instructions, but the vector elements have
1779   // to be monotonically increasing with one exception element.
1780   MVT EltVT = V1.getValueType().getVectorElementType();
1781   unsigned EltsFromV2 = 0;
1782   unsigned V2Elt = 0;
1783   unsigned V2EltIdx0 = 0;
1784   unsigned CurrElt = 0;
1785   bool monotonic = true;
1786   if (EltVT == MVT::i8)
1787     V2EltIdx0 = 16;
1788   else if (EltVT == MVT::i16)
1789     V2EltIdx0 = 8;
1790   else if (EltVT == MVT::i32)
1791     V2EltIdx0 = 4;
1792   else
1793     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1794
1795   for (unsigned i = 0, e = PermMask.getNumOperands();
1796        EltsFromV2 <= 1 && monotonic && i != e;
1797        ++i) {
1798     unsigned SrcElt;
1799     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1800       SrcElt = 0;
1801     else
1802       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1803
1804     if (SrcElt >= V2EltIdx0) {
1805       ++EltsFromV2;
1806       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1807     } else if (CurrElt != SrcElt) {
1808       monotonic = false;
1809     }
1810
1811     ++CurrElt;
1812   }
1813
1814   if (EltsFromV2 == 1 && monotonic) {
1815     // Compute mask and shuffle
1816     MachineFunction &MF = DAG.getMachineFunction();
1817     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1818     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1819     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1820     // Initialize temporary register to 0
1821     SDOperand InitTempReg =
1822       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1823     // Copy register's contents as index in INSERT_MASK:
1824     SDOperand ShufMaskOp =
1825       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1826                   DAG.getTargetConstant(V2Elt, MVT::i32),
1827                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1828     // Use shuffle mask in SHUFB synthetic instruction:
1829     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1830   } else {
1831     // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1832     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1833
1834     SmallVector<SDOperand, 16> ResultMask;
1835     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1836       unsigned SrcElt;
1837       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1838         SrcElt = 0;
1839       else
1840         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1841
1842       for (unsigned j = 0; j < BytesPerElement; ++j) {
1843         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1844                                              MVT::i8));
1845       }
1846     }
1847
1848     SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1849                                       &ResultMask[0], ResultMask.size());
1850     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1851   }
1852 }
1853
1854 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1855   SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1856
1857   if (Op0.Val->getOpcode() == ISD::Constant) {
1858     // For a constant, build the appropriate constant vector, which will
1859     // eventually simplify to a vector register load.
1860
1861     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1862     SmallVector<SDOperand, 16> ConstVecValues;
1863     MVT VT;
1864     size_t n_copies;
1865
1866     // Create a constant vector:
1867     switch (Op.getValueType().getSimpleVT()) {
1868     default: assert(0 && "Unexpected constant value type in "
1869                          "LowerSCALAR_TO_VECTOR");
1870     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1871     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1872     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1873     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1874     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1875     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1876     }
1877
1878     SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1879     for (size_t j = 0; j < n_copies; ++j)
1880       ConstVecValues.push_back(CValue);
1881
1882     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1883                        &ConstVecValues[0], ConstVecValues.size());
1884   } else {
1885     // Otherwise, copy the value from one register to another:
1886     switch (Op0.getValueType().getSimpleVT()) {
1887     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1888     case MVT::i8:
1889     case MVT::i16:
1890     case MVT::i32:
1891     case MVT::i64:
1892     case MVT::f32:
1893     case MVT::f64:
1894       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1895     }
1896   }
1897
1898   return SDOperand();
1899 }
1900
1901 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1902   switch (Op.getValueType().getSimpleVT()) {
1903   default:
1904     cerr << "CellSPU: Unknown vector multiplication, got "
1905          << Op.getValueType().getMVTString()
1906          << "\n";
1907     abort();
1908     /*NOTREACHED*/
1909
1910   case MVT::v4i32: {
1911     SDOperand rA = Op.getOperand(0);
1912     SDOperand rB = Op.getOperand(1);
1913     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1914     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1915     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1916     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1917
1918     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1919     break;
1920   }
1921
1922   // Multiply two v8i16 vectors (pipeline friendly version):
1923   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1924   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1925   // c) Use SELB to select upper and lower halves from the intermediate results
1926   //
1927   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1928   // dual-issue. This code does manage to do this, even if it's a little on
1929   // the wacky side
1930   case MVT::v8i16: {
1931     MachineFunction &MF = DAG.getMachineFunction();
1932     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1933     SDOperand Chain = Op.getOperand(0);
1934     SDOperand rA = Op.getOperand(0);
1935     SDOperand rB = Op.getOperand(1);
1936     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1938
1939     SDOperand FSMBOp =
1940       DAG.getCopyToReg(Chain, FSMBIreg,
1941                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1942                                    DAG.getConstant(0xcccc, MVT::i16)));
1943
1944     SDOperand HHProd =
1945       DAG.getCopyToReg(FSMBOp, HiProdReg,
1946                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1947
1948     SDOperand HHProd_v4i32 =
1949       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1951
1952     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1953                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1954                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1955                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1956                                                HHProd_v4i32,
1957                                                DAG.getConstant(16, MVT::i16))),
1958                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1959   }
1960
1961   // This M00sE is N@stI! (apologies to Monty Python)
1962   //
1963   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1964   // is to break it all apart, sign extend, and reassemble the various
1965   // intermediate products.
1966   case MVT::v16i8: {
1967     SDOperand rA = Op.getOperand(0);
1968     SDOperand rB = Op.getOperand(1);
1969     SDOperand c8 = DAG.getConstant(8, MVT::i32);
1970     SDOperand c16 = DAG.getConstant(16, MVT::i32);
1971
1972     SDOperand LLProd =
1973       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1974                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1975                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1976
1977     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1978
1979     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1980
1981     SDOperand LHProd =
1982       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1983                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1984
1985     SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1986                                      DAG.getConstant(0x2222, MVT::i16));
1987
1988     SDOperand LoProdParts =
1989       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1990                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1991                               LLProd, LHProd, FSMBmask));
1992
1993     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1994
1995     SDOperand LoProd =
1996       DAG.getNode(ISD::AND, MVT::v4i32,
1997                   LoProdParts,
1998                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999                               LoProdMask, LoProdMask,
2000                               LoProdMask, LoProdMask));
2001
2002     SDOperand rAH =
2003       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2004                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2005
2006     SDOperand rBH =
2007       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2008                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2009
2010     SDOperand HLProd =
2011       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2012                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2013                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2014
2015     SDOperand HHProd_1 =
2016       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2017                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2018                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2019                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2020                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2021
2022     SDOperand HHProd =
2023       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2024                   HLProd,
2025                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2026                   FSMBmask);
2027
2028     SDOperand HiProd =
2029       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2030
2031     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2032                        DAG.getNode(ISD::OR, MVT::v4i32,
2033                                    LoProd, HiProd));
2034   }
2035   }
2036
2037   return SDOperand();
2038 }
2039
2040 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2041   MachineFunction &MF = DAG.getMachineFunction();
2042   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2043
2044   SDOperand A = Op.getOperand(0);
2045   SDOperand B = Op.getOperand(1);
2046   MVT VT = Op.getValueType();
2047
2048   unsigned VRegBR, VRegC;
2049
2050   if (VT == MVT::f32) {
2051     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2052     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2053   } else {
2054     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2055     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2056   }
2057   // TODO: make sure we're feeding FPInterp the right arguments
2058   // Right now: fi B, frest(B)
2059
2060   // Computes BRcpl =
2061   // (Floating Interpolate (FP Reciprocal Estimate B))
2062   SDOperand BRcpl =
2063       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2064                        DAG.getNode(SPUISD::FPInterp, VT, B,
2065                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2066
2067   // Computes A * BRcpl and stores in a temporary register
2068   SDOperand AxBRcpl =
2069       DAG.getCopyToReg(BRcpl, VRegC,
2070                  DAG.getNode(ISD::FMUL, VT, A,
2071                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2072   // What's the Chain variable do? It's magic!
2073   // TODO: set Chain = Op(0).getEntryNode()
2074
2075   return DAG.getNode(ISD::FADD, VT,
2076                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2077                 DAG.getNode(ISD::FMUL, VT,
2078                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2079                         DAG.getNode(ISD::FSUB, VT, A,
2080                             DAG.getNode(ISD::FMUL, VT, B,
2081                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2082 }
2083
2084 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2085   MVT VT = Op.getValueType();
2086   SDOperand N = Op.getOperand(0);
2087   SDOperand Elt = Op.getOperand(1);
2088   SDOperand ShufMask[16];
2089   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2090
2091   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2092
2093   int EltNo = (int) C->getValue();
2094
2095   // sanity checks:
2096   if (VT == MVT::i8 && EltNo >= 16)
2097     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2098   else if (VT == MVT::i16 && EltNo >= 8)
2099     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2100   else if (VT == MVT::i32 && EltNo >= 4)
2101     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2102   else if (VT == MVT::i64 && EltNo >= 2)
2103     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2104
2105   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2106     // i32 and i64: Element 0 is the preferred slot
2107     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2108   }
2109
2110   // Need to generate shuffle mask and extract:
2111   int prefslot_begin = -1, prefslot_end = -1;
2112   int elt_byte = EltNo * VT.getSizeInBits() / 8;
2113
2114   switch (VT.getSimpleVT()) {
2115   default:
2116     assert(false && "Invalid value type!");
2117   case MVT::i8: {
2118     prefslot_begin = prefslot_end = 3;
2119     break;
2120   }
2121   case MVT::i16: {
2122     prefslot_begin = 2; prefslot_end = 3;
2123     break;
2124   }
2125   case MVT::i32: {
2126     prefslot_begin = 0; prefslot_end = 3;
2127     break;
2128   }
2129   case MVT::i64: {
2130     prefslot_begin = 0; prefslot_end = 7;
2131     break;
2132   }
2133   }
2134
2135   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2136          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2137
2138   for (int i = 0; i < 16; ++i) {
2139     // zero fill uppper part of preferred slot, don't care about the
2140     // other slots:
2141     unsigned int mask_val;
2142
2143     if (i <= prefslot_end) {
2144       mask_val =
2145         ((i < prefslot_begin)
2146          ? 0x80
2147          : elt_byte + (i - prefslot_begin));
2148
2149       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2150     } else
2151       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2152   }
2153
2154   SDOperand ShufMaskVec =
2155     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2156                 &ShufMask[0],
2157                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2158
2159   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2160                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2161                                  N, N, ShufMaskVec));
2162
2163 }
2164
2165 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2166   SDOperand VecOp = Op.getOperand(0);
2167   SDOperand ValOp = Op.getOperand(1);
2168   SDOperand IdxOp = Op.getOperand(2);
2169   MVT VT = Op.getValueType();
2170
2171   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2172   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2173
2174   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2175   // Use $2 because it's always 16-byte aligned and it's available:
2176   SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2177
2178   SDOperand result =
2179     DAG.getNode(SPUISD::SHUFB, VT,
2180                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2181                 VecOp,
2182                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2183                             DAG.getNode(ISD::ADD, PtrVT,
2184                                         PtrBase,
2185                                         DAG.getConstant(CN->getValue(),
2186                                                         PtrVT))));
2187
2188   return result;
2189 }
2190
2191 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2192 {
2193   SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2194
2195   assert(Op.getValueType() == MVT::i8);
2196   switch (Opc) {
2197   default:
2198     assert(0 && "Unhandled i8 math operator");
2199     /*NOTREACHED*/
2200     break;
2201   case ISD::SUB: {
2202     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2203     // the result:
2204     SDOperand N1 = Op.getOperand(1);
2205     N0 = (N0.getOpcode() != ISD::Constant
2206           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2207           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2208     N1 = (N1.getOpcode() != ISD::Constant
2209           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2210           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2211     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2212                        DAG.getNode(Opc, MVT::i16, N0, N1));
2213   }
2214   case ISD::ROTR:
2215   case ISD::ROTL: {
2216     SDOperand N1 = Op.getOperand(1);
2217     unsigned N1Opc;
2218     N0 = (N0.getOpcode() != ISD::Constant
2219           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2220           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2221     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2222     N1 = (N1.getOpcode() != ISD::Constant
2223           ? DAG.getNode(N1Opc, MVT::i16, N1)
2224           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2225     SDOperand ExpandArg =
2226       DAG.getNode(ISD::OR, MVT::i16, N0,
2227                   DAG.getNode(ISD::SHL, MVT::i16,
2228                               N0, DAG.getConstant(8, MVT::i16)));
2229     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2230                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2231   }
2232   case ISD::SRL:
2233   case ISD::SHL: {
2234     SDOperand N1 = Op.getOperand(1);
2235     unsigned N1Opc;
2236     N0 = (N0.getOpcode() != ISD::Constant
2237           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2238           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2239     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2240     N1 = (N1.getOpcode() != ISD::Constant
2241           ? DAG.getNode(N1Opc, MVT::i16, N1)
2242           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2243     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2244                        DAG.getNode(Opc, MVT::i16, N0, N1));
2245   }
2246   case ISD::SRA: {
2247     SDOperand N1 = Op.getOperand(1);
2248     unsigned N1Opc;
2249     N0 = (N0.getOpcode() != ISD::Constant
2250           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2251           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2252     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2253     N1 = (N1.getOpcode() != ISD::Constant
2254           ? DAG.getNode(N1Opc, MVT::i16, N1)
2255           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2256     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2257                        DAG.getNode(Opc, MVT::i16, N0, N1));
2258   }
2259   case ISD::MUL: {
2260     SDOperand N1 = Op.getOperand(1);
2261     unsigned N1Opc;
2262     N0 = (N0.getOpcode() != ISD::Constant
2263           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2264           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2265     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2266     N1 = (N1.getOpcode() != ISD::Constant
2267           ? DAG.getNode(N1Opc, MVT::i16, N1)
2268           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2269     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2270                        DAG.getNode(Opc, MVT::i16, N0, N1));
2271     break;
2272   }
2273   }
2274
2275   return SDOperand();
2276 }
2277
2278 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2279 {
2280   MVT VT = Op.getValueType();
2281   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2282
2283   SDOperand Op0 = Op.getOperand(0);
2284
2285   switch (Opc) {
2286   case ISD::ZERO_EXTEND:
2287   case ISD::SIGN_EXTEND:
2288   case ISD::ANY_EXTEND: {
2289     MVT Op0VT = Op0.getValueType();
2290     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2291
2292     assert(Op0VT == MVT::i32
2293            && "CellSPU: Zero/sign extending something other than i32");
2294     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2295
2296     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2297                       ? SPUISD::ROTBYTES_RIGHT_S
2298                       : SPUISD::ROTQUAD_RZ_BYTES);
2299     SDOperand PromoteScalar =
2300       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2301
2302     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2303                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2304                                    DAG.getNode(NewOpc, Op0VecVT,
2305                                                PromoteScalar,
2306                                                DAG.getConstant(4, MVT::i32))));
2307   }
2308
2309   case ISD::ADD: {
2310     // Turn operands into vectors to satisfy type checking (shufb works on
2311     // vectors)
2312     SDOperand Op0 =
2313       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2314     SDOperand Op1 =
2315       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2316     SmallVector<SDOperand, 16> ShufBytes;
2317
2318     // Create the shuffle mask for "rotating" the borrow up one register slot
2319     // once the borrow is generated.
2320     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2321     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2322     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2323     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2324
2325     SDOperand CarryGen =
2326       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2327     SDOperand ShiftedCarry =
2328       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2329                   CarryGen, CarryGen,
2330                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2331                               &ShufBytes[0], ShufBytes.size()));
2332
2333     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2334                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2335                                    Op0, Op1, ShiftedCarry));
2336   }
2337
2338   case ISD::SUB: {
2339     // Turn operands into vectors to satisfy type checking (shufb works on
2340     // vectors)
2341     SDOperand Op0 =
2342       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2343     SDOperand Op1 =
2344       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2345     SmallVector<SDOperand, 16> ShufBytes;
2346
2347     // Create the shuffle mask for "rotating" the borrow up one register slot
2348     // once the borrow is generated.
2349     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2350     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2351     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2352     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2353
2354     SDOperand BorrowGen =
2355       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2356     SDOperand ShiftedBorrow =
2357       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2358                   BorrowGen, BorrowGen,
2359                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2360                               &ShufBytes[0], ShufBytes.size()));
2361
2362     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2363                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2364                                    Op0, Op1, ShiftedBorrow));
2365   }
2366
2367   case ISD::SHL: {
2368     SDOperand ShiftAmt = Op.getOperand(1);
2369     MVT ShiftAmtVT = ShiftAmt.getValueType();
2370     SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2371     SDOperand MaskLower =
2372       DAG.getNode(SPUISD::SELB, VecVT,
2373                   Op0Vec,
2374                   DAG.getConstant(0, VecVT),
2375                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2376                               DAG.getConstant(0xff00ULL, MVT::i16)));
2377     SDOperand ShiftAmtBytes =
2378       DAG.getNode(ISD::SRL, ShiftAmtVT,
2379                   ShiftAmt,
2380                   DAG.getConstant(3, ShiftAmtVT));
2381     SDOperand ShiftAmtBits =
2382       DAG.getNode(ISD::AND, ShiftAmtVT,
2383                   ShiftAmt,
2384                   DAG.getConstant(7, ShiftAmtVT));
2385
2386     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2387                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2388                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2389                                                MaskLower, ShiftAmtBytes),
2390                                    ShiftAmtBits));
2391   }
2392
2393   case ISD::SRL: {
2394     MVT VT = Op.getValueType();
2395     SDOperand ShiftAmt = Op.getOperand(1);
2396     MVT ShiftAmtVT = ShiftAmt.getValueType();
2397     SDOperand ShiftAmtBytes =
2398       DAG.getNode(ISD::SRL, ShiftAmtVT,
2399                   ShiftAmt,
2400                   DAG.getConstant(3, ShiftAmtVT));
2401     SDOperand ShiftAmtBits =
2402       DAG.getNode(ISD::AND, ShiftAmtVT,
2403                   ShiftAmt,
2404                   DAG.getConstant(7, ShiftAmtVT));
2405
2406     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2407                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2408                                    Op0, ShiftAmtBytes),
2409                        ShiftAmtBits);
2410   }
2411
2412   case ISD::SRA: {
2413     // Promote Op0 to vector
2414     SDOperand Op0 =
2415       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2416     SDOperand ShiftAmt = Op.getOperand(1);
2417     MVT ShiftVT = ShiftAmt.getValueType();
2418
2419     // Negate variable shift amounts
2420     if (!isa<ConstantSDNode>(ShiftAmt)) {
2421       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2422                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2423     }
2424
2425     SDOperand UpperHalfSign =
2426       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2427                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2428                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2429                                           Op0, DAG.getConstant(31, MVT::i32))));
2430     SDOperand UpperHalfSignMask =
2431       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2432     SDOperand UpperLowerMask =
2433       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2434                   DAG.getConstant(0xff00, MVT::i16));
2435     SDOperand UpperLowerSelect =
2436       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2437                   UpperHalfSignMask, Op0, UpperLowerMask);
2438     SDOperand RotateLeftBytes =
2439       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2440                   UpperLowerSelect, ShiftAmt);
2441     SDOperand RotateLeftBits =
2442       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2443                   RotateLeftBytes, ShiftAmt);
2444
2445     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2446                        RotateLeftBits);
2447   }
2448   }
2449
2450   return SDOperand();
2451 }
2452
2453 //! Lower byte immediate operations for v16i8 vectors:
2454 static SDOperand
2455 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2456   SDOperand ConstVec;
2457   SDOperand Arg;
2458   MVT VT = Op.getValueType();
2459
2460   ConstVec = Op.getOperand(0);
2461   Arg = Op.getOperand(1);
2462   if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2463     if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2464       ConstVec = ConstVec.getOperand(0);
2465     } else {
2466       ConstVec = Op.getOperand(1);
2467       Arg = Op.getOperand(0);
2468       if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2469         ConstVec = ConstVec.getOperand(0);
2470       }
2471     }
2472   }
2473
2474   if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2475     uint64_t VectorBits[2];
2476     uint64_t UndefBits[2];
2477     uint64_t SplatBits, SplatUndef;
2478     int SplatSize;
2479
2480     if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2481         && isConstantSplat(VectorBits, UndefBits,
2482                            VT.getVectorElementType().getSizeInBits(),
2483                            SplatBits, SplatUndef, SplatSize)) {
2484       SDOperand tcVec[16];
2485       SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2486       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2487
2488       // Turn the BUILD_VECTOR into a set of target constants:
2489       for (size_t i = 0; i < tcVecSize; ++i)
2490         tcVec[i] = tc;
2491
2492       return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2493                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2494     }
2495   }
2496
2497   return SDOperand();
2498 }
2499
2500 //! Lower i32 multiplication
2501 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2502                           unsigned Opc) {
2503   switch (VT.getSimpleVT()) {
2504   default:
2505     cerr << "CellSPU: Unknown LowerMUL value type, got "
2506          << Op.getValueType().getMVTString()
2507          << "\n";
2508     abort();
2509     /*NOTREACHED*/
2510
2511   case MVT::i32: {
2512     SDOperand rA = Op.getOperand(0);
2513     SDOperand rB = Op.getOperand(1);
2514
2515     return DAG.getNode(ISD::ADD, MVT::i32,
2516                        DAG.getNode(ISD::ADD, MVT::i32,
2517                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2518                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2519                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2520   }
2521   }
2522
2523   return SDOperand();
2524 }
2525
2526 //! Custom lowering for CTPOP (count population)
2527 /*!
2528   Custom lowering code that counts the number ones in the input
2529   operand. SPU has such an instruction, but it counts the number of
2530   ones per byte, which then have to be accumulated.
2531 */
2532 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2533   MVT VT = Op.getValueType();
2534   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2535
2536   switch (VT.getSimpleVT()) {
2537   default:
2538     assert(false && "Invalid value type!");
2539   case MVT::i8: {
2540     SDOperand N = Op.getOperand(0);
2541     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2542
2543     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2544     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2545
2546     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2547   }
2548
2549   case MVT::i16: {
2550     MachineFunction &MF = DAG.getMachineFunction();
2551     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2552
2553     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2554
2555     SDOperand N = Op.getOperand(0);
2556     SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2557     SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2558     SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2559
2560     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2561     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2562
2563     // CNTB_result becomes the chain to which all of the virtual registers
2564     // CNTB_reg, SUM1_reg become associated:
2565     SDOperand CNTB_result =
2566       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2567
2568     SDOperand CNTB_rescopy =
2569       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2570
2571     SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2572
2573     return DAG.getNode(ISD::AND, MVT::i16,
2574                        DAG.getNode(ISD::ADD, MVT::i16,
2575                                    DAG.getNode(ISD::SRL, MVT::i16,
2576                                                Tmp1, Shift1),
2577                                    Tmp1),
2578                        Mask0);
2579   }
2580
2581   case MVT::i32: {
2582     MachineFunction &MF = DAG.getMachineFunction();
2583     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2584
2585     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2586     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2587
2588     SDOperand N = Op.getOperand(0);
2589     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2590     SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2591     SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2592     SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2593
2594     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2595     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2596
2597     // CNTB_result becomes the chain to which all of the virtual registers
2598     // CNTB_reg, SUM1_reg become associated:
2599     SDOperand CNTB_result =
2600       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2601
2602     SDOperand CNTB_rescopy =
2603       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2604
2605     SDOperand Comp1 =
2606       DAG.getNode(ISD::SRL, MVT::i32,
2607                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2608
2609     SDOperand Sum1 =
2610       DAG.getNode(ISD::ADD, MVT::i32,
2611                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2612
2613     SDOperand Sum1_rescopy =
2614       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2615
2616     SDOperand Comp2 =
2617       DAG.getNode(ISD::SRL, MVT::i32,
2618                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2619                   Shift2);
2620     SDOperand Sum2 =
2621       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2622                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2623
2624     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2625   }
2626
2627   case MVT::i64:
2628     break;
2629   }
2630
2631   return SDOperand();
2632 }
2633
2634 /// LowerOperation - Provide custom lowering hooks for some operations.
2635 ///
2636 SDOperand
2637 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2638 {
2639   unsigned Opc = (unsigned) Op.getOpcode();
2640   MVT VT = Op.getValueType();
2641
2642   switch (Opc) {
2643   default: {
2644     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2645     cerr << "Op.getOpcode() = " << Opc << "\n";
2646     cerr << "*Op.Val:\n";
2647     Op.Val->dump();
2648     abort();
2649   }
2650   case ISD::LOAD:
2651   case ISD::SEXTLOAD:
2652   case ISD::ZEXTLOAD:
2653     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2654   case ISD::STORE:
2655     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2656   case ISD::ConstantPool:
2657     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2658   case ISD::GlobalAddress:
2659     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2660   case ISD::JumpTable:
2661     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2662   case ISD::Constant:
2663     return LowerConstant(Op, DAG);
2664   case ISD::ConstantFP:
2665     return LowerConstantFP(Op, DAG);
2666   case ISD::BRCOND:
2667     return LowerBRCOND(Op, DAG);
2668   case ISD::FORMAL_ARGUMENTS:
2669     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2670   case ISD::CALL:
2671     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2672   case ISD::RET:
2673     return LowerRET(Op, DAG, getTargetMachine());
2674
2675
2676   // i8, i64 math ops:
2677   case ISD::ZERO_EXTEND:
2678   case ISD::SIGN_EXTEND:
2679   case ISD::ANY_EXTEND:
2680   case ISD::ADD:
2681   case ISD::SUB:
2682   case ISD::ROTR:
2683   case ISD::ROTL:
2684   case ISD::SRL:
2685   case ISD::SHL:
2686   case ISD::SRA: {
2687     if (VT == MVT::i8)
2688       return LowerI8Math(Op, DAG, Opc);
2689     else if (VT == MVT::i64)
2690       return LowerI64Math(Op, DAG, Opc);
2691     break;
2692   }
2693
2694   // Vector-related lowering.
2695   case ISD::BUILD_VECTOR:
2696     return LowerBUILD_VECTOR(Op, DAG);
2697   case ISD::SCALAR_TO_VECTOR:
2698     return LowerSCALAR_TO_VECTOR(Op, DAG);
2699   case ISD::VECTOR_SHUFFLE:
2700     return LowerVECTOR_SHUFFLE(Op, DAG);
2701   case ISD::EXTRACT_VECTOR_ELT:
2702     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2703   case ISD::INSERT_VECTOR_ELT:
2704     return LowerINSERT_VECTOR_ELT(Op, DAG);
2705
2706   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2707   case ISD::AND:
2708   case ISD::OR:
2709   case ISD::XOR:
2710     return LowerByteImmed(Op, DAG);
2711
2712   // Vector and i8 multiply:
2713   case ISD::MUL:
2714     if (VT.isVector())
2715       return LowerVectorMUL(Op, DAG);
2716     else if (VT == MVT::i8)
2717       return LowerI8Math(Op, DAG, Opc);
2718     else
2719       return LowerMUL(Op, DAG, VT, Opc);
2720
2721   case ISD::FDIV:
2722     if (VT == MVT::f32 || VT == MVT::v4f32)
2723       return LowerFDIVf32(Op, DAG);
2724 //    else if (Op.getValueType() == MVT::f64)
2725 //      return LowerFDIVf64(Op, DAG);
2726     else
2727       assert(0 && "Calling FDIV on unsupported MVT");
2728
2729   case ISD::CTPOP:
2730     return LowerCTPOP(Op, DAG);
2731   }
2732
2733   return SDOperand();
2734 }
2735
2736 //===----------------------------------------------------------------------===//
2737 // Target Optimization Hooks
2738 //===----------------------------------------------------------------------===//
2739
2740 SDOperand
2741 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2742 {
2743 #if 0
2744   TargetMachine &TM = getTargetMachine();
2745 #endif
2746   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2747   SelectionDAG &DAG = DCI.DAG;
2748   SDOperand Op0 = N->getOperand(0);      // everything has at least one operand
2749   SDOperand Result;                     // Initially, NULL result
2750
2751   switch (N->getOpcode()) {
2752   default: break;
2753   case ISD::ADD: {
2754     SDOperand Op1 = N->getOperand(1);
2755
2756     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2757       SDOperand Op01 = Op0.getOperand(1);
2758       if (Op01.getOpcode() == ISD::Constant
2759           || Op01.getOpcode() == ISD::TargetConstant) {
2760         // (add <const>, (SPUindirect <arg>, <const>)) ->
2761         // (SPUindirect <arg>, <const + const>)
2762         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2763         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2764         SDOperand combinedConst =
2765           DAG.getConstant(CN0->getValue() + CN1->getValue(),
2766                           Op0.getValueType());
2767
2768         DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2769                    << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2770         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2771                    << CN0->getValue() + CN1->getValue() << ")\n");
2772         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2773                            Op0.getOperand(0), combinedConst);
2774       }
2775     } else if (isa<ConstantSDNode>(Op0)
2776                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2777       SDOperand Op11 = Op1.getOperand(1);
2778       if (Op11.getOpcode() == ISD::Constant
2779           || Op11.getOpcode() == ISD::TargetConstant) {
2780         // (add (SPUindirect <arg>, <const>), <const>) ->
2781         // (SPUindirect <arg>, <const + const>)
2782         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2783         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2784         SDOperand combinedConst =
2785           DAG.getConstant(CN0->getValue() + CN1->getValue(),
2786                           Op0.getValueType());
2787
2788         DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2789                    << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2790         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2791                    << CN0->getValue() + CN1->getValue() << ")\n");
2792
2793         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2794                            Op1.getOperand(0), combinedConst);
2795       }
2796     }
2797     break;
2798   }
2799   case ISD::SIGN_EXTEND:
2800   case ISD::ZERO_EXTEND:
2801   case ISD::ANY_EXTEND: {
2802     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2803         N->getValueType(0) == Op0.getValueType()) {
2804       // (any_extend (SPUextract_elt0 <arg>)) ->
2805       // (SPUextract_elt0 <arg>)
2806       // Types must match, however...
2807       DEBUG(cerr << "Replace: ");
2808       DEBUG(N->dump(&DAG));
2809       DEBUG(cerr << "\nWith:    ");
2810       DEBUG(Op0.Val->dump(&DAG));
2811       DEBUG(cerr << "\n");
2812
2813       return Op0;
2814     }
2815     break;
2816   }
2817   case SPUISD::IndirectAddr: {
2818     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2819       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2820       if (CN->getValue() == 0) {
2821         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2822         // (SPUaform <addr>, 0)
2823
2824         DEBUG(cerr << "Replace: ");
2825         DEBUG(N->dump(&DAG));
2826         DEBUG(cerr << "\nWith:    ");
2827         DEBUG(Op0.Val->dump(&DAG));
2828         DEBUG(cerr << "\n");
2829
2830         return Op0;
2831       }
2832     }
2833     break;
2834   }
2835   case SPUISD::SHLQUAD_L_BITS:
2836   case SPUISD::SHLQUAD_L_BYTES:
2837   case SPUISD::VEC_SHL:
2838   case SPUISD::VEC_SRL:
2839   case SPUISD::VEC_SRA:
2840   case SPUISD::ROTQUAD_RZ_BYTES:
2841   case SPUISD::ROTQUAD_RZ_BITS: {
2842     SDOperand Op1 = N->getOperand(1);
2843
2844     if (isa<ConstantSDNode>(Op1)) {
2845       // Kill degenerate vector shifts:
2846       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2847
2848       if (CN->getValue() == 0) {
2849         Result = Op0;
2850       }
2851     }
2852     break;
2853   }
2854   case SPUISD::PROMOTE_SCALAR: {
2855     switch (Op0.getOpcode()) {
2856     default:
2857       break;
2858     case ISD::ANY_EXTEND:
2859     case ISD::ZERO_EXTEND:
2860     case ISD::SIGN_EXTEND: {
2861       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2862       // <arg>
2863       // but only if the SPUpromote_scalar and <arg> types match.
2864       SDOperand Op00 = Op0.getOperand(0);
2865       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2866         SDOperand Op000 = Op00.getOperand(0);
2867         if (Op000.getValueType() == N->getValueType(0)) {
2868           Result = Op000;
2869         }
2870       }
2871       break;
2872     }
2873     case SPUISD::EXTRACT_ELT0: {
2874       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2875       // <arg>
2876       Result = Op0.getOperand(0);
2877       break;
2878     }
2879     }
2880     break;
2881   }
2882   }
2883   // Otherwise, return unchanged.
2884 #if 1
2885   if (Result.Val) {
2886     DEBUG(cerr << "\nReplace.SPU: ");
2887     DEBUG(N->dump(&DAG));
2888     DEBUG(cerr << "\nWith:        ");
2889     DEBUG(Result.Val->dump(&DAG));
2890     DEBUG(cerr << "\n");
2891   }
2892 #endif
2893
2894   return Result;
2895 }
2896
2897 //===----------------------------------------------------------------------===//
2898 // Inline Assembly Support
2899 //===----------------------------------------------------------------------===//
2900
2901 /// getConstraintType - Given a constraint letter, return the type of
2902 /// constraint it is for this target.
2903 SPUTargetLowering::ConstraintType
2904 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2905   if (ConstraintLetter.size() == 1) {
2906     switch (ConstraintLetter[0]) {
2907     default: break;
2908     case 'b':
2909     case 'r':
2910     case 'f':
2911     case 'v':
2912     case 'y':
2913       return C_RegisterClass;
2914     }
2915   }
2916   return TargetLowering::getConstraintType(ConstraintLetter);
2917 }
2918
2919 std::pair<unsigned, const TargetRegisterClass*>
2920 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2921                                                 MVT VT) const
2922 {
2923   if (Constraint.size() == 1) {
2924     // GCC RS6000 Constraint Letters
2925     switch (Constraint[0]) {
2926     case 'b':   // R1-R31
2927     case 'r':   // R0-R31
2928       if (VT == MVT::i64)
2929         return std::make_pair(0U, SPU::R64CRegisterClass);
2930       return std::make_pair(0U, SPU::R32CRegisterClass);
2931     case 'f':
2932       if (VT == MVT::f32)
2933         return std::make_pair(0U, SPU::R32FPRegisterClass);
2934       else if (VT == MVT::f64)
2935         return std::make_pair(0U, SPU::R64FPRegisterClass);
2936       break;
2937     case 'v':
2938       return std::make_pair(0U, SPU::GPRCRegisterClass);
2939     }
2940   }
2941
2942   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2943 }
2944
2945 //! Compute used/known bits for a SPU operand
2946 void
2947 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2948                                                   const APInt &Mask,
2949                                                   APInt &KnownZero,
2950                                                   APInt &KnownOne,
2951                                                   const SelectionDAG &DAG,
2952                                                   unsigned Depth ) const {
2953 #if 0
2954   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2955 #endif
2956
2957   switch (Op.getOpcode()) {
2958   default:
2959     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2960     break;
2961
2962 #if 0
2963   case CALL:
2964   case SHUFB:
2965   case INSERT_MASK:
2966   case CNTB:
2967 #endif
2968
2969   case SPUISD::PROMOTE_SCALAR: {
2970     SDOperand Op0 = Op.getOperand(0);
2971     MVT Op0VT = Op0.getValueType();
2972     unsigned Op0VTBits = Op0VT.getSizeInBits();
2973     uint64_t InMask = Op0VT.getIntegerVTBitMask();
2974     KnownZero |= APInt(Op0VTBits, ~InMask, false);
2975     KnownOne |= APInt(Op0VTBits, InMask, false);
2976     break;
2977   }
2978
2979   case SPUISD::LDRESULT:
2980   case SPUISD::EXTRACT_ELT0:
2981   case SPUISD::EXTRACT_ELT0_CHAINED: {
2982     MVT OpVT = Op.getValueType();
2983     unsigned OpVTBits = OpVT.getSizeInBits();
2984     uint64_t InMask = OpVT.getIntegerVTBitMask();
2985     KnownZero |= APInt(OpVTBits, ~InMask, false);
2986     KnownOne |= APInt(OpVTBits, InMask, false);
2987     break;
2988   }
2989
2990 #if 0
2991   case EXTRACT_I1_ZEXT:
2992   case EXTRACT_I1_SEXT:
2993   case EXTRACT_I8_ZEXT:
2994   case EXTRACT_I8_SEXT:
2995   case MPY:
2996   case MPYU:
2997   case MPYH:
2998   case MPYHH:
2999   case SPUISD::SHLQUAD_L_BITS:
3000   case SPUISD::SHLQUAD_L_BYTES:
3001   case SPUISD::VEC_SHL:
3002   case SPUISD::VEC_SRL:
3003   case SPUISD::VEC_SRA:
3004   case SPUISD::VEC_ROTL:
3005   case SPUISD::VEC_ROTR:
3006   case SPUISD::ROTQUAD_RZ_BYTES:
3007   case SPUISD::ROTQUAD_RZ_BITS:
3008   case SPUISD::ROTBYTES_RIGHT_S:
3009   case SPUISD::ROTBYTES_LEFT:
3010   case SPUISD::ROTBYTES_LEFT_CHAINED:
3011   case SPUISD::SELECT_MASK:
3012   case SPUISD::SELB:
3013   case SPUISD::FPInterp:
3014   case SPUISD::FPRecipEst:
3015   case SPUISD::SEXT32TO64:
3016 #endif
3017   }
3018 }
3019
3020 // LowerAsmOperandForConstraint
3021 void
3022 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3023                                                 char ConstraintLetter,
3024                                                 std::vector<SDOperand> &Ops,
3025                                                 SelectionDAG &DAG) const {
3026   // Default, for the time being, to the base class handler
3027   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3028 }
3029
3030 /// isLegalAddressImmediate - Return true if the integer value can be used
3031 /// as the offset of the target addressing mode.
3032 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3033   // SPU's addresses are 256K:
3034   return (V > -(1 << 18) && V < (1 << 18) - 1);
3035 }
3036
3037 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3038   return false;
3039 }