lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/ADT/VectorExtras.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << VT.getMVTString()
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDValue &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDValue &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 126   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 127   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 128   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 129   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 130   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 131   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 132
 133   // SPU has no sign or zero extended loads for i1, i8, i16:
 134   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 135   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 136   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 137   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 138   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 139   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 140   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 142
 143   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 144   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 145   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 146   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 147   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 148   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 151
 152   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 153   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 154   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 155
 156   // SPU constant load actions are custom lowered:
 157   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 158   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 159   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 160
 161   // SPU's loads and stores have to be custom lowered:
 162   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 163        ++sctype) {
 164     MVT VT = (MVT::SimpleValueType)sctype;
 165
 166     setOperationAction(ISD::LOAD, VT, Custom);
 167     setOperationAction(ISD::STORE, VT, Custom);
 168   }
 169
 170   // Custom lower BRCOND for i1, i8 to "promote" the result to
 171   // i32 and i16, respectively.
 172   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 173
 174   // Expand the jumptable branches
 175   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 176   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 177   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 178
 179   // SPU has no intrinsics for these particular operations:
 180   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 181
 182   // PowerPC has no SREM/UREM instructions
 183   setOperationAction(ISD::SREM, MVT::i32, Expand);
 184   setOperationAction(ISD::UREM, MVT::i32, Expand);
 185   setOperationAction(ISD::SREM, MVT::i64, Expand);
 186   setOperationAction(ISD::UREM, MVT::i64, Expand);
 187
 188   // We don't support sin/cos/sqrt/fmod
 189   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 190   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 191   setOperationAction(ISD::FREM , MVT::f64, Expand);
 192   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 193   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 194   setOperationAction(ISD::FREM , MVT::f32, Expand);
 195
 196   // If we're enabling GP optimizations, use hardware square root
 197   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 198   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 199
 200   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 201   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 202
 203   // SPU can do rotate right and left, so legalize it... but customize for i8
 204   // because instructions don't exist.
 205
 206   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 207   //        .td files.
 208   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 209   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 210   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 211
 212   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 213   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 214   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 215   // SPU has no native version of shift left/right for i8
 216   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 217   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 218   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 219   // And SPU needs custom lowering for shift left/right for i64
 220   setOperationAction(ISD::SHL,  MVT::i64,    Custom);
 221   setOperationAction(ISD::SRL,  MVT::i64,    Custom);
 222   setOperationAction(ISD::SRA,  MVT::i64,    Custom);
 223
 224   // Custom lower i8, i32 and i64 multiplications
 225   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 226   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 227   setOperationAction(ISD::MUL,  MVT::i64,    Custom);
 228
 229   // Need to custom handle (some) common i8, i64 math ops
 230   setOperationAction(ISD::ADD,  MVT::i64,    Custom);
 231   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 232   setOperationAction(ISD::SUB,  MVT::i64,    Custom);
 233
 234   // SPU does not have BSWAP. It does have i32 support CTLZ.
 235   // CTPOP has to be custom lowered.
 236   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 237   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 238
 239   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 240   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 241   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 242   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 243
 244   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 245   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 246
 247   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 248
 249   // SPU has a version of select that implements (a&~c)|(b&c), just like
 250   // select ought to work:
 251   setOperationAction(ISD::SELECT, MVT::i1,   Promote);
 252   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 253   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 254   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 255   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 256
 257   setOperationAction(ISD::SETCC, MVT::i1,    Promote);
 258   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 259   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 260   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 261   setOperationAction(ISD::SETCC, MVT::i64,   Expand);
 262
 263   // Zero extension and sign extension for i64 have to be
 264   // custom legalized
 265   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 266   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 267   setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
 268
 269   // SPU has a legal FP -> signed INT instruction
 270   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 271   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 272   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 273   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 274
 275   // FDIV on SPU requires custom lowering
 276   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 277   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 278
 279   // SPU has [U|S]INT_TO_FP
 280   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 281   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 282   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 283   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 284   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 285   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 286   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 287   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 288
 289   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 290   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 291   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 292   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 293
 294   // We cannot sextinreg(i1).  Expand to shifts.
 295   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 296
 297   // Support label based line numbers.
 298   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
 299   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 300
 301   // We want to legalize GlobalAddress and ConstantPool nodes into the
 302   // appropriate instructions to materialize the address.
 303   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 304        ++sctype) {
 305     MVT VT = (MVT::SimpleValueType)sctype;
 306
 307     setOperationAction(ISD::GlobalAddress, VT, Custom);
 308     setOperationAction(ISD::ConstantPool,  VT, Custom);
 309     setOperationAction(ISD::JumpTable,     VT, Custom);
 310   }
 311
 312   // RET must be custom lowered, to meet ABI requirements
 313   setOperationAction(ISD::RET,           MVT::Other, Custom);
 314
 315   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 316   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 317
 318   // Use the default implementation.
 319   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 320   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 321   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 322   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 323   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 324   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 325   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 326
 327   // Cell SPU has instructions for converting between i64 and fp.
 328   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 329   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 330
 331   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 332   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 333
 334   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 335   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 336
 337   // First set operation action for all vector types to expand. Then we
 338   // will selectively turn on ones that can be effectively codegen'd.
 339   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 340   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 341   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 342   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 343   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 344   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 345
 346   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 347        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 348     MVT VT = (MVT::SimpleValueType)i;
 349
 350     // add/sub are legal for all supported vector VT's.
 351     setOperationAction(ISD::ADD , VT, Legal);
 352     setOperationAction(ISD::SUB , VT, Legal);
 353     // mul has to be custom lowered.
 354     setOperationAction(ISD::MUL , VT, Custom);
 355
 356     setOperationAction(ISD::AND   , VT, Legal);
 357     setOperationAction(ISD::OR    , VT, Legal);
 358     setOperationAction(ISD::XOR   , VT, Legal);
 359     setOperationAction(ISD::LOAD  , VT, Legal);
 360     setOperationAction(ISD::SELECT, VT, Legal);
 361     setOperationAction(ISD::STORE,  VT, Legal);
 362
 363     // These operations need to be expanded:
 364     setOperationAction(ISD::SDIV, VT, Expand);
 365     setOperationAction(ISD::SREM, VT, Expand);
 366     setOperationAction(ISD::UDIV, VT, Expand);
 367     setOperationAction(ISD::UREM, VT, Expand);
 368     setOperationAction(ISD::FDIV, VT, Custom);
 369
 370     // Custom lower build_vector, constant pool spills, insert and
 371     // extract vector elements:
 372     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 373     setOperationAction(ISD::ConstantPool, VT, Custom);
 374     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 375     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 376     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 377     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 378   }
 379
 380   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 381   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 382   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 383   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 384   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 385
 386   setShiftAmountType(MVT::i32);
 387   setSetCCResultContents(ZeroOrOneSetCCResult);
 388
 389   setStackPointerRegisterToSaveRestore(SPU::R1);
 390
 391   // We have target-specific dag combine patterns for the following nodes:
 392   setTargetDAGCombine(ISD::ADD);
 393   setTargetDAGCombine(ISD::ZERO_EXTEND);
 394   setTargetDAGCombine(ISD::SIGN_EXTEND);
 395   setTargetDAGCombine(ISD::ANY_EXTEND);
 396
 397   computeRegisterProperties();
 398 }
 399
 400 const char *
 401 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 402 {
 403   if (node_names.empty()) {
 404     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 405     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 406     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 407     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 408     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 409     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 410     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 411     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 412     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 413     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 414     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 415     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 416     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 417     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
 418                                               = "SPUISD::EXTRACT_ELT0_CHAINED";
 419     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 420     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 421     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 422     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 423     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 424     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 425     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 426     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 427     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 428     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 429     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 430     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 431     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 432     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 433     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 434     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
 435       "SPUISD::ROTQUAD_RZ_BYTES";
 436     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
 437       "SPUISD::ROTQUAD_RZ_BITS";
 438     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 439       "SPUISD::ROTBYTES_RIGHT_S";
 440     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 441     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 442       "SPUISD::ROTBYTES_LEFT_CHAINED";
 443     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 444       "SPUISD::ROTBYTES_LEFT_BITS";
 445     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 446     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 447     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
 448     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
 449     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
 450     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
 451     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 452     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 453     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 454   }
 455
 456   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 457
 458   return ((i != node_names.end()) ? i->second : 0);
 459 }
 460
 461 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
 462   MVT VT = Op.getValueType();
 463   if (VT.isInteger())
 464     return VT;
 465   else
 466     return MVT::i32;
 467 }
 468
 469 //===----------------------------------------------------------------------===//
 470 // Calling convention code:
 471 //===----------------------------------------------------------------------===//
 472
 473 #include "SPUGenCallingConv.inc"
 474
 475 //===----------------------------------------------------------------------===//
 476 //  LowerOperation implementation
 477 //===----------------------------------------------------------------------===//
 478
 479 /// Aligned load common code for CellSPU
 480 /*!
 481   \param[in] Op The SelectionDAG load or store operand
 482   \param[in] DAG The selection DAG
 483   \param[in] ST CellSPU subtarget information structure
 484   \param[in,out] alignment Caller initializes this to the load or store node's
 485   value from getAlignment(), may be updated while generating the aligned load
 486   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 487   offset (divisible by 16, modulo 16 == 0)
 488   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 489   offset of the preferred slot (modulo 16 != 0)
 490   \param[in,out] VT Caller initializes this value type to the the load or store
 491   node's loaded or stored value type; may be updated if an i1-extended load or
 492   store.
 493   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 494   otherwise false. Can help to determine if the chunk needs to be rotated.
 495
 496  Both load and store lowering load a block of data aligned on a 16-byte
 497  boundary. This is the common aligned load code shared between both.
 498  */
 499 static SDValue
 500 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 501             LSBaseSDNode *LSN,
 502             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 503             MVT &VT, bool &was16aligned)
 504 {
 505   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 506   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 507   SDValue basePtr = LSN->getBasePtr();
 508   SDValue chain = LSN->getChain();
 509
 510   if (basePtr.getOpcode() == ISD::ADD) {
 511     SDValue Op1 = basePtr.getNode()->getOperand(1);
 512
 513     if (Op1.getOpcode() == ISD::Constant
 514         || Op1.getOpcode() == ISD::TargetConstant) {
 515       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 516
 517       alignOffs = (int) CN->getZExtValue();
 518       prefSlotOffs = (int) (alignOffs & 0xf);
 519
 520       // Adjust the rotation amount to ensure that the final result ends up in
 521       // the preferred slot:
 522       prefSlotOffs -= vtm->prefslot_byte;
 523       basePtr = basePtr.getOperand(0);
 524
 525       // Loading from memory, can we adjust alignment?
 526       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 527         SDValue APtr = basePtr.getOperand(0);
 528         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 529           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 530           alignment = GSDN->getGlobal()->getAlignment();
 531         }
 532       }
 533     } else {
 534       alignOffs = 0;
 535       prefSlotOffs = -vtm->prefslot_byte;
 536     }
 537   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
 538     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
 539     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
 540     prefSlotOffs = (int) (alignOffs & 0xf);
 541     prefSlotOffs -= vtm->prefslot_byte;
 542     basePtr = DAG.getRegister(SPU::R1, VT);
 543   } else {
 544     alignOffs = 0;
 545     prefSlotOffs = -vtm->prefslot_byte;
 546   }
 547
 548   if (alignment == 16) {
 549     // Realign the base pointer as a D-Form address:
 550     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 551       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 552                             basePtr,
 553                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 554     }
 555
 556     // Emit the vector load:
 557     was16aligned = true;
 558     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 559                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 560                        LSN->isVolatile(), 16);
 561   }
 562
 563   // Unaligned load or we're using the "large memory" model, which means that
 564   // we have to be very pessimistic:
 565   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 566     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
 567                           DAG.getConstant(0, PtrVT));
 568   }
 569
 570   // Add the offset
 571   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
 572                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
 573   was16aligned = false;
 574   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 575                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 576                      LSN->isVolatile(), 16);
 577 }
 578
 579 /// Custom lower loads for CellSPU
 580 /*!
 581  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 582  within a 16-byte block, we have to rotate to extract the requested element.
 583  */
 584 static SDValue
 585 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 586   LoadSDNode *LN = cast<LoadSDNode>(Op);
 587   SDValue the_chain = LN->getChain();
 588   MVT VT = LN->getMemoryVT();
 589   MVT OpVT = Op.getNode()->getValueType(0);
 590   ISD::LoadExtType ExtType = LN->getExtensionType();
 591   unsigned alignment = LN->getAlignment();
 592   SDValue Ops[8];
 593
 594   switch (LN->getAddressingMode()) {
 595   case ISD::UNINDEXED: {
 596     int offset, rotamt;
 597     bool was16aligned;
 598     SDValue result =
 599       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 600
 601     if (result.getNode() == 0)
 602       return result;
 603
 604     the_chain = result.getValue(1);
 605     // Rotate the chunk if necessary
 606     if (rotamt < 0)
 607       rotamt += 16;
 608     if (rotamt != 0 || !was16aligned) {
 609       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 610
 611       Ops[0] = the_chain;
 612       Ops[1] = result;
 613       if (was16aligned) {
 614         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 615       } else {
 616         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 617         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 618         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 619                              DAG.getConstant(rotamt, PtrVT));
 620       }
 621
 622       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 623       the_chain = result.getValue(1);
 624     }
 625
 626     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 627       SDVTList scalarvts;
 628       MVT vecVT = MVT::v16i8;
 629
 630       // Convert the loaded v16i8 vector to the appropriate vector type
 631       // specified by the operand:
 632       if (OpVT == VT) {
 633         if (VT != MVT::i1)
 634           vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 635       } else
 636         vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
 637
 638       Ops[0] = the_chain;
 639       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 640       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 641       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 642       the_chain = result.getValue(1);
 643     } else {
 644       // Handle the sign and zero-extending loads for i1 and i8:
 645       unsigned NewOpC;
 646
 647       if (ExtType == ISD::SEXTLOAD) {
 648         NewOpC = (OpVT == MVT::i1
 649                   ? SPUISD::EXTRACT_I1_SEXT
 650                   : SPUISD::EXTRACT_I8_SEXT);
 651       } else {
 652         assert(ExtType == ISD::ZEXTLOAD);
 653         NewOpC = (OpVT == MVT::i1
 654                   ? SPUISD::EXTRACT_I1_ZEXT
 655                   : SPUISD::EXTRACT_I8_ZEXT);
 656       }
 657
 658       result = DAG.getNode(NewOpC, OpVT, result);
 659     }
 660
 661     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 662     SDValue retops[2] = {
 663       result,
 664       the_chain
 665     };
 666
 667     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 668                          retops, sizeof(retops) / sizeof(retops[0]));
 669     return result;
 670   }
 671   case ISD::PRE_INC:
 672   case ISD::PRE_DEC:
 673   case ISD::POST_INC:
 674   case ISD::POST_DEC:
 675   case ISD::LAST_INDEXED_MODE:
 676     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 677             "UNINDEXED\n";
 678     cerr << (unsigned) LN->getAddressingMode() << "\n";
 679     abort();
 680     /*NOTREACHED*/
 681   }
 682
 683   return SDValue();
 684 }
 685
 686 /// Custom lower stores for CellSPU
 687 /*!
 688  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 689  within a 16-byte block, we have to generate a shuffle to insert the
 690  requested element into its place, then store the resulting block.
 691  */
 692 static SDValue
 693 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 694   StoreSDNode *SN = cast<StoreSDNode>(Op);
 695   SDValue Value = SN->getValue();
 696   MVT VT = Value.getValueType();
 697   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 698   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 699   unsigned alignment = SN->getAlignment();
 700
 701   switch (SN->getAddressingMode()) {
 702   case ISD::UNINDEXED: {
 703     int chunk_offset, slot_offset;
 704     bool was16aligned;
 705
 706     // The vector type we really want to load from the 16-byte chunk, except
 707     // in the case of MVT::i1, which has to be v16i8.
 708     MVT vecVT, stVecVT = MVT::v16i8;
 709
 710     if (StVT != MVT::i1)
 711       stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
 712     vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
 713
 714     SDValue alignLoadVec =
 715       AlignedLoad(Op, DAG, ST, SN, alignment,
 716                   chunk_offset, slot_offset, VT, was16aligned);
 717
 718     if (alignLoadVec.getNode() == 0)
 719       return alignLoadVec;
 720
 721     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 722     SDValue basePtr = LN->getBasePtr();
 723     SDValue the_chain = alignLoadVec.getValue(1);
 724     SDValue theValue = SN->getValue();
 725     SDValue result;
 726
 727     if (StVT != VT
 728         && (theValue.getOpcode() == ISD::AssertZext
 729             || theValue.getOpcode() == ISD::AssertSext)) {
 730       // Drill down and get the value for zero- and sign-extended
 731       // quantities
 732       theValue = theValue.getOperand(0);
 733     }
 734
 735     chunk_offset &= 0xf;
 736
 737     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 738     SDValue insertEltPtr;
 739     SDValue insertEltOp;
 740
 741     // If the base pointer is already a D-form address, then just create
 742     // a new D-form address with a slot offset and the orignal base pointer.
 743     // Otherwise generate a D-form address with the slot offset relative
 744     // to the stack pointer, which is always aligned.
 745     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 746     DEBUG(basePtr.getNode()->dump(&DAG));
 747     DEBUG(cerr << "\n");
 748
 749     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
 750         (basePtr.getOpcode() == ISD::ADD
 751          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
 752       insertEltPtr = basePtr;
 753     } else {
 754       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
 755     }
 756
 757     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 758     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 759                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 760                          alignLoadVec,
 761                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 762
 763     result = DAG.getStore(the_chain, result, basePtr,
 764                           LN->getSrcValue(), LN->getSrcValueOffset(),
 765                           LN->isVolatile(), LN->getAlignment());
 766
 767     return result;
 768     /*UNREACHED*/
 769   }
 770   case ISD::PRE_INC:
 771   case ISD::PRE_DEC:
 772   case ISD::POST_INC:
 773   case ISD::POST_DEC:
 774   case ISD::LAST_INDEXED_MODE:
 775     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 776             "UNINDEXED\n";
 777     cerr << (unsigned) SN->getAddressingMode() << "\n";
 778     abort();
 779     /*NOTREACHED*/
 780   }
 781
 782   return SDValue();
 783 }
 784
 785 /// Generate the address of a constant pool entry.
 786 static SDValue
 787 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 788   MVT PtrVT = Op.getValueType();
 789   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 790   Constant *C = CP->getConstVal();
 791   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 792   SDValue Zero = DAG.getConstant(0, PtrVT);
 793   const TargetMachine &TM = DAG.getTarget();
 794
 795   if (TM.getRelocationModel() == Reloc::Static) {
 796     if (!ST->usingLargeMem()) {
 797       // Just return the SDValue with the constant pool address in it.
 798       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 799     } else {
 800       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 801       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 802       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 803     }
 804   }
 805
 806   assert(0 &&
 807          "LowerConstantPool: Relocation model other than static"
 808          " not supported.");
 809   return SDValue();
 810 }
 811
 812 static SDValue
 813 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 814   MVT PtrVT = Op.getValueType();
 815   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 816   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 817   SDValue Zero = DAG.getConstant(0, PtrVT);
 818   const TargetMachine &TM = DAG.getTarget();
 819
 820   if (TM.getRelocationModel() == Reloc::Static) {
 821     if (!ST->usingLargeMem()) {
 822       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
 823     } else {
 824       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 825       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 826       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 827     }
 828   }
 829
 830   assert(0 &&
 831          "LowerJumpTable: Relocation model other than static not supported.");
 832   return SDValue();
 833 }
 834
 835 static SDValue
 836 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 837   MVT PtrVT = Op.getValueType();
 838   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 839   GlobalValue *GV = GSDN->getGlobal();
 840   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 841   const TargetMachine &TM = DAG.getTarget();
 842   SDValue Zero = DAG.getConstant(0, PtrVT);
 843
 844   if (TM.getRelocationModel() == Reloc::Static) {
 845     if (!ST->usingLargeMem()) {
 846       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
 847     } else {
 848       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 849       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 850       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
 851     }
 852   } else {
 853     cerr << "LowerGlobalAddress: Relocation model other than static not "
 854          << "supported.\n";
 855     abort();
 856     /*NOTREACHED*/
 857   }
 858
 859   return SDValue();
 860 }
 861
 862 //! Custom lower i64 integer constants
 863 /*!
 864  This code inserts all of the necessary juggling that needs to occur to load
 865  a 64-bit constant into a register.
 866  */
 867 static SDValue
 868 LowerConstant(SDValue Op, SelectionDAG &DAG) {
 869   MVT VT = Op.getValueType();
 870   ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
 871
 872   if (VT == MVT::i64) {
 873     SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
 874     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 875                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 876   } else {
 877     cerr << "LowerConstant: unhandled constant type "
 878          << VT.getMVTString()
 879          << "\n";
 880     abort();
 881     /*NOTREACHED*/
 882   }
 883
 884   return SDValue();
 885 }
 886
 887 //! Custom lower double precision floating point constants
 888 static SDValue
 889 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 890   MVT VT = Op.getValueType();
 891   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 892
 893   assert((FP != 0) &&
 894          "LowerConstantFP: Node is not ConstantFPSDNode");
 895
 896   if (VT == MVT::f64) {
 897     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 898     return DAG.getNode(ISD::BIT_CONVERT, VT,
 899                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 900   }
 901
 902   return SDValue();
 903 }
 904
 905 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 906 static SDValue
 907 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
 908 {
 909   SDValue Cond = Op.getOperand(1);
 910   MVT CondVT = Cond.getValueType();
 911   MVT CondNVT;
 912
 913   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 914     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 915     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 916                       Op.getOperand(0),
 917                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 918                       Op.getOperand(2));
 919   } else
 920     return SDValue();                // Unchanged
 921 }
 922
 923 static SDValue
 924 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 925 {
 926   MachineFunction &MF = DAG.getMachineFunction();
 927   MachineFrameInfo *MFI = MF.getFrameInfo();
 928   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 929   SmallVector<SDValue, 8> ArgValues;
 930   SDValue Root = Op.getOperand(0);
 931   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
 932
 933   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 934   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 935
 936   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 937   unsigned ArgRegIdx = 0;
 938   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 939
 940   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 941
 942   // Add DAG nodes to load the arguments or copy them out of registers.
 943   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
 944        ArgNo != e; ++ArgNo) {
 945     SDValue ArgVal;
 946     bool needsLoad = false;
 947     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
 948     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
 949
 950     switch (ObjectVT.getSimpleVT()) {
 951     default: {
 952       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 953            << ObjectVT.getMVTString()
 954            << "\n";
 955       abort();
 956     }
 957     case MVT::i8:
 958       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 959         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 960         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 961         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 962         ++ArgRegIdx;
 963       } else {
 964         needsLoad = true;
 965       }
 966       break;
 967     case MVT::i16:
 968       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 969         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 970         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 971         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 972         ++ArgRegIdx;
 973       } else {
 974         needsLoad = true;
 975       }
 976       break;
 977     case MVT::i32:
 978       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 979         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 980         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 981         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 982         ++ArgRegIdx;
 983       } else {
 984         needsLoad = true;
 985       }
 986       break;
 987     case MVT::i64:
 988       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 989         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
 990         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 991         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 992         ++ArgRegIdx;
 993       } else {
 994         needsLoad = true;
 995       }
 996       break;
 997     case MVT::f32:
 998       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 999         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1000         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1002         ++ArgRegIdx;
1003       } else {
1004         needsLoad = true;
1005       }
1006       break;
1007     case MVT::f64:
1008       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1010         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1012         ++ArgRegIdx;
1013       } else {
1014         needsLoad = true;
1015       }
1016       break;
1017     case MVT::v2f64:
1018     case MVT::v4f32:
1019     case MVT::v2i64:
1020     case MVT::v4i32:
1021     case MVT::v8i16:
1022     case MVT::v16i8:
1023       if (!isVarArg && ArgRegIdx < NumArgRegs) {
1024         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1025         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1026         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1027         ++ArgRegIdx;
1028       } else {
1029         needsLoad = true;
1030       }
1031       break;
1032     }
1033
1034     // We need to load the argument to a virtual register if we determined above
1035     // that we ran out of physical registers of the appropriate type
1036     if (needsLoad) {
1037       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1038       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1039       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1040       ArgOffset += StackSlotSize;
1041     }
1042
1043     ArgValues.push_back(ArgVal);
1044   }
1045
1046   // If the function takes variable number of arguments, make a frame index for
1047   // the start of the first vararg value... for expansion of llvm.va_start.
1048   if (isVarArg) {
1049     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1050                                                ArgOffset);
1051     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1052     // If this function is vararg, store any remaining integer argument regs to
1053     // their spots on the stack so that they may be loaded by deferencing the
1054     // result of va_next.
1055     SmallVector<SDValue, 8> MemOps;
1056     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1057       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1058       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1059       SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1060       SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1061       MemOps.push_back(Store);
1062       // Increment the address by four for the next argument to store
1063       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1064       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1065     }
1066     if (!MemOps.empty())
1067       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1068   }
1069
1070   ArgValues.push_back(Root);
1071
1072   // Return the new list of results.
1073   return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1074                             ArgValues.size());
1075 }
1076
1077 /// isLSAAddress - Return the immediate to use if the specified
1078 /// value is representable as a LSA address.
1079 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1080   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1081   if (!C) return 0;
1082
1083   int Addr = C->getZExtValue();
1084   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1085       (Addr << 14 >> 14) != Addr)
1086     return 0;  // Top 14 bits have to be sext of immediate.
1087
1088   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1089 }
1090
/// Lower a call node into an SPUISD::CALL sequence.
///
/// Arguments are assigned to the argument registers in order until those run
/// out; the rest are stored relative to R1, one stack slot each.  The call is
/// bracketed by CALLSEQ_START/CALLSEQ_END, and any results are copied out of
/// R3 (and R4 for a pair of i32 results).
///
/// \param Op  The call being lowered (must be a CallSDNode).
/// \param DAG The selection DAG in which replacement nodes are created.
/// \param ST  Subtarget information; selects the callee address form via
///            usingLargeMem().
/// \return The chain alone when the call produces no value, otherwise the
///         result number Op.getResNo() of a MERGE_VALUES node holding the
///         results followed by the chain.
static
SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
#if 0
  bool isVarArg   = TheCall->isVarArg();
  bool isTailCall = TheCall->isTailCall();
#endif
  SDValue Callee    = TheCall->getCallee();
  unsigned NumOps     = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // All three case groups below do the same thing: take the next argument
    // register if one is left, otherwise store the argument at PtrOff and
    // advance ArgOffset by one stack slot.  Types not listed hit the assert.
    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  // NOTE(review): the stores in MemOpChains were chained on the incoming
  // chain, so when this TokenFactor replaces Chain the CALLSEQ_START node
  // created just above is not one of its operands -- confirm this is intended.
  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    // NOTE(review): this rebuilds a plain ExternalSymbol node, not the
    // TargetExternalSymbol mentioned in the comment above -- confirm.
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // The flag from the last copy-to-reg, if any, goes last.
  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumStackBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  // Only pick up the CALLSEQ_END flag when there is a result to copy out.
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  // At most two results plus the trailing chain.
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  // Each copy-from-reg's value 0 is the result and value 1 the new chain.
  // Result types not listed here (for example v2i64) hit the assert.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      // Two i32 results: the first is read from R4, the second from R3, with
      // the flag (value 2) of the first copy threaded into the second.
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                   InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}
1307
1308 static SDValue
1309 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1310   SmallVector<CCValAssign, 16> RVLocs;
1311   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1312   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1313   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1314   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1315
1316   // If this is the first return lowered for this function, add the regs to the
1317   // liveout set for the function.
1318   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1319     for (unsigned i = 0; i != RVLocs.size(); ++i)
1320       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1321   }
1322
1323   SDValue Chain = Op.getOperand(0);
1324   SDValue Flag;
1325
1326   // Copy the result values into the output registers.
1327   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1328     CCValAssign &VA = RVLocs[i];
1329     assert(VA.isRegLoc() && "Can only return in registers!");
1330     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1331     Flag = Chain.getValue(1);
1332   }
1333
1334   if (Flag.getNode())
1335     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1336   else
1337     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1338 }
1339
1340
1341 //===----------------------------------------------------------------------===//
1342 // Vector related lowering:
1343 //===----------------------------------------------------------------------===//
1344
1345 static ConstantSDNode *
1346 getVecImm(SDNode *N) {
1347   SDValue OpVal(0, 0);
1348
1349   // Check to see if this buildvec has a single non-undef value in its elements.
1350   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1351     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1352     if (OpVal.getNode() == 0)
1353       OpVal = N->getOperand(i);
1354     else if (OpVal != N->getOperand(i))
1355       return 0;
1356   }
1357
1358   if (OpVal.getNode() != 0) {
1359     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1360       return CN;
1361     }
1362   }
1363
1364   return 0; // All UNDEF: use implicit def.; not Constant node
1365 }
1366
1367 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1368 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1369 /// constant
1370 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1371                               MVT ValueType) {
1372   if (ConstantSDNode *CN = getVecImm(N)) {
1373     uint64_t Value = CN->getZExtValue();
1374     if (ValueType == MVT::i64) {
1375       uint64_t UValue = CN->getZExtValue();
1376       uint32_t upper = uint32_t(UValue >> 32);
1377       uint32_t lower = uint32_t(UValue);
1378       if (upper != lower)
1379         return SDValue();
1380       Value = Value >> 32;
1381     }
1382     if (Value <= 0x3ffff)
1383       return DAG.getConstant(Value, ValueType);
1384   }
1385
1386   return SDValue();
1387 }
1388
1389 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1390 /// and the value fits into a signed 16-bit constant, and if so, return the
1391 /// constant
1392 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1393                               MVT ValueType) {
1394   if (ConstantSDNode *CN = getVecImm(N)) {
1395     int64_t Value = CN->getSExtValue();
1396     if (ValueType == MVT::i64) {
1397       uint64_t UValue = CN->getZExtValue();
1398       uint32_t upper = uint32_t(UValue >> 32);
1399       uint32_t lower = uint32_t(UValue);
1400       if (upper != lower)
1401         return SDValue();
1402       Value = Value >> 32;
1403     }
1404     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1405       return DAG.getConstant(Value, ValueType);
1406     }
1407   }
1408
1409   return SDValue();
1410 }
1411
1412 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1413 /// and the value fits into a signed 10-bit constant, and if so, return the
1414 /// constant
1415 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1416                               MVT ValueType) {
1417   if (ConstantSDNode *CN = getVecImm(N)) {
1418     int64_t Value = CN->getSExtValue();
1419     if (ValueType == MVT::i64) {
1420       uint64_t UValue = CN->getZExtValue();
1421       uint32_t upper = uint32_t(UValue >> 32);
1422       uint32_t lower = uint32_t(UValue);
1423       if (upper != lower)
1424         return SDValue();
1425       Value = Value >> 32;
1426     }
1427     if (isS10Constant(Value))
1428       return DAG.getConstant(Value, ValueType);
1429   }
1430
1431   return SDValue();
1432 }
1433
1434 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1435 /// and the value fits into a signed 8-bit constant, and if so, return the
1436 /// constant.
1437 ///
1438 /// @note: The incoming vector is v16i8 because that's the only way we can load
1439 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1440 /// same value.
1441 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1442                              MVT ValueType) {
1443   if (ConstantSDNode *CN = getVecImm(N)) {
1444     int Value = (int) CN->getZExtValue();
1445     if (ValueType == MVT::i16
1446         && Value <= 0xffff                 /* truncated from uint64_t */
1447         && ((short) Value >> 8) == ((short) Value & 0xff))
1448       return DAG.getConstant(Value & 0xff, ValueType);
1449     else if (ValueType == MVT::i8
1450              && (Value & 0xff) == Value)
1451       return DAG.getConstant(Value, ValueType);
1452   }
1453
1454   return SDValue();
1455 }
1456
1457 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1458 /// and the value fits into a signed 16-bit constant, and if so, return the
1459 /// constant
1460 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1461                                MVT ValueType) {
1462   if (ConstantSDNode *CN = getVecImm(N)) {
1463     uint64_t Value = CN->getZExtValue();
1464     if ((ValueType == MVT::i32
1465           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1466         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1467       return DAG.getConstant(Value >> 16, ValueType);
1468   }
1469
1470   return SDValue();
1471 }
1472
1473 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1474 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1475   if (ConstantSDNode *CN = getVecImm(N)) {
1476     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1477   }
1478
1479   return SDValue();
1480 }
1481
1482 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1483 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1484   if (ConstantSDNode *CN = getVecImm(N)) {
1485     return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);
1486   }
1487
1488   return SDValue();
1489 }
1490
1491 // If this is a vector of constants or undefs, get the bits.  A bit in
1492 // UndefBits is set if the corresponding element of the vector is an
1493 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1494 // zero.   Return true if this is not an array of constants, false if it is.
1495 //
1496 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1497                                        uint64_t UndefBits[2]) {
1498   // Start with zero'd results.
1499   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1500
1501   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1502   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1503     SDValue OpVal = BV->getOperand(i);
1504
1505     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1506     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1507
1508     uint64_t EltBits = 0;
1509     if (OpVal.getOpcode() == ISD::UNDEF) {
1510       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1511       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1512       continue;
1513     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1514       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1515     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1516       const APFloat &apf = CN->getValueAPF();
1517       EltBits = (CN->getValueType(0) == MVT::f32
1518                  ? FloatToBits(apf.convertToFloat())
1519                  : DoubleToBits(apf.convertToDouble()));
1520     } else {
1521       // Nonconstant element.
1522       return true;
1523     }
1524
1525     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1526   }
1527
1528   //printf("%llx %llx  %llx %llx\n",
1529   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1530   return false;
1531 }
1532
/// Determine whether a 128-bit constant is a splat (repetition) of one value,
/// at the granularity selected by \p MinSplatBits.
///
/// \param Bits128      the constant's bits as two 64-bit words
/// \param Undef128     mask of undefined bits, same layout as \p Bits128;
///                     undefined bits never prevent a splat from matching
/// \param MinSplatBits smallest splat width the caller can use.  The splat
///                     width examined is 64 bits when MinSplatBits >= 64,
///                     32 bits when it is in [32, 64), 16 bits when it is in
///                     [16, 32) and 8 bits otherwise.
/// \param SplatBits    [out] the repeated value (defined bits of all copies
///                     merged together)
/// \param SplatUndef   [out] bits that are undefined in every copy
/// \param SplatSize    [out] width of the splat in bytes (8, 4, 2 or 1)
/// \return true and fills in the outputs if the constant repeats at the
///         selected width; false (outputs untouched) otherwise.
///
/// For example, "0x01010101010101..." is a splat of 0x01, 0x0101, and
/// 0x01010101.  With MinSplatBits = 8 we return SplatBits = 0x01 and
/// SplatSize = 1 byte; with MinSplatBits = 16, SplatBits = 0x0101 and
/// SplatSize = 2.  Note that only the selected width is tried: a constant that
/// repeats every 16 bits but not every 8 bits is rejected when
/// MinSplatBits = 8.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Fold the constant in half repeatedly.  Value bits are OR'd together
  // (undefined bits are zero, so the defined copy wins) and undef masks are
  // AND'd (a bit stays undefined only if it is undefined in every copy).
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // The two 64-bit halves must agree, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  if (MinSplatBits >= 64) {
    // Report the merged halves, exactly as the narrower cases below do.
    // Using Bits128[0] alone would lose the value whenever the first half
    // is undefined and only the second half carries the constant.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // The top 32 bits must be the same as the lower 32 bits, ignoring undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // The top 16 bits must be the same as the lower 16 bits, ignoring undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // The top 8 bits must be the same as the lower 8 bits, ignoring undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1597
1598 // If this is a case we can't handle, return null and let the default
1599 // expansion code take care of it.  If we CAN select this case, and if it
1600 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1601 // this case more efficiently than a constant pool load, lower it to the
1602 // sequence of ops that should be used.
1603 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1604   MVT VT = Op.getValueType();
1605   // If this is a vector of constants or undefs, get the bits.  A bit in
1606   // UndefBits is set if the corresponding element of the vector is an
1607   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1608   // zero.
1609   uint64_t VectorBits[2];
1610   uint64_t UndefBits[2];
1611   uint64_t SplatBits, SplatUndef;
1612   int SplatSize;
1613   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1614       || !isConstantSplat(VectorBits, UndefBits,
1615                           VT.getVectorElementType().getSizeInBits(),
1616                           SplatBits, SplatUndef, SplatSize))
1617     return SDValue();   // Not a constant vector, not a splat.
1618
1619   switch (VT.getSimpleVT()) {
1620   default:
1621   case MVT::v4f32: {
1622     uint32_t Value32 = SplatBits;
1623     assert(SplatSize == 4
1624            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1625     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1626     SDValue T = DAG.getConstant(Value32, MVT::i32);
1627     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1628                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1629     break;
1630   }
1631   case MVT::v2f64: {
1632     uint64_t f64val = SplatBits;
1633     assert(SplatSize == 8
1634            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1635     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1636     SDValue T = DAG.getConstant(f64val, MVT::i64);
1637     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1638                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1639     break;
1640   }
1641   case MVT::v16i8: {
1642    // 8-bit constants have to be expanded to 16-bits
1643    unsigned short Value16 = SplatBits | (SplatBits << 8);
1644    SDValue Ops[8];
1645    for (int i = 0; i < 8; ++i)
1646      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1647    return DAG.getNode(ISD::BIT_CONVERT, VT,
1648                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1649   }
1650   case MVT::v8i16: {
1651     unsigned short Value16;
1652     if (SplatSize == 2)
1653       Value16 = (unsigned short) (SplatBits & 0xffff);
1654     else
1655       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1656     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1657     SDValue Ops[8];
1658     for (int i = 0; i < 8; ++i) Ops[i] = T;
1659     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1660   }
1661   case MVT::v4i32: {
1662     unsigned int Value = SplatBits;
1663     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1664     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1665   }
1666   case MVT::v2i64: {
1667     uint64_t val = SplatBits;
1668     uint32_t upper = uint32_t(val >> 32);
1669     uint32_t lower = uint32_t(val);
1670
1671     if (upper == lower) {
1672       // Magic constant that can be matched by IL, ILA, et. al.
1673       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1674       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1675     } else {
1676       SDValue LO32;
1677       SDValue HI32;
1678       SmallVector<SDValue, 16> ShufBytes;
1679       SDValue Result;
1680       bool upper_special, lower_special;
1681
1682       // NOTE: This code creates common-case shuffle masks that can be easily
1683       // detected as common expressions. It is not attempting to create highly
1684       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1685
1686       // Detect if the upper or lower half is a special shuffle mask pattern:
1687       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1688       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1689
1690       // Create lower vector if not a special pattern
1691       if (!lower_special) {
1692         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1693         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1694                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1695                                        LO32C, LO32C, LO32C, LO32C));
1696       }
1697
1698       // Create upper vector if not a special pattern
1699       if (!upper_special) {
1700         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1701         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1702                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1703                                        HI32C, HI32C, HI32C, HI32C));
1704       }
1705
1706       // If either upper or lower are special, then the two input operands are
1707       // the same (basically, one of them is a "don't care")
1708       if (lower_special)
1709         LO32 = HI32;
1710       if (upper_special)
1711         HI32 = LO32;
1712       if (lower_special && upper_special) {
1713         // Unhappy situation... both upper and lower are special, so punt with
1714         // a target constant:
1715         SDValue Zero = DAG.getConstant(0, MVT::i32);
1716         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1717                                   Zero, Zero);
1718       }
1719
1720       for (int i = 0; i < 4; ++i) {
1721         uint64_t val = 0;
1722         for (int j = 0; j < 4; ++j) {
1723           SDValue V;
1724           bool process_upper, process_lower;
1725           val <<= 8;
1726           process_upper = (upper_special && (i & 1) == 0);
1727           process_lower = (lower_special && (i & 1) == 1);
1728
1729           if (process_upper || process_lower) {
1730             if ((process_upper && upper == 0)
1731                 || (process_lower && lower == 0))
1732               val |= 0x80;
1733             else if ((process_upper && upper == 0xffffffff)
1734                      || (process_lower && lower == 0xffffffff))
1735               val |= 0xc0;
1736             else if ((process_upper && upper == 0x80000000)
1737                      || (process_lower && lower == 0x80000000))
1738               val |= (j == 0 ? 0xe0 : 0x80);
1739           } else
1740             val |= i * 4 + j + ((i & 1) * 16);
1741         }
1742
1743         ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1744       }
1745
1746       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1747                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1748                                      &ShufBytes[0], ShufBytes.size()));
1749     }
1750   }
1751   }
1752
1753   return SDValue();
1754 }
1755
1756 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1757 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1758 /// permutation vector, V3, is monotonically increasing with one "exception"
1759 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1760 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1761 /// In either case, the net result is going to eventually invoke SHUFB to
1762 /// permute/shuffle the bytes from V1 and V2.
1763 /// \note
1764 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1765 /// control word for byte/halfword/word insertion. This takes care of a single
1766 /// element move from V2 into V1.
1767 /// \note
1768 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1769 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1770   SDValue V1 = Op.getOperand(0);
1771   SDValue V2 = Op.getOperand(1);
1772   SDValue PermMask = Op.getOperand(2);
1773
1774   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1775
1776   // If we have a single element being moved from V1 to V2, this can be handled
1777   // using the C*[DX] compute mask instructions, but the vector elements have
1778   // to be monotonically increasing with one exception element.
1779   MVT EltVT = V1.getValueType().getVectorElementType();
1780   unsigned EltsFromV2 = 0;
1781   unsigned V2Elt = 0;
1782   unsigned V2EltIdx0 = 0;
1783   unsigned CurrElt = 0;
1784   bool monotonic = true;
1785   if (EltVT == MVT::i8)
1786     V2EltIdx0 = 16;
1787   else if (EltVT == MVT::i16)
1788     V2EltIdx0 = 8;
1789   else if (EltVT == MVT::i32)
1790     V2EltIdx0 = 4;
1791   else
1792     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1793
1794   for (unsigned i = 0, e = PermMask.getNumOperands();
1795        EltsFromV2 <= 1 && monotonic && i != e;
1796        ++i) {
1797     unsigned SrcElt;
1798     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1799       SrcElt = 0;
1800     else
1801       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1802
1803     if (SrcElt >= V2EltIdx0) {
1804       ++EltsFromV2;
1805       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1806     } else if (CurrElt != SrcElt) {
1807       monotonic = false;
1808     }
1809
1810     ++CurrElt;
1811   }
1812
1813   if (EltsFromV2 == 1 && monotonic) {
1814     // Compute mask and shuffle
1815     MachineFunction &MF = DAG.getMachineFunction();
1816     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819     // Initialize temporary register to 0
1820     SDValue InitTempReg =
1821       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1822     // Copy register's contents as index in INSERT_MASK:
1823     SDValue ShufMaskOp =
1824       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1825                   DAG.getTargetConstant(V2Elt, MVT::i32),
1826                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827     // Use shuffle mask in SHUFB synthetic instruction:
1828     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829   } else {
1830    // Convert the SHUFFLE_VECTOR mask's input element units to the
1831    // actual bytes.
1832     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1833
1834     SmallVector<SDValue, 16> ResultMask;
1835     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1836       unsigned SrcElt;
1837       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1838         SrcElt = 0;
1839       else
1840         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1841
1842       for (unsigned j = 0; j < BytesPerElement; ++j) {
1843         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1844                                              MVT::i8));
1845       }
1846     }
1847
1848     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1849                                       &ResultMask[0], ResultMask.size());
1850     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1851   }
1852 }
1853
1854 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1855   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1856
1857   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1858     // For a constant, build the appropriate constant vector, which will
1859     // eventually simplify to a vector register load.
1860
1861     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1862     SmallVector<SDValue, 16> ConstVecValues;
1863     MVT VT;
1864     size_t n_copies;
1865
1866     // Create a constant vector:
1867     switch (Op.getValueType().getSimpleVT()) {
1868     default: assert(0 && "Unexpected constant value type in "
1869                          "LowerSCALAR_TO_VECTOR");
1870     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1871     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1872     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1873     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1874     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1875     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1876     }
1877
1878     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1879     for (size_t j = 0; j < n_copies; ++j)
1880       ConstVecValues.push_back(CValue);
1881
1882     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1883                        &ConstVecValues[0], ConstVecValues.size());
1884   } else {
1885     // Otherwise, copy the value from one register to another:
1886     switch (Op0.getValueType().getSimpleVT()) {
1887     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1888     case MVT::i8:
1889     case MVT::i16:
1890     case MVT::i32:
1891     case MVT::i64:
1892     case MVT::f32:
1893     case MVT::f64:
1894       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1895     }
1896   }
1897
1898   return SDValue();
1899 }
1900
1901 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1902   switch (Op.getValueType().getSimpleVT()) {
1903   default:
1904     cerr << "CellSPU: Unknown vector multiplication, got "
1905          << Op.getValueType().getMVTString()
1906          << "\n";
1907     abort();
1908     /*NOTREACHED*/
1909
1910   case MVT::v4i32: {
1911     SDValue rA = Op.getOperand(0);
1912     SDValue rB = Op.getOperand(1);
1913     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1914     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1915     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1916     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1917
1918     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1919     break;
1920   }
1921
1922   // Multiply two v8i16 vectors (pipeline friendly version):
1923   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1924   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1925   // c) Use SELB to select upper and lower halves from the intermediate results
1926   //
1927   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1928   // dual-issue. This code does manage to do this, even if it's a little on
1929   // the wacky side
1930   case MVT::v8i16: {
1931     MachineFunction &MF = DAG.getMachineFunction();
1932     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1933     SDValue Chain = Op.getOperand(0);
1934     SDValue rA = Op.getOperand(0);
1935     SDValue rB = Op.getOperand(1);
1936     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1938
1939     SDValue FSMBOp =
1940       DAG.getCopyToReg(Chain, FSMBIreg,
1941                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1942                                    DAG.getConstant(0xcccc, MVT::i16)));
1943
1944     SDValue HHProd =
1945       DAG.getCopyToReg(FSMBOp, HiProdReg,
1946                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1947
1948     SDValue HHProd_v4i32 =
1949       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1951
1952     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1953                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1954                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1955                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1956                                                HHProd_v4i32,
1957                                                DAG.getConstant(16, MVT::i16))),
1958                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1959   }
1960
1961   // This M00sE is N@stI! (apologies to Monty Python)
1962   //
1963   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1964   // is to break it all apart, sign extend, and reassemble the various
1965   // intermediate products.
1966   case MVT::v16i8: {
1967     SDValue rA = Op.getOperand(0);
1968     SDValue rB = Op.getOperand(1);
1969     SDValue c8 = DAG.getConstant(8, MVT::i32);
1970     SDValue c16 = DAG.getConstant(16, MVT::i32);
1971
1972     SDValue LLProd =
1973       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1974                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1975                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1976
1977     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1978
1979     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1980
1981     SDValue LHProd =
1982       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1983                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1984
1985     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1986                                      DAG.getConstant(0x2222, MVT::i16));
1987
1988     SDValue LoProdParts =
1989       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1990                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1991                               LLProd, LHProd, FSMBmask));
1992
1993     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1994
1995     SDValue LoProd =
1996       DAG.getNode(ISD::AND, MVT::v4i32,
1997                   LoProdParts,
1998                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999                               LoProdMask, LoProdMask,
2000                               LoProdMask, LoProdMask));
2001
2002     SDValue rAH =
2003       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2004                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2005
2006     SDValue rBH =
2007       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2008                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2009
2010     SDValue HLProd =
2011       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2012                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2013                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2014
2015     SDValue HHProd_1 =
2016       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2017                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2018                               DAG.getNode(SPUISD::VEC_SRA,
2019                                           MVT::v4i32, rAH, c8)),
2020                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2021                               DAG.getNode(SPUISD::VEC_SRA,
2022                                           MVT::v4i32, rBH, c8)));
2023
2024     SDValue HHProd =
2025       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2026                   HLProd,
2027                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2028                   FSMBmask);
2029
2030     SDValue HiProd =
2031       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2032
2033     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2034                        DAG.getNode(ISD::OR, MVT::v4i32,
2035                                    LoProd, HiProd));
2036   }
2037   }
2038
2039   return SDValue();
2040 }
2041
2042 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2043   MachineFunction &MF = DAG.getMachineFunction();
2044   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2045
2046   SDValue A = Op.getOperand(0);
2047   SDValue B = Op.getOperand(1);
2048   MVT VT = Op.getValueType();
2049
2050   unsigned VRegBR, VRegC;
2051
2052   if (VT == MVT::f32) {
2053     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2054     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2055   } else {
2056     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2057     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2058   }
2059   // TODO: make sure we're feeding FPInterp the right arguments
2060   // Right now: fi B, frest(B)
2061
2062   // Computes BRcpl =
2063   // (Floating Interpolate (FP Reciprocal Estimate B))
2064   SDValue BRcpl =
2065       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2066                        DAG.getNode(SPUISD::FPInterp, VT, B,
2067                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2068
2069   // Computes A * BRcpl and stores in a temporary register
2070   SDValue AxBRcpl =
2071       DAG.getCopyToReg(BRcpl, VRegC,
2072                  DAG.getNode(ISD::FMUL, VT, A,
2073                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2074   // What's the Chain variable do? It's magic!
2075   // TODO: set Chain = Op(0).getEntryNode()
2076
2077   return DAG.getNode(ISD::FADD, VT,
2078                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2079                 DAG.getNode(ISD::FMUL, VT,
2080                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2081                         DAG.getNode(ISD::FSUB, VT, A,
2082                             DAG.getNode(ISD::FMUL, VT, B,
2083                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2084 }
2085
2086 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2087   MVT VT = Op.getValueType();
2088   SDValue N = Op.getOperand(0);
2089   SDValue Elt = Op.getOperand(1);
2090   SDValue ShufMask[16];
2091   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2092
2093   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2094
2095   int EltNo = (int) C->getZExtValue();
2096
2097   // sanity checks:
2098   if (VT == MVT::i8 && EltNo >= 16)
2099     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2100   else if (VT == MVT::i16 && EltNo >= 8)
2101     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2102   else if (VT == MVT::i32 && EltNo >= 4)
2103     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2104   else if (VT == MVT::i64 && EltNo >= 2)
2105     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2106
2107   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2108     // i32 and i64: Element 0 is the preferred slot
2109     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2110   }
2111
2112   // Need to generate shuffle mask and extract:
2113   int prefslot_begin = -1, prefslot_end = -1;
2114   int elt_byte = EltNo * VT.getSizeInBits() / 8;
2115
2116   switch (VT.getSimpleVT()) {
2117   default:
2118     assert(false && "Invalid value type!");
2119   case MVT::i8: {
2120     prefslot_begin = prefslot_end = 3;
2121     break;
2122   }
2123   case MVT::i16: {
2124     prefslot_begin = 2; prefslot_end = 3;
2125     break;
2126   }
2127   case MVT::i32: {
2128     prefslot_begin = 0; prefslot_end = 3;
2129     break;
2130   }
2131   case MVT::i64: {
2132     prefslot_begin = 0; prefslot_end = 7;
2133     break;
2134   }
2135   }
2136
2137   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2138          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2139
2140   for (int i = 0; i < 16; ++i) {
2141     // zero fill uppper part of preferred slot, don't care about the
2142     // other slots:
2143     unsigned int mask_val;
2144
2145     if (i <= prefslot_end) {
2146       mask_val =
2147         ((i < prefslot_begin)
2148          ? 0x80
2149          : elt_byte + (i - prefslot_begin));
2150
2151       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2152     } else
2153       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2154   }
2155
2156   SDValue ShufMaskVec =
2157     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2158                 &ShufMask[0],
2159                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2160
2161   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2162                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2163                                  N, N, ShufMaskVec));
2164
2165 }
2166
2167 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2168   SDValue VecOp = Op.getOperand(0);
2169   SDValue ValOp = Op.getOperand(1);
2170   SDValue IdxOp = Op.getOperand(2);
2171   MVT VT = Op.getValueType();
2172
2173   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2174   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2175
2176   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2177   // Use $2 because it's always 16-byte aligned and it's available:
2178   SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2179
2180   SDValue result =
2181     DAG.getNode(SPUISD::SHUFB, VT,
2182                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2183                 VecOp,
2184                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2185                             DAG.getNode(ISD::ADD, PtrVT,
2186                                         PtrBase,
2187                                         DAG.getConstant(CN->getZExtValue(),
2188                                                         PtrVT))));
2189
2190   return result;
2191 }
2192
2193 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2194 {
2195   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2196
2197   assert(Op.getValueType() == MVT::i8);
2198   switch (Opc) {
2199   default:
2200     assert(0 && "Unhandled i8 math operator");
2201     /*NOTREACHED*/
2202     break;
2203   case ISD::SUB: {
2204     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2205     // the result:
2206     SDValue N1 = Op.getOperand(1);
2207     N0 = (N0.getOpcode() != ISD::Constant
2208           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2209           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2210                             MVT::i16));
2211     N1 = (N1.getOpcode() != ISD::Constant
2212           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2213           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2214                             MVT::i16));
2215     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2216                        DAG.getNode(Opc, MVT::i16, N0, N1));
2217   }
2218   case ISD::ROTR:
2219   case ISD::ROTL: {
2220     SDValue N1 = Op.getOperand(1);
2221     unsigned N1Opc;
2222     N0 = (N0.getOpcode() != ISD::Constant
2223           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2224           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2225                             MVT::i16));
2226     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2227             ? ISD::ZERO_EXTEND
2228             : ISD::TRUNCATE;
2229     N1 = (N1.getOpcode() != ISD::Constant
2230           ? DAG.getNode(N1Opc, MVT::i16, N1)
2231           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2232                             MVT::i16));
2233     SDValue ExpandArg =
2234       DAG.getNode(ISD::OR, MVT::i16, N0,
2235                   DAG.getNode(ISD::SHL, MVT::i16,
2236                               N0, DAG.getConstant(8, MVT::i16)));
2237     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2239   }
2240   case ISD::SRL:
2241   case ISD::SHL: {
2242     SDValue N1 = Op.getOperand(1);
2243     unsigned N1Opc;
2244     N0 = (N0.getOpcode() != ISD::Constant
2245           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2246           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2247                             MVT::i16));
2248     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2249             ? ISD::ZERO_EXTEND
2250             : ISD::TRUNCATE;
2251     N1 = (N1.getOpcode() != ISD::Constant
2252           ? DAG.getNode(N1Opc, MVT::i16, N1)
2253           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2254                             MVT::i16));
2255     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2256                        DAG.getNode(Opc, MVT::i16, N0, N1));
2257   }
2258   case ISD::SRA: {
2259     SDValue N1 = Op.getOperand(1);
2260     unsigned N1Opc;
2261     N0 = (N0.getOpcode() != ISD::Constant
2262           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2263           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2264                             MVT::i16));
2265     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2266             ? ISD::SIGN_EXTEND
2267             : ISD::TRUNCATE;
2268     N1 = (N1.getOpcode() != ISD::Constant
2269           ? DAG.getNode(N1Opc, MVT::i16, N1)
2270           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2271                             MVT::i16));
2272     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2273                        DAG.getNode(Opc, MVT::i16, N0, N1));
2274   }
2275   case ISD::MUL: {
2276     SDValue N1 = Op.getOperand(1);
2277     unsigned N1Opc;
2278     N0 = (N0.getOpcode() != ISD::Constant
2279           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2280           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2281                             MVT::i16));
2282     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2283     N1 = (N1.getOpcode() != ISD::Constant
2284           ? DAG.getNode(N1Opc, MVT::i16, N1)
2285           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2286                             MVT::i16));
2287     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2288                        DAG.getNode(Opc, MVT::i16, N0, N1));
2289     break;
2290   }
2291   }
2292
2293   return SDValue();
2294 }
2295
2296 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2297 {
2298   MVT VT = Op.getValueType();
2299   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2300
2301   SDValue Op0 = Op.getOperand(0);
2302
2303   switch (Opc) {
2304   case ISD::ZERO_EXTEND:
2305   case ISD::SIGN_EXTEND:
2306   case ISD::ANY_EXTEND: {
2307     MVT Op0VT = Op0.getValueType();
2308     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2309
2310     assert(Op0VT == MVT::i32
2311            && "CellSPU: Zero/sign extending something other than i32");
2312     DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2313
2314     unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2315                       ? SPUISD::ROTBYTES_RIGHT_S
2316                       : SPUISD::ROTQUAD_RZ_BYTES);
2317     SDValue PromoteScalar =
2318       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2319
2320     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2321                        DAG.getNode(ISD::BIT_CONVERT, VecVT,
2322                                    DAG.getNode(NewOpc, Op0VecVT,
2323                                                PromoteScalar,
2324                                                DAG.getConstant(4, MVT::i32))));
2325   }
2326
2327   case ISD::ADD: {
2328     // Turn operands into vectors to satisfy type checking (shufb works on
2329     // vectors)
2330     SDValue Op0 =
2331       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2332     SDValue Op1 =
2333       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2334     SmallVector<SDValue, 16> ShufBytes;
2335
2336     // Create the shuffle mask for "rotating" the borrow up one register slot
2337     // once the borrow is generated.
2338     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2339     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2340     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2341     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2342
2343     SDValue CarryGen =
2344       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2345     SDValue ShiftedCarry =
2346       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2347                   CarryGen, CarryGen,
2348                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2349                               &ShufBytes[0], ShufBytes.size()));
2350
2351     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2352                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2353                                    Op0, Op1, ShiftedCarry));
2354   }
2355
2356   case ISD::SUB: {
2357     // Turn operands into vectors to satisfy type checking (shufb works on
2358     // vectors)
2359     SDValue Op0 =
2360       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2361     SDValue Op1 =
2362       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2363     SmallVector<SDValue, 16> ShufBytes;
2364
2365     // Create the shuffle mask for "rotating" the borrow up one register slot
2366     // once the borrow is generated.
2367     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2368     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2369     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2370     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2371
2372     SDValue BorrowGen =
2373       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2374     SDValue ShiftedBorrow =
2375       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2376                   BorrowGen, BorrowGen,
2377                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2378                               &ShufBytes[0], ShufBytes.size()));
2379
2380     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2381                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2382                                    Op0, Op1, ShiftedBorrow));
2383   }
2384
2385   case ISD::SHL: {
2386     SDValue ShiftAmt = Op.getOperand(1);
2387     MVT ShiftAmtVT = ShiftAmt.getValueType();
2388     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2389     SDValue MaskLower =
2390       DAG.getNode(SPUISD::SELB, VecVT,
2391                   Op0Vec,
2392                   DAG.getConstant(0, VecVT),
2393                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2394                               DAG.getConstant(0xff00ULL, MVT::i16)));
2395     SDValue ShiftAmtBytes =
2396       DAG.getNode(ISD::SRL, ShiftAmtVT,
2397                   ShiftAmt,
2398                   DAG.getConstant(3, ShiftAmtVT));
2399     SDValue ShiftAmtBits =
2400       DAG.getNode(ISD::AND, ShiftAmtVT,
2401                   ShiftAmt,
2402                   DAG.getConstant(7, ShiftAmtVT));
2403
2404     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2405                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2406                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2407                                                MaskLower, ShiftAmtBytes),
2408                                    ShiftAmtBits));
2409   }
2410
2411   case ISD::SRL: {
2412     MVT VT = Op.getValueType();
2413     SDValue ShiftAmt = Op.getOperand(1);
2414     MVT ShiftAmtVT = ShiftAmt.getValueType();
2415     SDValue ShiftAmtBytes =
2416       DAG.getNode(ISD::SRL, ShiftAmtVT,
2417                   ShiftAmt,
2418                   DAG.getConstant(3, ShiftAmtVT));
2419     SDValue ShiftAmtBits =
2420       DAG.getNode(ISD::AND, ShiftAmtVT,
2421                   ShiftAmt,
2422                   DAG.getConstant(7, ShiftAmtVT));
2423
2424     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2425                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2426                                    Op0, ShiftAmtBytes),
2427                        ShiftAmtBits);
2428   }
2429
2430   case ISD::SRA: {
2431     // Promote Op0 to vector
2432     SDValue Op0 =
2433       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2434     SDValue ShiftAmt = Op.getOperand(1);
2435     MVT ShiftVT = ShiftAmt.getValueType();
2436
2437     // Negate variable shift amounts
2438     if (!isa<ConstantSDNode>(ShiftAmt)) {
2439       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2440                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2441     }
2442
2443     SDValue UpperHalfSign =
2444       DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2445                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2446                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2447                                           Op0, DAG.getConstant(31, MVT::i32))));
2448     SDValue UpperHalfSignMask =
2449       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2450     SDValue UpperLowerMask =
2451       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2452                   DAG.getConstant(0xff00, MVT::i16));
2453     SDValue UpperLowerSelect =
2454       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2455                   UpperHalfSignMask, Op0, UpperLowerMask);
2456     SDValue RotateLeftBytes =
2457       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2458                   UpperLowerSelect, ShiftAmt);
2459     SDValue RotateLeftBits =
2460       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2461                   RotateLeftBytes, ShiftAmt);
2462
2463     return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2464                        RotateLeftBits);
2465   }
2466   }
2467
2468   return SDValue();
2469 }
2470
2471 //! Lower byte immediate operations for v16i8 vectors:
2472 static SDValue
2473 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2474   SDValue ConstVec;
2475   SDValue Arg;
2476   MVT VT = Op.getValueType();
2477
2478   ConstVec = Op.getOperand(0);
2479   Arg = Op.getOperand(1);
2480   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2481     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2482       ConstVec = ConstVec.getOperand(0);
2483     } else {
2484       ConstVec = Op.getOperand(1);
2485       Arg = Op.getOperand(0);
2486       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2487         ConstVec = ConstVec.getOperand(0);
2488       }
2489     }
2490   }
2491
2492   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2493     uint64_t VectorBits[2];
2494     uint64_t UndefBits[2];
2495     uint64_t SplatBits, SplatUndef;
2496     int SplatSize;
2497
2498     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2499         && isConstantSplat(VectorBits, UndefBits,
2500                            VT.getVectorElementType().getSizeInBits(),
2501                            SplatBits, SplatUndef, SplatSize)) {
2502       SDValue tcVec[16];
2503       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2504       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2505
2506       // Turn the BUILD_VECTOR into a set of target constants:
2507       for (size_t i = 0; i < tcVecSize; ++i)
2508         tcVec[i] = tc;
2509
2510       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2511                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2512     }
2513   }
2514   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2515   // lowered.  Return the operation, rather than a null SDValue.
2516   return Op;
2517 }
2518
2519 //! Lower i32 multiplication
2520 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2521                           unsigned Opc) {
2522   switch (VT.getSimpleVT()) {
2523   default:
2524     cerr << "CellSPU: Unknown LowerMUL value type, got "
2525          << Op.getValueType().getMVTString()
2526          << "\n";
2527     abort();
2528     /*NOTREACHED*/
2529
2530   case MVT::i32: {
2531     SDValue rA = Op.getOperand(0);
2532     SDValue rB = Op.getOperand(1);
2533
2534     return DAG.getNode(ISD::ADD, MVT::i32,
2535                        DAG.getNode(ISD::ADD, MVT::i32,
2536                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2537                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2538                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2539   }
2540   }
2541
2542   return SDValue();
2543 }
2544
2545 //! Custom lowering for CTPOP (count population)
2546 /*!
2547   Custom lowering code that counts the number ones in the input
2548   operand. SPU has such an instruction, but it counts the number of
2549   ones per byte, which then have to be accumulated.
2550 */
2551 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2552   MVT VT = Op.getValueType();
2553   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2554
2555   switch (VT.getSimpleVT()) {
2556   default:
2557     assert(false && "Invalid value type!");
2558   case MVT::i8: {
2559     SDValue N = Op.getOperand(0);
2560     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2561
2562     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2563     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2564
2565     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2566   }
2567
2568   case MVT::i16: {
2569     MachineFunction &MF = DAG.getMachineFunction();
2570     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2571
2572     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2573
2574     SDValue N = Op.getOperand(0);
2575     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2576     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2577     SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2578
2579     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2580     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2581
2582     // CNTB_result becomes the chain to which all of the virtual registers
2583     // CNTB_reg, SUM1_reg become associated:
2584     SDValue CNTB_result =
2585       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2586
2587     SDValue CNTB_rescopy =
2588       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2589
2590     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2591
2592     return DAG.getNode(ISD::AND, MVT::i16,
2593                        DAG.getNode(ISD::ADD, MVT::i16,
2594                                    DAG.getNode(ISD::SRL, MVT::i16,
2595                                                Tmp1, Shift1),
2596                                    Tmp1),
2597                        Mask0);
2598   }
2599
2600   case MVT::i32: {
2601     MachineFunction &MF = DAG.getMachineFunction();
2602     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2603
2604     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2605     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2606
2607     SDValue N = Op.getOperand(0);
2608     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2609     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2610     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2611     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2612
2613     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2614     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2615
2616     // CNTB_result becomes the chain to which all of the virtual registers
2617     // CNTB_reg, SUM1_reg become associated:
2618     SDValue CNTB_result =
2619       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2620
2621     SDValue CNTB_rescopy =
2622       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2623
2624     SDValue Comp1 =
2625       DAG.getNode(ISD::SRL, MVT::i32,
2626                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2627
2628     SDValue Sum1 =
2629       DAG.getNode(ISD::ADD, MVT::i32,
2630                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2631
2632     SDValue Sum1_rescopy =
2633       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2634
2635     SDValue Comp2 =
2636       DAG.getNode(ISD::SRL, MVT::i32,
2637                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2638                   Shift2);
2639     SDValue Sum2 =
2640       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2641                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2642
2643     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2644   }
2645
2646   case MVT::i64:
2647     break;
2648   }
2649
2650   return SDValue();
2651 }
2652
2653 /// LowerOperation - Provide custom lowering hooks for some operations.
2654 ///
2655 SDValue
2656 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2657 {
2658   unsigned Opc = (unsigned) Op.getOpcode();
2659   MVT VT = Op.getValueType();
2660
2661   switch (Opc) {
2662   default: {
2663     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2664     cerr << "Op.getOpcode() = " << Opc << "\n";
2665     cerr << "*Op.getNode():\n";
2666     Op.getNode()->dump();
2667     abort();
2668   }
2669   case ISD::LOAD:
2670   case ISD::SEXTLOAD:
2671   case ISD::ZEXTLOAD:
2672     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2673   case ISD::STORE:
2674     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2675   case ISD::ConstantPool:
2676     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2677   case ISD::GlobalAddress:
2678     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2679   case ISD::JumpTable:
2680     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2681   case ISD::Constant:
2682     return LowerConstant(Op, DAG);
2683   case ISD::ConstantFP:
2684     return LowerConstantFP(Op, DAG);
2685   case ISD::BRCOND:
2686     return LowerBRCOND(Op, DAG);
2687   case ISD::FORMAL_ARGUMENTS:
2688     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2689   case ISD::CALL:
2690     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2691   case ISD::RET:
2692     return LowerRET(Op, DAG, getTargetMachine());
2693
2694
2695   // i8, i64 math ops:
2696   case ISD::ZERO_EXTEND:
2697   case ISD::SIGN_EXTEND:
2698   case ISD::ANY_EXTEND:
2699   case ISD::ADD:
2700   case ISD::SUB:
2701   case ISD::ROTR:
2702   case ISD::ROTL:
2703   case ISD::SRL:
2704   case ISD::SHL:
2705   case ISD::SRA: {
2706     if (VT == MVT::i8)
2707       return LowerI8Math(Op, DAG, Opc);
2708     else if (VT == MVT::i64)
2709       return LowerI64Math(Op, DAG, Opc);
2710     break;
2711   }
2712
2713   // Vector-related lowering.
2714   case ISD::BUILD_VECTOR:
2715     return LowerBUILD_VECTOR(Op, DAG);
2716   case ISD::SCALAR_TO_VECTOR:
2717     return LowerSCALAR_TO_VECTOR(Op, DAG);
2718   case ISD::VECTOR_SHUFFLE:
2719     return LowerVECTOR_SHUFFLE(Op, DAG);
2720   case ISD::EXTRACT_VECTOR_ELT:
2721     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2722   case ISD::INSERT_VECTOR_ELT:
2723     return LowerINSERT_VECTOR_ELT(Op, DAG);
2724
2725   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2726   case ISD::AND:
2727   case ISD::OR:
2728   case ISD::XOR:
2729     return LowerByteImmed(Op, DAG);
2730
2731   // Vector and i8 multiply:
2732   case ISD::MUL:
2733     if (VT.isVector())
2734       return LowerVectorMUL(Op, DAG);
2735     else if (VT == MVT::i8)
2736       return LowerI8Math(Op, DAG, Opc);
2737     else
2738       return LowerMUL(Op, DAG, VT, Opc);
2739
2740   case ISD::FDIV:
2741     if (VT == MVT::f32 || VT == MVT::v4f32)
2742       return LowerFDIVf32(Op, DAG);
2743 //    else if (Op.getValueType() == MVT::f64)
2744 //      return LowerFDIVf64(Op, DAG);
2745     else
2746       assert(0 && "Calling FDIV on unsupported MVT");
2747
2748   case ISD::CTPOP:
2749     return LowerCTPOP(Op, DAG);
2750   }
2751
2752   return SDValue();
2753 }
2754
2755 //===----------------------------------------------------------------------===//
2756 // Target Optimization Hooks
2757 //===----------------------------------------------------------------------===//
2758
2759 SDValue
2760 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2761 {
2762 #if 0
2763   TargetMachine &TM = getTargetMachine();
2764 #endif
2765   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2766   SelectionDAG &DAG = DCI.DAG;
2767   SDValue Op0 = N->getOperand(0);      // everything has at least one operand
2768   SDValue Result;                     // Initially, NULL result
2769
2770   switch (N->getOpcode()) {
2771   default: break;
2772   case ISD::ADD: {
2773     SDValue Op1 = N->getOperand(1);
2774
2775     if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2776       SDValue Op01 = Op0.getOperand(1);
2777       if (Op01.getOpcode() == ISD::Constant
2778           || Op01.getOpcode() == ISD::TargetConstant) {
2779         // (add <const>, (SPUindirect <arg>, <const>)) ->
2780         // (SPUindirect <arg>, <const + const>)
2781         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2782         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2783         SDValue combinedConst =
2784           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2785                           Op0.getValueType());
2786
2787         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2788                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2789         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2790                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2791         return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2792                            Op0.getOperand(0), combinedConst);
2793       }
2794     } else if (isa<ConstantSDNode>(Op0)
2795                && Op1.getOpcode() == SPUISD::IndirectAddr) {
2796       SDValue Op11 = Op1.getOperand(1);
2797       if (Op11.getOpcode() == ISD::Constant
2798           || Op11.getOpcode() == ISD::TargetConstant) {
2799         // (add (SPUindirect <arg>, <const>), <const>) ->
2800         // (SPUindirect <arg>, <const + const>)
2801         ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2802         ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2803         SDValue combinedConst =
2804           DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2805                           Op0.getValueType());
2806
2807         DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2808                    << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2809         DEBUG(cerr << "With:    (SPUindirect <arg>, "
2810                    << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2811
2812         return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2813                            Op1.getOperand(0), combinedConst);
2814       }
2815     }
2816     break;
2817   }
2818   case ISD::SIGN_EXTEND:
2819   case ISD::ZERO_EXTEND:
2820   case ISD::ANY_EXTEND: {
2821     if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2822         N->getValueType(0) == Op0.getValueType()) {
2823       // (any_extend (SPUextract_elt0 <arg>)) ->
2824       // (SPUextract_elt0 <arg>)
2825       // Types must match, however...
2826       DEBUG(cerr << "Replace: ");
2827       DEBUG(N->dump(&DAG));
2828       DEBUG(cerr << "\nWith:    ");
2829       DEBUG(Op0.getNode()->dump(&DAG));
2830       DEBUG(cerr << "\n");
2831
2832       return Op0;
2833     }
2834     break;
2835   }
2836   case SPUISD::IndirectAddr: {
2837     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2838       ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2839       if (CN->getZExtValue() == 0) {
2840         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2841         // (SPUaform <addr>, 0)
2842
2843         DEBUG(cerr << "Replace: ");
2844         DEBUG(N->dump(&DAG));
2845         DEBUG(cerr << "\nWith:    ");
2846         DEBUG(Op0.getNode()->dump(&DAG));
2847         DEBUG(cerr << "\n");
2848
2849         return Op0;
2850       }
2851     }
2852     break;
2853   }
2854   case SPUISD::SHLQUAD_L_BITS:
2855   case SPUISD::SHLQUAD_L_BYTES:
2856   case SPUISD::VEC_SHL:
2857   case SPUISD::VEC_SRL:
2858   case SPUISD::VEC_SRA:
2859   case SPUISD::ROTQUAD_RZ_BYTES:
2860   case SPUISD::ROTQUAD_RZ_BITS: {
2861     SDValue Op1 = N->getOperand(1);
2862
2863     if (isa<ConstantSDNode>(Op1)) {
2864       // Kill degenerate vector shifts:
2865       ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2866
2867       if (CN->getZExtValue() == 0) {
2868         Result = Op0;
2869       }
2870     }
2871     break;
2872   }
2873   case SPUISD::PROMOTE_SCALAR: {
2874     switch (Op0.getOpcode()) {
2875     default:
2876       break;
2877     case ISD::ANY_EXTEND:
2878     case ISD::ZERO_EXTEND:
2879     case ISD::SIGN_EXTEND: {
2880       // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2881       // <arg>
2882       // but only if the SPUpromote_scalar and <arg> types match.
2883       SDValue Op00 = Op0.getOperand(0);
2884       if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2885         SDValue Op000 = Op00.getOperand(0);
2886         if (Op000.getValueType() == N->getValueType(0)) {
2887           Result = Op000;
2888         }
2889       }
2890       break;
2891     }
2892     case SPUISD::EXTRACT_ELT0: {
2893       // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2894       // <arg>
2895       Result = Op0.getOperand(0);
2896       break;
2897     }
2898     }
2899     break;
2900   }
2901   }
2902   // Otherwise, return unchanged.
2903 #if 1
2904   if (Result.getNode()) {
2905     DEBUG(cerr << "\nReplace.SPU: ");
2906     DEBUG(N->dump(&DAG));
2907     DEBUG(cerr << "\nWith:        ");
2908     DEBUG(Result.getNode()->dump(&DAG));
2909     DEBUG(cerr << "\n");
2910   }
2911 #endif
2912
2913   return Result;
2914 }
2915
2916 //===----------------------------------------------------------------------===//
2917 // Inline Assembly Support
2918 //===----------------------------------------------------------------------===//
2919
2920 /// getConstraintType - Given a constraint letter, return the type of
2921 /// constraint it is for this target.
2922 SPUTargetLowering::ConstraintType
2923 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2924   if (ConstraintLetter.size() == 1) {
2925     switch (ConstraintLetter[0]) {
2926     default: break;
2927     case 'b':
2928     case 'r':
2929     case 'f':
2930     case 'v':
2931     case 'y':
2932       return C_RegisterClass;
2933     }
2934   }
2935   return TargetLowering::getConstraintType(ConstraintLetter);
2936 }
2937
2938 std::pair<unsigned, const TargetRegisterClass*>
2939 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2940                                                 MVT VT) const
2941 {
2942   if (Constraint.size() == 1) {
2943     // GCC RS6000 Constraint Letters
2944     switch (Constraint[0]) {
2945     case 'b':   // R1-R31
2946     case 'r':   // R0-R31
2947       if (VT == MVT::i64)
2948         return std::make_pair(0U, SPU::R64CRegisterClass);
2949       return std::make_pair(0U, SPU::R32CRegisterClass);
2950     case 'f':
2951       if (VT == MVT::f32)
2952         return std::make_pair(0U, SPU::R32FPRegisterClass);
2953       else if (VT == MVT::f64)
2954         return std::make_pair(0U, SPU::R64FPRegisterClass);
2955       break;
2956     case 'v':
2957       return std::make_pair(0U, SPU::GPRCRegisterClass);
2958     }
2959   }
2960
2961   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2962 }
2963
2964 //! Compute used/known bits for a SPU operand
2965 void
2966 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2967                                                   const APInt &Mask,
2968                                                   APInt &KnownZero,
2969                                                   APInt &KnownOne,
2970                                                   const SelectionDAG &DAG,
2971                                                   unsigned Depth ) const {
2972 #if 0
2973   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2974 #endif
2975
2976   switch (Op.getOpcode()) {
2977   default:
2978     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2979     break;
2980
2981 #if 0
2982   case CALL:
2983   case SHUFB:
2984   case INSERT_MASK:
2985   case CNTB:
2986 #endif
2987
2988   case SPUISD::PROMOTE_SCALAR: {
2989     SDValue Op0 = Op.getOperand(0);
2990     MVT Op0VT = Op0.getValueType();
2991     unsigned Op0VTBits = Op0VT.getSizeInBits();
2992     uint64_t InMask = Op0VT.getIntegerVTBitMask();
2993     KnownZero |= APInt(Op0VTBits, ~InMask, false);
2994     KnownOne |= APInt(Op0VTBits, InMask, false);
2995     break;
2996   }
2997
2998   case SPUISD::LDRESULT:
2999   case SPUISD::EXTRACT_ELT0:
3000   case SPUISD::EXTRACT_ELT0_CHAINED: {
3001     MVT OpVT = Op.getValueType();
3002     unsigned OpVTBits = OpVT.getSizeInBits();
3003     uint64_t InMask = OpVT.getIntegerVTBitMask();
3004     KnownZero |= APInt(OpVTBits, ~InMask, false);
3005     KnownOne |= APInt(OpVTBits, InMask, false);
3006     break;
3007   }
3008
3009 #if 0
3010   case EXTRACT_I1_ZEXT:
3011   case EXTRACT_I1_SEXT:
3012   case EXTRACT_I8_ZEXT:
3013   case EXTRACT_I8_SEXT:
3014   case MPY:
3015   case MPYU:
3016   case MPYH:
3017   case MPYHH:
3018   case SPUISD::SHLQUAD_L_BITS:
3019   case SPUISD::SHLQUAD_L_BYTES:
3020   case SPUISD::VEC_SHL:
3021   case SPUISD::VEC_SRL:
3022   case SPUISD::VEC_SRA:
3023   case SPUISD::VEC_ROTL:
3024   case SPUISD::VEC_ROTR:
3025   case SPUISD::ROTQUAD_RZ_BYTES:
3026   case SPUISD::ROTQUAD_RZ_BITS:
3027   case SPUISD::ROTBYTES_RIGHT_S:
3028   case SPUISD::ROTBYTES_LEFT:
3029   case SPUISD::ROTBYTES_LEFT_CHAINED:
3030   case SPUISD::SELECT_MASK:
3031   case SPUISD::SELB:
3032   case SPUISD::FPInterp:
3033   case SPUISD::FPRecipEst:
3034   case SPUISD::SEXT32TO64:
3035 #endif
3036   }
3037 }
3038
3039 // LowerAsmOperandForConstraint
3040 void
3041 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3042                                                 char ConstraintLetter,
3043                                                 bool hasMemory,
3044                                                 std::vector<SDValue> &Ops,
3045                                                 SelectionDAG &DAG) const {
3046   // Default, for the time being, to the base class handler
3047   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3048                                                Ops, DAG);
3049 }
3050
3051 /// isLegalAddressImmediate - Return true if the integer value can be used
3052 /// as the offset of the target addressing mode.
3053 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3054                                                 const Type *Ty) const {
3055   // SPU's addresses are 256K:
3056   return (V > -(1 << 18) && V < (1 << 18) - 1);
3057 }
3058
3059 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3060   return false;
3061 }