1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
42   const MVT   valtype;
43   const int   prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85     address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170   // Custom lower BRCOND for i1 and i8 to "promote" the branch condition to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182   // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196   // No full hardware square root; expand FSQRT
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203 // SPU can do rotate right and left, so legalize it... but customize for i8
204 // because instructions don't exist.
206 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
208 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
209 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
210 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
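  // (SPU only provides 32-bit element and quadword shifts, so 64-bit shifts
  // are synthesized from those during custom lowering.)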
224 // Custom lower i8, i32 and i64 multiplications
225 setOperationAction(ISD::MUL, MVT::i8, Custom);
226 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 setOperationAction(ISD::MUL, MVT::i64, Custom);
229 // Need to custom handle (some) common i8, i64 math ops
230 setOperationAction(ISD::ADD, MVT::i64, Custom);
231 setOperationAction(ISD::SUB, MVT::i8, Custom);
232 setOperationAction(ISD::SUB, MVT::i64, Custom);
234   // SPU does not have BSWAP. It does have i32 CTLZ support.
235 // CTPOP has to be custom lowered.
236 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
237 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
239 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
240 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
241 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
242 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
244 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
245 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
247 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
249 // SPU has a version of select that implements (a&~c)|(b&c), just like
250 // select ought to work:
251 setOperationAction(ISD::SELECT, MVT::i1, Promote);
252 setOperationAction(ISD::SELECT, MVT::i8, Legal);
253 setOperationAction(ISD::SELECT, MVT::i16, Legal);
254 setOperationAction(ISD::SELECT, MVT::i32, Legal);
255 setOperationAction(ISD::SELECT, MVT::i64, Expand);
257 setOperationAction(ISD::SETCC, MVT::i1, Promote);
258 setOperationAction(ISD::SETCC, MVT::i8, Legal);
259 setOperationAction(ISD::SETCC, MVT::i16, Legal);
260 setOperationAction(ISD::SETCC, MVT::i32, Legal);
261 setOperationAction(ISD::SETCC, MVT::i64, Expand);
263   // Zero extension and sign extension for i64 have to be custom legalized
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 MVT VT = (MVT::SimpleValueType)sctype;
307 setOperationAction(ISD::GlobalAddress, VT, Custom);
308 setOperationAction(ISD::ConstantPool, VT, Custom);
309 setOperationAction(ISD::JumpTable, VT, Custom);
312 // RET must be custom lowered, to meet ABI requirements
313 setOperationAction(ISD::RET, MVT::Other, Custom);
315 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
316 setOperationAction(ISD::VASTART , MVT::Other, Custom);
318 // Use the default implementation.
319 setOperationAction(ISD::VAARG , MVT::Other, Expand);
320 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
321 setOperationAction(ISD::VAEND , MVT::Other, Expand);
322 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
323 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
327 // Cell SPU has instructions for converting between i64 and fp.
328 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
329 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
331 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
334 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
335 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
337 // First set operation action for all vector types to expand. Then we
338 // will selectively turn on ones that can be effectively codegen'd.
339 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
344 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
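  // All vector types live in the single 128-bit VECREG register class; the
  // element type only affects how operations on them are legalized below.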
346 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
347 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
348 MVT VT = (MVT::SimpleValueType)i;
350 // add/sub are legal for all supported vector VT's.
351 setOperationAction(ISD::ADD , VT, Legal);
352 setOperationAction(ISD::SUB , VT, Legal);
353 // mul has to be custom lowered.
354 setOperationAction(ISD::MUL , VT, Custom);
356 setOperationAction(ISD::AND , VT, Legal);
357 setOperationAction(ISD::OR , VT, Legal);
358 setOperationAction(ISD::XOR , VT, Legal);
359 setOperationAction(ISD::LOAD , VT, Legal);
360 setOperationAction(ISD::SELECT, VT, Legal);
361 setOperationAction(ISD::STORE, VT, Legal);
363 // These operations need to be expanded:
364 setOperationAction(ISD::SDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UDIV, VT, Expand);
367 setOperationAction(ISD::UREM, VT, Expand);
368 setOperationAction(ISD::FDIV, VT, Custom);
370 // Custom lower build_vector, constant pool spills, insert and
371 // extract vector elements:
372 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
373 setOperationAction(ISD::ConstantPool, VT, Custom);
374 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
375 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
376 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
377 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
380 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
381 setOperationAction(ISD::AND, MVT::v16i8, Custom);
382 setOperationAction(ISD::OR, MVT::v16i8, Custom);
383 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
386 setShiftAmountType(MVT::i32);
387 setSetCCResultContents(ZeroOrOneSetCCResult);
389 setStackPointerRegisterToSaveRestore(SPU::R1);
391 // We have target-specific dag combine patterns for the following nodes:
392 setTargetDAGCombine(ISD::ADD);
393 setTargetDAGCombine(ISD::ZERO_EXTEND);
394 setTargetDAGCombine(ISD::SIGN_EXTEND);
395 setTargetDAGCombine(ISD::ANY_EXTEND);
397 computeRegisterProperties();
401 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
403 if (node_names.empty()) {
404 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
405 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
406 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
407 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
408 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
409 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
410 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
411 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
412 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
413 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
414 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
415 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
416 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
417 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
418 = "SPUISD::EXTRACT_ELT0_CHAINED";
419 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
420 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
421 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
422 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
423 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
424 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
425 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
426 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
427 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
428 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
429 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
430 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
431 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
432 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
433 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
434 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
435 "SPUISD::ROTQUAD_RZ_BYTES";
436 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
437 "SPUISD::ROTQUAD_RZ_BITS";
438 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
439 "SPUISD::ROTBYTES_RIGHT_S";
440 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
441 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
442 "SPUISD::ROTBYTES_LEFT_CHAINED";
443 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
444 "SPUISD::ROTBYTES_LEFT_BITS";
445 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
446 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
447 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
448 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
449 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
450 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
451 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
452 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
453 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
456 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
458 return ((i != node_names.end()) ? i->second : 0);
461 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
462 MVT VT = Op.getValueType();
469 //===----------------------------------------------------------------------===//
470 // Calling convention code:
471 //===----------------------------------------------------------------------===//
473 #include "SPUGenCallingConv.inc"
475 //===----------------------------------------------------------------------===//
476 // LowerOperation implementation
477 //===----------------------------------------------------------------------===//
479 /// Aligned load common code for CellSPU
481 \param[in] Op The SelectionDAG load or store operand
482 \param[in] DAG The selection DAG
483 \param[in] ST CellSPU subtarget information structure
484 \param[in,out] alignment Caller initializes this to the load or store node's
485 value from getAlignment(), may be updated while generating the aligned load
486 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
487  offset (a multiple of 16)
488 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
489 offset of the preferred slot (modulo 16 != 0)
490  \param[in,out] VT Caller initializes this value type to the load or store
491  node's loaded or stored value type; may be updated if an i1-extended load or store is encountered
493 \param[out] was16aligned true if the base pointer had 16-byte alignment,
494 otherwise false. Can help to determine if the chunk needs to be rotated.
496 Both load and store lowering load a block of data aligned on a 16-byte
497 boundary. This is the common aligned load code shared between both.
500 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
502             LSBaseSDNode *LSN, unsigned &alignment, int &alignOffs, int &prefSlotOffs,
503 MVT &VT, bool &was16aligned)
505 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
506 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
507 SDValue basePtr = LSN->getBasePtr();
508 SDValue chain = LSN->getChain();
510 if (basePtr.getOpcode() == ISD::ADD) {
511 SDValue Op1 = basePtr.getNode()->getOperand(1);
513 if (Op1.getOpcode() == ISD::Constant
514 || Op1.getOpcode() == ISD::TargetConstant) {
515 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
517 alignOffs = (int) CN->getZExtValue();
518 prefSlotOffs = (int) (alignOffs & 0xf);
520 // Adjust the rotation amount to ensure that the final result ends up in
521 // the preferred slot:
522 prefSlotOffs -= vtm->prefslot_byte;
523 basePtr = basePtr.getOperand(0);
525 // Loading from memory, can we adjust alignment?
526 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
527 SDValue APtr = basePtr.getOperand(0);
528 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
529 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
530 alignment = GSDN->getGlobal()->getAlignment();
535 prefSlotOffs = -vtm->prefslot_byte;
537 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
538 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
539 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
540 prefSlotOffs = (int) (alignOffs & 0xf);
541 prefSlotOffs -= vtm->prefslot_byte;
542 basePtr = DAG.getRegister(SPU::R1, VT);
545 prefSlotOffs = -vtm->prefslot_byte;
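  // If the access is known to be 16-byte aligned, the enclosing quadword can
  // be addressed directly with a D-form (register + small immediate) address;
  // otherwise fall through to the pessimistic indirect-addressing path below.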
548 if (alignment == 16) {
549 // Realign the base pointer as a D-Form address:
550 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
551 basePtr = DAG.getNode(ISD::ADD, PtrVT,
553 DAG.getConstant((alignOffs & ~0xf), PtrVT));
556 // Emit the vector load:
558 return DAG.getLoad(MVT::v16i8, chain, basePtr,
559 LSN->getSrcValue(), LSN->getSrcValueOffset(),
560 LSN->isVolatile(), 16);
563 // Unaligned load or we're using the "large memory" model, which means that
564 // we have to be very pessimistic:
565 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
566 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
567 DAG.getConstant(0, PtrVT));
571 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
572 DAG.getConstant((alignOffs & ~0xf), PtrVT));
573 was16aligned = false;
574 return DAG.getLoad(MVT::v16i8, chain, basePtr,
575 LSN->getSrcValue(), LSN->getSrcValueOffset(),
576 LSN->isVolatile(), 16);
579 /// Custom lower loads for CellSPU
581 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
582 within a 16-byte block, we have to rotate to extract the requested element.
585 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
586 LoadSDNode *LN = cast<LoadSDNode>(Op);
587 SDValue the_chain = LN->getChain();
588 MVT VT = LN->getMemoryVT();
589 MVT OpVT = Op.getNode()->getValueType(0);
590 ISD::LoadExtType ExtType = LN->getExtensionType();
591 unsigned alignment = LN->getAlignment();
594 switch (LN->getAddressingMode()) {
595 case ISD::UNINDEXED: {
599 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
601 if (result.getNode() == 0)
604 the_chain = result.getValue(1);
605 // Rotate the chunk if necessary
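      // When the quadword was loaded from a known 16-byte aligned address, the
      // rotate amount is a compile-time byte count; otherwise it is computed
      // from the (unaligned) base pointer, whose low bits give the byte offset
      // of the value within the containing quadword.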
608 if (rotamt != 0 || !was16aligned) {
609 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
614 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
616 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
617 LoadSDNode *LN1 = cast<LoadSDNode>(result);
618 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
619 DAG.getConstant(rotamt, PtrVT));
622 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
623 the_chain = result.getValue(1);
626 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
628 MVT vecVT = MVT::v16i8;
630 // Convert the loaded v16i8 vector to the appropriate vector type
631 // specified by the operand:
634 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
636 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
639 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
640 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
641 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
642 the_chain = result.getValue(1);
644 // Handle the sign and zero-extending loads for i1 and i8:
647 if (ExtType == ISD::SEXTLOAD) {
648 NewOpC = (OpVT == MVT::i1
649 ? SPUISD::EXTRACT_I1_SEXT
650 : SPUISD::EXTRACT_I8_SEXT);
652 assert(ExtType == ISD::ZEXTLOAD);
653 NewOpC = (OpVT == MVT::i1
654 ? SPUISD::EXTRACT_I1_ZEXT
655 : SPUISD::EXTRACT_I8_ZEXT);
658 result = DAG.getNode(NewOpC, OpVT, result);
661 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
662 SDValue retops[2] = {
667 result = DAG.getNode(SPUISD::LDRESULT, retvts,
668 retops, sizeof(retops) / sizeof(retops[0]));
675 case ISD::LAST_INDEXED_MODE:
676     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than UNINDEXED\n";
678 cerr << (unsigned) LN->getAddressingMode() << "\n";
686 /// Custom lower stores for CellSPU
688 All CellSPU stores are aligned to 16-byte boundaries, so for elements
689 within a 16-byte block, we have to generate a shuffle to insert the
690 requested element into its place, then store the resulting block.
693 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
694 StoreSDNode *SN = cast<StoreSDNode>(Op);
695 SDValue Value = SN->getValue();
696 MVT VT = Value.getValueType();
697 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
698 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
699 unsigned alignment = SN->getAlignment();
701 switch (SN->getAddressingMode()) {
702 case ISD::UNINDEXED: {
703 int chunk_offset, slot_offset;
706 // The vector type we really want to load from the 16-byte chunk, except
707 // in the case of MVT::i1, which has to be v16i8.
708 MVT vecVT, stVecVT = MVT::v16i8;
711 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
712 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
714 SDValue alignLoadVec =
715 AlignedLoad(Op, DAG, ST, SN, alignment,
716 chunk_offset, slot_offset, VT, was16aligned);
718 if (alignLoadVec.getNode() == 0)
721 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
722 SDValue basePtr = LN->getBasePtr();
723 SDValue the_chain = alignLoadVec.getValue(1);
724 SDValue theValue = SN->getValue();
728 && (theValue.getOpcode() == ISD::AssertZext
729 || theValue.getOpcode() == ISD::AssertSext)) {
730       // Drill down and get the value for zero- and sign-extended quantities.
732 theValue = theValue.getOperand(0);
737 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
738 SDValue insertEltPtr;
741 // If the base pointer is already a D-form address, then just create
742     // a new D-form address with a slot offset and the original base pointer.
743 // Otherwise generate a D-form address with the slot offset relative
744 // to the stack pointer, which is always aligned.
745 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
746 DEBUG(basePtr.getNode()->dump(&DAG));
749 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
750 (basePtr.getOpcode() == ISD::ADD
751 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
752 insertEltPtr = basePtr;
754 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
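    // INSERT_MASK (one of the C*D instructions) builds a shuffle control word
    // from the low bits of the element address; SHUFB then merges the scalar
    // value into the previously loaded quadword so that only the addressed
    // slot is replaced.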
757 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
758 result = DAG.getNode(SPUISD::SHUFB, vecVT,
759 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
761 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
763 result = DAG.getStore(the_chain, result, basePtr,
764 LN->getSrcValue(), LN->getSrcValueOffset(),
765 LN->isVolatile(), LN->getAlignment());
774 case ISD::LAST_INDEXED_MODE:
775     cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than UNINDEXED\n";
777 cerr << (unsigned) SN->getAddressingMode() << "\n";
785 /// Generate the address of a constant pool entry.
787 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
788 MVT PtrVT = Op.getValueType();
789 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
790 Constant *C = CP->getConstVal();
791 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
792 SDValue Zero = DAG.getConstant(0, PtrVT);
793 const TargetMachine &TM = DAG.getTarget();
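  // With static relocation and the default (small) memory model, the constant
  // pool entry is reachable through an A-form absolute address. Under the
  // "large memory" model, the address is instead built as a Hi/Lo pair and
  // combined through an indirect address node.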
795 if (TM.getRelocationModel() == Reloc::Static) {
796 if (!ST->usingLargeMem()) {
797 // Just return the SDValue with the constant pool address in it.
798 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
800 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
801 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
802 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
807 "LowerConstantPool: Relocation model other than static"
813 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
814 MVT PtrVT = Op.getValueType();
815 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
816 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
817 SDValue Zero = DAG.getConstant(0, PtrVT);
818 const TargetMachine &TM = DAG.getTarget();
820 if (TM.getRelocationModel() == Reloc::Static) {
821 if (!ST->usingLargeMem()) {
822 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
824 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
825 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
826 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
831 "LowerJumpTable: Relocation model other than static not supported.");
836 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
837 MVT PtrVT = Op.getValueType();
838 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
839 GlobalValue *GV = GSDN->getGlobal();
840 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
841 const TargetMachine &TM = DAG.getTarget();
842 SDValue Zero = DAG.getConstant(0, PtrVT);
844 if (TM.getRelocationModel() == Reloc::Static) {
845 if (!ST->usingLargeMem()) {
846 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
848 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
849 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
850 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
853 cerr << "LowerGlobalAddress: Relocation model other than static not "
862 //! Custom lower i64 integer constants
864 This code inserts all of the necessary juggling that needs to occur to load
865 a 64-bit constant into a register.
868 LowerConstant(SDValue Op, SelectionDAG &DAG) {
869 MVT VT = Op.getValueType();
870 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
872 if (VT == MVT::i64) {
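    // Materialize the 64-bit constant as a v2i64 splat and extract the
    // preferred slot, reusing the vector constant-building path rather than
    // synthesizing the value with scalar instructions.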
873 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
874 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
875 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
877 cerr << "LowerConstant: unhandled constant type "
887 //! Custom lower double precision floating point constants
889 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
890 MVT VT = Op.getValueType();
891 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
894 "LowerConstantFP: Node is not ConstantFPSDNode");
896 if (VT == MVT::f64) {
897 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
898 return DAG.getNode(ISD::BIT_CONVERT, VT,
899 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
905 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
907 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
909 SDValue Cond = Op.getOperand(1);
910 MVT CondVT = Cond.getValueType();
913 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
914 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
915 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
917 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
920 return SDValue(); // Unchanged
924 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
926 MachineFunction &MF = DAG.getMachineFunction();
927 MachineFrameInfo *MFI = MF.getFrameInfo();
928 MachineRegisterInfo &RegInfo = MF.getRegInfo();
929 SmallVector<SDValue, 8> ArgValues;
930 SDValue Root = Op.getOperand(0);
931 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
933 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
934 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
936 unsigned ArgOffset = SPUFrameInfo::minStackSize();
937 unsigned ArgRegIdx = 0;
938 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
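  // Note: CellSPU argument stack slots are quadword (16-byte) sized, so every
  // argument spilled to memory advances ArgOffset by a full slot regardless of
  // the argument's actual size.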
940 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
942 // Add DAG nodes to load the arguments or copy them out of registers.
943 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
944 ArgNo != e; ++ArgNo) {
946 bool needsLoad = false;
947 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
948 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
950 switch (ObjectVT.getSimpleVT()) {
952 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
953 << ObjectVT.getMVTString()
958 if (!isVarArg && ArgRegIdx < NumArgRegs) {
959 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
960 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
961 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1008 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1010 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1023 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1024 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1025 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1026 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1034 // We need to load the argument to a virtual register if we determined above
1035 // that we ran out of physical registers of the appropriate type
1037 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1038 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1039 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1040 ArgOffset += StackSlotSize;
1043 ArgValues.push_back(ArgVal);
1046   // If the function takes a variable number of arguments, make a frame index for
1047 // the start of the first vararg value... for expansion of llvm.va_start.
1049 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1051 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1052 // If this function is vararg, store any remaining integer argument regs to
1053   // their spots on the stack so that they may be loaded by dereferencing the
1054 // result of va_next.
1055 SmallVector<SDValue, 8> MemOps;
1056 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1057 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1058 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1059 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1060 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1061 MemOps.push_back(Store);
1062 // Increment the address by four for the next argument to store
1063 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1064 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1066 if (!MemOps.empty())
1067 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1070 ArgValues.push_back(Root);
1072 // Return the new list of results.
1073 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1077 /// isLSAAddress - Return the immediate to use if the specified
1078 /// value is representable as an LSA address.
1079 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1080 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1083 int Addr = C->getZExtValue();
1084 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1085 (Addr << 14 >> 14) != Addr)
1086 return 0; // Top 14 bits have to be sext of immediate.
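  // Otherwise the address fits the LSA immediate field; return it scaled down
  // to a word index (the low two bits are implicitly zero).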
1088 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1093 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1094 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1095 SDValue Chain = TheCall->getChain();
1097 bool isVarArg = TheCall->isVarArg();
1098 bool isTailCall = TheCall->isTailCall();
1100 SDValue Callee = TheCall->getCallee();
1101 unsigned NumOps = TheCall->getNumArgs();
1102 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1103 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1104 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1106 // Handy pointer type
1107 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1109 // Accumulate how many bytes are to be pushed on the stack, including the
1110   // linkage area and parameter passing area. According to the SPU ABI,
1111 // we minimally need space for [LR] and [SP]
1112 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1114 // Set up a copy of the stack pointer for use loading and storing any
1115   // arguments that may not fit in the registers available for argument passing.
1117 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1119   // Figure out which arguments are going to go in registers, and which in memory.
1121 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1122 unsigned ArgRegIdx = 0;
1124 // Keep track of registers passing arguments
1125 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1126 // And the arguments passed on the stack
1127 SmallVector<SDValue, 8> MemOpChains;
1129 for (unsigned i = 0; i != NumOps; ++i) {
1130 SDValue Arg = TheCall->getArg(i);
1132 // PtrOff will be used to store the current argument to the stack if a
1133 // register cannot be found for it.
1134 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1135 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1137 switch (Arg.getValueType().getSimpleVT()) {
1138 default: assert(0 && "Unexpected ValueType for argument!");
1142 if (ArgRegIdx != NumArgRegs) {
1143 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1145 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1146 ArgOffset += StackSlotSize;
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1162 if (ArgRegIdx != NumArgRegs) {
1163 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1165 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1166 ArgOffset += StackSlotSize;
1172 // Update number of stack bytes actually used, insert a call sequence start
1173 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1174 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1177 if (!MemOpChains.empty()) {
1178 // Adjust the stack pointer for the stack arguments.
1179 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1180 &MemOpChains[0], MemOpChains.size());
1183 // Build a sequence of copy-to-reg nodes chained together with token chain
1184 // and flag operands which copy the outgoing args into the appropriate regs.
1186 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1187 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1189 InFlag = Chain.getValue(1);
1192 SmallVector<SDValue, 8> Ops;
1193 unsigned CallOpc = SPUISD::CALL;
1195 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1196 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1197 // node so that legalize doesn't hack it.
1198 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1199 GlobalValue *GV = G->getGlobal();
1200 MVT CalleeVT = Callee.getValueType();
1201 SDValue Zero = DAG.getConstant(0, PtrVT);
1202 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1204 if (!ST->usingLargeMem()) {
1205 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1206     // style calls; otherwise, external symbols are BRASL calls. This assumes
1207 // that declared/defined symbols are in the same compilation unit and can
1208 // be reached through PC-relative jumps.
1211     // This may be an unsafe assumption for JIT and really large compilation units.
1213 if (GV->isDeclaration()) {
1214 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1216 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1219     // "Large memory" mode: Turn all calls into indirect calls with an X-form address.
1221 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1223 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1224 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1225 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1226 // If this is an absolute destination address that appears to be a legal
1227 // local store address, use the munged value.
1228 Callee = SDValue(Dest, 0);
1231 Ops.push_back(Chain);
1232 Ops.push_back(Callee);
1234   // Add argument registers to the end of the list so that they are known live into the call.
1236 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1237 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1238 RegsToPass[i].second.getValueType()));
1240 if (InFlag.getNode())
1241 Ops.push_back(InFlag);
1242 // Returns a chain and a flag for retval copy to use.
1243 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1244 &Ops[0], Ops.size());
1245 InFlag = Chain.getValue(1);
1247 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1248 DAG.getIntPtrConstant(0, true), InFlag);
1249 if (TheCall->getValueType(0) != MVT::Other)
1250 InFlag = Chain.getValue(1);
1252 SDValue ResultVals[3];
1253 unsigned NumResults = 0;
1255 // If the call has results, copy the values out of the ret val registers.
1256 switch (TheCall->getValueType(0).getSimpleVT()) {
1257 default: assert(0 && "Unexpected ret value!");
1258 case MVT::Other: break;
1260 if (TheCall->getValueType(1) == MVT::i32) {
1261 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1262 ResultVals[0] = Chain.getValue(0);
1263 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1264 Chain.getValue(2)).getValue(1);
1265 ResultVals[1] = Chain.getValue(0);
1268 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1269 ResultVals[0] = Chain.getValue(0);
1274 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1275 ResultVals[0] = Chain.getValue(0);
1280 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1281 InFlag).getValue(1);
1282 ResultVals[0] = Chain.getValue(0);
1290 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1291 InFlag).getValue(1);
1292 ResultVals[0] = Chain.getValue(0);
1297 // If the function returns void, just return the chain.
1298 if (NumResults == 0)
1301 // Otherwise, merge everything together with a MERGE_VALUES node.
1302 ResultVals[NumResults++] = Chain;
1303 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1304 return Res.getValue(Op.getResNo());
1308 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1309 SmallVector<CCValAssign, 16> RVLocs;
1310 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1311 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1312 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1313 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1315 // If this is the first return lowered for this function, add the regs to the
1316 // liveout set for the function.
1317 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1318 for (unsigned i = 0; i != RVLocs.size(); ++i)
1319 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1322 SDValue Chain = Op.getOperand(0);
1325 // Copy the result values into the output registers.
1326 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1327 CCValAssign &VA = RVLocs[i];
1328 assert(VA.isRegLoc() && "Can only return in registers!");
1329 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1330 Flag = Chain.getValue(1);
1334 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1336 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1340 //===----------------------------------------------------------------------===//
1341 // Vector related lowering:
1342 //===----------------------------------------------------------------------===//
1344 static ConstantSDNode *
1345 getVecImm(SDNode *N) {
1346 SDValue OpVal(0, 0);
1348   // Check to see if this buildvec has a single, repeated non-undef value in its elements.
1349 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1350 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1351 if (OpVal.getNode() == 0)
1352 OpVal = N->getOperand(i);
1353 else if (OpVal != N->getOperand(i))
1357 if (OpVal.getNode() != 0) {
1358 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1363 return 0; // All UNDEF: use implicit def.; not Constant node
1366 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1367 /// and the value fits into an unsigned 18-bit constant, and if so, return the constant.
1369 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1371 if (ConstantSDNode *CN = getVecImm(N)) {
1372 uint64_t Value = CN->getZExtValue();
1373 if (ValueType == MVT::i64) {
1374 uint64_t UValue = CN->getZExtValue();
1375 uint32_t upper = uint32_t(UValue >> 32);
1376 uint32_t lower = uint32_t(UValue);
1379 Value = Value >> 32;
1381 if (Value <= 0x3ffff)
1382 return DAG.getConstant(Value, ValueType);
1388 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1389 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1391 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1393 if (ConstantSDNode *CN = getVecImm(N)) {
1394 int64_t Value = CN->getSExtValue();
1395 if (ValueType == MVT::i64) {
1396 uint64_t UValue = CN->getZExtValue();
1397 uint32_t upper = uint32_t(UValue >> 32);
1398 uint32_t lower = uint32_t(UValue);
1401 Value = Value >> 32;
1403 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1404 return DAG.getConstant(Value, ValueType);
1411 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1412 /// and the value fits into a signed 10-bit constant, and if so, return the constant.
1414 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1416 if (ConstantSDNode *CN = getVecImm(N)) {
1417 int64_t Value = CN->getSExtValue();
1418 if (ValueType == MVT::i64) {
1419 uint64_t UValue = CN->getZExtValue();
1420 uint32_t upper = uint32_t(UValue >> 32);
1421 uint32_t lower = uint32_t(UValue);
1424 Value = Value >> 32;
1426 if (isS10Constant(Value))
1427 return DAG.getConstant(Value, ValueType);
1433 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into a signed 8-bit constant, and if so, return the constant.
1437 /// @note: The incoming vector is v16i8 because that's the only way we can load
1438 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same value.
1440 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 int Value = (int) CN->getZExtValue();
1444 if (ValueType == MVT::i16
1445 && Value <= 0xffff /* truncated from uint64_t */
1446 && ((short) Value >> 8) == ((short) Value & 0xff))
1447 return DAG.getConstant(Value & 0xff, ValueType);
1448 else if (ValueType == MVT::i8
1449 && (Value & 0xff) == Value)
1450 return DAG.getConstant(Value, ValueType);
1456 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1457 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1459 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1461 if (ConstantSDNode *CN = getVecImm(N)) {
1462 uint64_t Value = CN->getZExtValue();
1463 if ((ValueType == MVT::i32
1464 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1465 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1466 return DAG.getConstant(Value >> 16, ValueType);
1472 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1473 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1474 if (ConstantSDNode *CN = getVecImm(N)) {
1475 return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1481 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1482 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1483 if (ConstantSDNode *CN = getVecImm(N)) {
1484     return DAG.getConstant(CN->getZExtValue(), MVT::i64);
1490 // If this is a vector of constants or undefs, get the bits. A bit in
1491 // UndefBits is set if the corresponding element of the vector is an
1492 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1493 // zero. Return true if this is not an array of constants, false if it is.
1495 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1496 uint64_t UndefBits[2]) {
1497 // Start with zero'd results.
1498 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1500 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1501 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1502 SDValue OpVal = BV->getOperand(i);
1504     unsigned PartNo = i >= e/2;            // In the upper 64 bits?
1505 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1507 uint64_t EltBits = 0;
1508 if (OpVal.getOpcode() == ISD::UNDEF) {
1509 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1510 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1512 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1513 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1514 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1515 const APFloat &apf = CN->getValueAPF();
1516 EltBits = (CN->getValueType(0) == MVT::f32
1517 ? FloatToBits(apf.convertToFloat())
1518 : DoubleToBits(apf.convertToDouble()));
1520 // Nonconstant element.
1524 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1527 //printf("%llx %llx %llx %llx\n",
1528 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1532 /// If this is a splat (repetition) of a value across the whole vector, return
1533 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1534 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1535 /// SplatSize = 1 byte.
1536 static bool isConstantSplat(const uint64_t Bits128[2],
1537 const uint64_t Undef128[2],
1539 uint64_t &SplatBits, uint64_t &SplatUndef,
1541 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1542 // the same as the lower 64-bits, ignoring undefs.
1543 uint64_t Bits64 = Bits128[0] | Bits128[1];
1544 uint64_t Undef64 = Undef128[0] & Undef128[1];
1545 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1546 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1547 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1548 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
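  // Each narrower candidate is formed by OR'ing the two halves of the wider
  // value (and AND'ing the undef masks), so undefined lanes act as wildcards
  // in the equality comparisons below.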
1550 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1551 if (MinSplatBits < 64) {
1553 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1555 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1556 if (MinSplatBits < 32) {
1558 // If the top 16-bits are different than the lower 16-bits, ignoring
1559 // undefs, we have an i32 splat.
1560 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1561 if (MinSplatBits < 16) {
1562 // If the top 8-bits are different than the lower 8-bits, ignoring
1563 // undefs, we have an i16 splat.
1564 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1565 == ((Bits16 >> 8) & ~Undef16)) {
1566 // Otherwise, we have an 8-bit splat.
1567 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1568 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1574 SplatUndef = Undef16;
1581 SplatUndef = Undef32;
1587 SplatBits = Bits128[0];
1588 SplatUndef = Undef128[0];
1594 return false; // Can't be a splat if two pieces don't match.
1597 // If this is a case we can't handle, return null and let the default
1598 // expansion code take care of it. If we CAN select this case, and if it
1599 // selects to a single instruction, return Op. Otherwise, if we can codegen
1600 // this case more efficiently than a constant pool load, lower it to the
1601 // sequence of ops that should be used.
1602 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1603 MVT VT = Op.getValueType();
1604 // If this is a vector of constants or undefs, get the bits. A bit in
1605 // UndefBits is set if the corresponding element of the vector is an
1606   // ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1608 uint64_t VectorBits[2];
1609 uint64_t UndefBits[2];
1610 uint64_t SplatBits, SplatUndef;
1612 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1613 || !isConstantSplat(VectorBits, UndefBits,
1614 VT.getVectorElementType().getSizeInBits(),
1615 SplatBits, SplatUndef, SplatSize))
1616 return SDValue(); // Not a constant vector, not a splat.
1618 switch (VT.getSimpleVT()) {
1621 uint32_t Value32 = SplatBits;
1622 assert(SplatSize == 4
1623 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1624 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1625 SDValue T = DAG.getConstant(Value32, MVT::i32);
1626 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1627 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1631 uint64_t f64val = SplatBits;
1632 assert(SplatSize == 8
1633 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1634 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1635 SDValue T = DAG.getConstant(f64val, MVT::i64);
1636 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1641 // 8-bit constants have to be expanded to 16-bits
1642 unsigned short Value16 = SplatBits | (SplatBits << 8);
1644 for (int i = 0; i < 8; ++i)
1645 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1646 return DAG.getNode(ISD::BIT_CONVERT, VT,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1650 unsigned short Value16;
1652 Value16 = (unsigned short) (SplatBits & 0xffff);
1654 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1655 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1657 for (int i = 0; i < 8; ++i) Ops[i] = T;
1658 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1661 unsigned int Value = SplatBits;
1662 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1663 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1666 uint64_t val = SplatBits;
1667 uint32_t upper = uint32_t(val >> 32);
1668 uint32_t lower = uint32_t(val);
1670 if (upper == lower) {
1671       // Magic constant that can be matched by IL, ILA, et al.
1672 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1673 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1677 SmallVector<SDValue, 16> ShufBytes;
1679 bool upper_special, lower_special;
1681 // NOTE: This code creates common-case shuffle masks that can be easily
1682 // detected as common expressions. It is not attempting to create highly
1683 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
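      // The special selector bytes chosen below follow the shufb control-byte
      // encoding: a selector of the form 10xxxxxx produces 0x00, 110xxxxx
      // produces 0xFF, and 111xxxxx produces 0x80 in the corresponding result
      // byte.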
1685 // Detect if the upper or lower half is a special shuffle mask pattern:
1686 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1687 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1689 // Create lower vector if not a special pattern
1690 if (!lower_special) {
1691 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1692 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1693 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1694 LO32C, LO32C, LO32C, LO32C));
1697 // Create upper vector if not a special pattern
1698 if (!upper_special) {
1699 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1700 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1701 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1702 HI32C, HI32C, HI32C, HI32C));
1705 // If either upper or lower are special, then the two input operands are
1706 // the same (basically, one of them is a "don't care")
1711 if (lower_special && upper_special) {
1712       // Unhappy situation... both upper and lower are special, so punt with
1713       // a zero vector:
1714 SDValue Zero = DAG.getConstant(0, MVT::i32);
1715 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1719 for (int i = 0; i < 4; ++i) {
1720 uint64_t val = 0;
1721 for (int j = 0; j < 4; ++j) {
1723 bool process_upper, process_lower;
1724 val <<= 8;
1725 process_upper = (upper_special && (i & 1) == 0);
1726 process_lower = (lower_special && (i & 1) == 1);
1728 if (process_upper || process_lower) {
1729 if ((process_upper && upper == 0)
1730 || (process_lower && lower == 0))
1731 val |= 0x80;
1732 else if ((process_upper && upper == 0xffffffff)
1733 || (process_lower && lower == 0xffffffff))
1734 val |= 0xc0;
1735 else if ((process_upper && upper == 0x80000000)
1736 || (process_lower && lower == 0x80000000))
1737 val |= (j == 0 ? 0xe0 : 0x80);
1738 } else
1739 val |= i * 4 + j + ((i & 1) * 16);
1740 }
1742 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1743 }
1745 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1746 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1747 &ShufBytes[0], ShufBytes.size()));
1755 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1756 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1757 /// permutation vector, V3, is monotonically increasing with one "exception"
1758 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1759 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1760 /// In either case, the net result eventually invokes SHUFB to
1761 /// permute/shuffle the bytes from V1 and V2.
1763 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1764 /// generate a control word for byte/halfword/word insertion. This takes care
1765 /// of a single element move from V2 into V1.
1767 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
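// Illustrative example (hypothetical input): shuffling two v4i32 vectors with
// the permutation mask (0, 1, 6, 3) is monotonic except for slot 2, which
// names element 2 of V2; that single exception is what the INSERT_MASK/C*D
// path below handles, while any other pattern falls back to a full SHUFB
// byte permutation.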
1768 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1769 SDValue V1 = Op.getOperand(0);
1770 SDValue V2 = Op.getOperand(1);
1771 SDValue PermMask = Op.getOperand(2);
1773 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1775 // If we have a single element being moved from V1 to V2, this can be handled
1776 // using the C*[DX] compute mask instructions, but the vector elements have
1777 // to be monotonically increasing with one exception element.
1778 MVT EltVT = V1.getValueType().getVectorElementType();
1779 unsigned EltsFromV2 = 0;
1780 unsigned V2Elt = 0;
1781 unsigned V2EltIdx0 = 0;
1782 unsigned CurrElt = 0;
1783 bool monotonic = true;
1784 if (EltVT == MVT::i8)
1785 V2EltIdx0 = 16;
1786 else if (EltVT == MVT::i16)
1787 V2EltIdx0 = 8;
1788 else if (EltVT == MVT::i32)
1789 V2EltIdx0 = 4;
1790 else
1791 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1793 for (unsigned i = 0, e = PermMask.getNumOperands();
1794 EltsFromV2 <= 1 && monotonic && i != e;
1795 ++i) {
1796 unsigned SrcElt;
1797 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1798 SrcElt = 0;
1799 else
1800 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1802 if (SrcElt >= V2EltIdx0) {
1803 ++EltsFromV2;
1804 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1805 } else if (CurrElt != SrcElt) {
1806 monotonic = false;
1807 }
1809 ++CurrElt;
1810 }
1812 if (EltsFromV2 == 1 && monotonic) {
1813 // Compute mask and shuffle
1814 MachineFunction &MF = DAG.getMachineFunction();
1815 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1816 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1817 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1818 // Initialize temporary register to 0
1819 SDValue InitTempReg =
1820 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1821 // Copy register's contents as index in INSERT_MASK:
1822 SDValue ShufMaskOp =
1823 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1824 DAG.getTargetConstant(V2Elt, MVT::i32),
1825 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1826 // Use shuffle mask in SHUFB synthetic instruction:
1827 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829 // Convert the VECTOR_SHUFFLE mask's input element units to the
1830 // actual bytes.
1831 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1833 SmallVector<SDValue, 16> ResultMask;
1834 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1835 unsigned SrcElt;
1836 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1837 SrcElt = 0;
1838 else
1839 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1841 for (unsigned j = 0; j < BytesPerElement; ++j) {
1842 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1843 MVT::i8));
1844 }
1845 }
1847 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1848 &ResultMask[0], ResultMask.size());
1849 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1853 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1854 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1856 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1857 // For a constant, build the appropriate constant vector, which will
1858 // eventually simplify to a vector register load.
1860 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1861 SmallVector<SDValue, 16> ConstVecValues;
1862 MVT VT;
1863 size_t n_copies;
1865 // Create a constant vector:
1866 switch (Op.getValueType().getSimpleVT()) {
1867 default: assert(0 && "Unexpected constant value type in "
1868 "LowerSCALAR_TO_VECTOR");
1869 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1870 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1871 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1872 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1873 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1874 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1877 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1878 for (size_t j = 0; j < n_copies; ++j)
1879 ConstVecValues.push_back(CValue);
1881 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1882 &ConstVecValues[0], ConstVecValues.size());
1884 // Otherwise, copy the value from one register to another:
1885 switch (Op0.getValueType().getSimpleVT()) {
1886 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1893 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1900 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1901 switch (Op.getValueType().getSimpleVT()) {
1903 cerr << "CellSPU: Unknown vector multiplication, got "
1904 << Op.getValueType().getMVTString()
1910 SDValue rA = Op.getOperand(0);
1911 SDValue rB = Op.getOperand(1);
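// The SPU only has 16x16->32 multiplier hardware, so a full 32-bit multiply
// is assembled from partial products: mpyu(a,b) = lo16(a)*lo16(b), and
// mpyh(a,b) = (hi16(a)*lo16(b)) << 16. Summing mpyu(a,b) + mpyh(a,b) +
// mpyh(b,a) yields the low 32 bits of a*b in each vector lane.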
1912 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1913 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1914 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1915 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1917 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1921 // Multiply two v8i16 vectors (pipeline friendly version):
1922 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1923 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1924 // c) Use SELB to select upper and lower halves from the intermediate results
1926 // NOTE: We really want to move the SELECT_MASK earlier so that it can
1927 // dual-issue with the multiplies; the code below manages to do this, even
1928 // if it is a little convoluted.
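// The 0xcccc bit pattern, expanded byte-wise by SELECT_MASK (fsmb), covers
// bytes 0-1 of every 32-bit word, i.e. the upper halfword of each word. The
// SELB below therefore merges the upper halfwords taken from the MPYHH
// product (shifted left 16 bits) with the lower halfwords taken from MPY.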
1930 MachineFunction &MF = DAG.getMachineFunction();
1931 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1932 SDValue Chain = Op.getOperand(0);
1933 SDValue rA = Op.getOperand(0);
1934 SDValue rB = Op.getOperand(1);
1935 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1938 SDValue FSMBOp =
1939 DAG.getCopyToReg(Chain, FSMBIreg,
1940 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1941 DAG.getConstant(0xcccc, MVT::i16)));
1943 SDValue HHProd =
1944 DAG.getCopyToReg(FSMBOp, HiProdReg,
1945 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1947 SDValue HHProd_v4i32 =
1948 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1949 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1951 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1952 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1953 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1954 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1955 HHProd_v4i32,
1956 DAG.getConstant(16, MVT::i16))),
1957 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1960 // This one is nasty (apologies to Monty Python).
1962 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1963 // is to break it all apart, sign extend, and reassemble the various
1964 // intermediate products.
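// Rough sketch of what follows: only the low 8 bits of each 8x8 product are
// needed, so the bytes are regrouped into halfword lanes, multiplied with the
// 16-bit MPY primitive, shifted back into byte position with VEC_SHL/VEC_SRA,
// and the partial results are recombined via SELECT_MASK, SELB and a final OR.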
1966 SDValue rA = Op.getOperand(0);
1967 SDValue rB = Op.getOperand(1);
1968 SDValue c8 = DAG.getConstant(8, MVT::i32);
1969 SDValue c16 = DAG.getConstant(16, MVT::i32);
1972 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1976 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1978 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1981 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1984 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1985 DAG.getConstant(0x2222, MVT::i16));
1987 SDValue LoProdParts =
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1989 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1990 LLProd, LHProd, FSMBmask));
1992 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1995 DAG.getNode(ISD::AND, MVT::v4i32,
1997 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1998 LoProdMask, LoProdMask,
1999 LoProdMask, LoProdMask));
2002 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2003 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2010 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2011 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2012 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2015 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2016 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2017 DAG.getNode(SPUISD::VEC_SRA,
2018 MVT::v4i32, rAH, c8)),
2019 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2020 DAG.getNode(SPUISD::VEC_SRA,
2021 MVT::v4i32, rBH, c8)));
2024 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2026 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2030 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2032 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2033 DAG.getNode(ISD::OR, MVT::v4i32,
2041 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2042 MachineFunction &MF = DAG.getMachineFunction();
2043 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2045 SDValue A = Op.getOperand(0);
2046 SDValue B = Op.getOperand(1);
2047 MVT VT = Op.getValueType();
2049 unsigned VRegBR, VRegC;
2051 if (VT == MVT::f32) {
2052 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2053 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2055 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2056 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2058 // TODO: make sure we're feeding FPInterp the right arguments
2059 // Right now: fi B, frest(B)
2062 // (Floating Interpolate (FP Reciprocal Estimate B))
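// The sequence below amounts to a single Newton-Raphson refinement of the
// hardware reciprocal estimate: with r ~= 1/B from frest/fi, it forms
// q0 = A * r and returns q0 + r * (A - B * q0).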
2063 SDValue BRcpl =
2064 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2065 DAG.getNode(SPUISD::FPInterp, VT, B,
2066 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2068 // Computes A * BRcpl and stores in a temporary register
2069 SDValue AxBRcpl =
2070 DAG.getCopyToReg(BRcpl, VRegC,
2071 DAG.getNode(ISD::FMUL, VT, A,
2072 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2073 // What does the Chain variable do? It's magic!
2074 // TODO: set Chain = Op(0).getEntryNode()
2076 return DAG.getNode(ISD::FADD, VT,
2077 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2078 DAG.getNode(ISD::FMUL, VT,
2079 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2080 DAG.getNode(ISD::FSUB, VT, A,
2081 DAG.getNode(ISD::FMUL, VT, B,
2082 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2085 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2086 MVT VT = Op.getValueType();
2087 SDValue N = Op.getOperand(0);
2088 SDValue Elt = Op.getOperand(1);
2089 SDValue ShufMask[16];
2090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2092 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2094 int EltNo = (int) C->getZExtValue();
2097 if (VT == MVT::i8 && EltNo >= 16)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2099 else if (VT == MVT::i16 && EltNo >= 8)
2100 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2101 else if (VT == MVT::i32 && EltNo >= 4)
2102 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2103 else if (VT == MVT::i64 && EltNo >= 2)
2104 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2106 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2107 // i32 and i64: Element 0 is the preferred slot
2108 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2111 // Need to generate shuffle mask and extract:
2112 int prefslot_begin = -1, prefslot_end = -1;
2113 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2115 switch (VT.getSimpleVT()) {
2116 default:
2117 assert(false && "Invalid value type!");
2118 case MVT::i8: {
2119 prefslot_begin = prefslot_end = 3;
2120 break;
2121 }
2122 case MVT::i16: {
2123 prefslot_begin = 2; prefslot_end = 3;
2124 break;
2125 }
2126 case MVT::i32: {
2127 prefslot_begin = 0; prefslot_end = 3;
2128 break;
2129 }
2130 case MVT::i64: {
2131 prefslot_begin = 0; prefslot_end = 7;
2132 break;
2133 }
2134 }
2136 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2137 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2139 for (int i = 0; i < 16; ++i) {
2140 // zero fill upper part of preferred slot, don't care about the
2141 // remaining bytes
2142 unsigned int mask_val;
2144 if (i <= prefslot_end) {
2145 mask_val =
2146 ((i < prefslot_begin)
2147 ? 0x80
2148 : elt_byte + (i - prefslot_begin));
2150 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2151 } else
2152 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2153 }
2155 SDValue ShufMaskVec =
2156 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2157 &ShufMask[0],
2158 sizeof(ShufMask) / sizeof(ShufMask[0]));
2160 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2161 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2162 N, N, ShufMaskVec));
2166 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2167 SDValue VecOp = Op.getOperand(0);
2168 SDValue ValOp = Op.getOperand(1);
2169 SDValue IdxOp = Op.getOperand(2);
2170 MVT VT = Op.getValueType();
2172 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2173 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2175 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2176 // Use $2 because it's always 16-byte aligned and it's available:
2177 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2179 SDValue result =
2180 DAG.getNode(SPUISD::SHUFB, VT,
2181 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2182 VecOp,
2183 DAG.getNode(SPUISD::INSERT_MASK, VT,
2184 DAG.getNode(ISD::ADD, PtrVT,
2185 PtrBase,
2186 DAG.getConstant(CN->getZExtValue(),
2187 PtrVT))));
2189 return result;
2190 }
2192 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2194 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2196 assert(Op.getValueType() == MVT::i8);
2199 assert(0 && "Unhandled i8 math operator");
2203 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
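// In other words, an i8 subtract becomes trunc.i8(sub.i16(sext(a), sext(b)));
// the remaining i8 cases below follow the same promote / operate / truncate
// pattern, differing only in how the operands are extended.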
2205 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2210 N1 = (N1.getOpcode() != ISD::Constant
2211 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2212 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2214 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2215 DAG.getNode(Opc, MVT::i16, N0, N1));
2219 SDValue N1 = Op.getOperand(1);
2221 N0 = (N0.getOpcode() != ISD::Constant
2222 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2223 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2225 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2232 SDValue ExpandArg =
2233 DAG.getNode(ISD::OR, MVT::i16, N0,
2234 DAG.getNode(ISD::SHL, MVT::i16,
2235 N0, DAG.getConstant(8, MVT::i16)));
2236 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2237 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2241 SDValue N1 = Op.getOperand(1);
2243 N0 = (N0.getOpcode() != ISD::Constant
2244 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2245 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2247 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2250 N1 = (N1.getOpcode() != ISD::Constant
2251 ? DAG.getNode(N1Opc, MVT::i16, N1)
2252 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2254 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2255 DAG.getNode(Opc, MVT::i16, N0, N1));
2258 SDValue N1 = Op.getOperand(1);
2260 N0 = (N0.getOpcode() != ISD::Constant
2261 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2262 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2264 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2267 N1 = (N1.getOpcode() != ISD::Constant
2268 ? DAG.getNode(N1Opc, MVT::i16, N1)
2269 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2271 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2272 DAG.getNode(Opc, MVT::i16, N0, N1));
2275 SDValue N1 = Op.getOperand(1);
2277 N0 = (N0.getOpcode() != ISD::Constant
2278 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2279 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2281 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2282 N1 = (N1.getOpcode() != ISD::Constant
2283 ? DAG.getNode(N1Opc, MVT::i16, N1)
2284 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2286 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2287 DAG.getNode(Opc, MVT::i16, N0, N1));
2295 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2297 MVT VT = Op.getValueType();
2298 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2300 SDValue Op0 = Op.getOperand(0);
2303 case ISD::ZERO_EXTEND:
2304 case ISD::SIGN_EXTEND:
2305 case ISD::ANY_EXTEND: {
2306 MVT Op0VT = Op0.getValueType();
2307 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2309 assert(Op0VT == MVT::i32
2310 && "CellSPU: Zero/sign extending something other than i32");
2311 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2313 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2314 ? SPUISD::ROTBYTES_RIGHT_S
2315 : SPUISD::ROTQUAD_RZ_BYTES);
2316 SDValue PromoteScalar =
2317 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2319 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2320 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2321 DAG.getNode(NewOpc, Op0VecVT,
2322 PromoteScalar,
2323 DAG.getConstant(4, MVT::i32))));
2327 // Turn operands into vectors to satisfy type checking (shufb works on
2328 // vectors)
2329 SDValue Op0 =
2330 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2331 SDValue Op1 =
2332 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2333 SmallVector<SDValue, 16> ShufBytes;
2335 // Create the shuffle mask for "rotating" the carry up one register slot
2336 // once the carry is generated.
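// Concretely, 0x04050607 and 0x0c0d0e0f route word 1 into word 0 and word 3
// into word 2 of the carry vector, while 0x80808080 produces zero bytes; the
// carry generated by each low word is thus aligned with its high word before
// ADD_EXTENDED consumes it.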
2337 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2340 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2342 SDValue CarryGen =
2343 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2344 SDValue ShiftedCarry =
2345 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2346 CarryGen, CarryGen,
2347 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2348 &ShufBytes[0], ShufBytes.size()));
2350 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2351 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2352 Op0, Op1, ShiftedCarry));
2356 // Turn operands into vectors to satisfy type checking (shufb works on
2357 // vectors)
2358 SDValue Op0 =
2359 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2360 SDValue Op1 =
2361 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2362 SmallVector<SDValue, 16> ShufBytes;
2364 // Create the shuffle mask for "rotating" the borrow up one register slot
2365 // once the borrow is generated.
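// Same idea as the ADD case above, except that the 0xc0c0c0c0 control words
// make shufb produce 0xFF bytes, so the lanes that receive no generated
// borrow are filled with all-ones (treated as "no borrow") before
// SUB_EXTENDED runs.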
2366 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2367 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2368 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2369 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2371 SDValue BorrowGen =
2372 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2373 SDValue ShiftedBorrow =
2374 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2375 BorrowGen, BorrowGen,
2376 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2377 &ShufBytes[0], ShufBytes.size()));
2379 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2380 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2381 Op0, Op1, ShiftedBorrow));
2385 SDValue ShiftAmt = Op.getOperand(1);
2386 MVT ShiftAmtVT = ShiftAmt.getValueType();
2387 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2389 DAG.getNode(SPUISD::SELB, VecVT,
2391 DAG.getConstant(0, VecVT),
2392 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2393 DAG.getConstant(0xff00ULL, MVT::i16)));
2394 SDValue ShiftAmtBytes =
2395 DAG.getNode(ISD::SRL, ShiftAmtVT,
2397 DAG.getConstant(3, ShiftAmtVT));
2398 SDValue ShiftAmtBits =
2399 DAG.getNode(ISD::AND, ShiftAmtVT,
2401 DAG.getConstant(7, ShiftAmtVT));
2403 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2404 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2405 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2406 MaskLower, ShiftAmtBytes),
2411 MVT VT = Op.getValueType();
2412 SDValue ShiftAmt = Op.getOperand(1);
2413 MVT ShiftAmtVT = ShiftAmt.getValueType();
2414 SDValue ShiftAmtBytes =
2415 DAG.getNode(ISD::SRL, ShiftAmtVT,
2417 DAG.getConstant(3, ShiftAmtVT));
2418 SDValue ShiftAmtBits =
2419 DAG.getNode(ISD::AND, ShiftAmtVT,
2421 DAG.getConstant(7, ShiftAmtVT));
2423 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2424 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2425 Op0, ShiftAmtBytes),
2430 // Promote Op0 to vector
2432 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2433 SDValue ShiftAmt = Op.getOperand(1);
2434 MVT ShiftVT = ShiftAmt.getValueType();
2436 // Negate variable shift amounts
2437 if (!isa<ConstantSDNode>(ShiftAmt)) {
2438 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2439 DAG.getConstant(0, ShiftVT), ShiftAmt);
2442 SDValue UpperHalfSign =
2443 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2444 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2445 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2446 Op0, DAG.getConstant(31, MVT::i32))));
2447 SDValue UpperHalfSignMask =
2448 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2449 SDValue UpperLowerMask =
2450 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2451 DAG.getConstant(0xff00, MVT::i16));
2452 SDValue UpperLowerSelect =
2453 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2454 UpperHalfSignMask, Op0, UpperLowerMask);
2455 SDValue RotateLeftBytes =
2456 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2457 UpperLowerSelect, ShiftAmt);
2458 SDValue RotateLeftBits =
2459 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2460 RotateLeftBytes, ShiftAmt);
2462 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2463 RotateLeftBits);
2470 //! Lower byte immediate operations for v16i8 vectors:
2471 static SDValue
2472 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2473 SDValue ConstVec;
2474 SDValue Arg;
2475 MVT VT = Op.getValueType();
2477 ConstVec = Op.getOperand(0);
2478 Arg = Op.getOperand(1);
2479 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2480 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2481 ConstVec = ConstVec.getOperand(0);
2483 ConstVec = Op.getOperand(1);
2484 Arg = Op.getOperand(0);
2485 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2486 ConstVec = ConstVec.getOperand(0);
2491 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2492 uint64_t VectorBits[2];
2493 uint64_t UndefBits[2];
2494 uint64_t SplatBits, SplatUndef;
2495 int SplatSize;
2497 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2498 && isConstantSplat(VectorBits, UndefBits,
2499 VT.getVectorElementType().getSizeInBits(),
2500 SplatBits, SplatUndef, SplatSize)) {
2501 SDValue tcVec[16];
2502 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2503 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2505 // Turn the BUILD_VECTOR into a set of target constants:
2506 for (size_t i = 0; i < tcVecSize; ++i)
2507 tcVec[i] = tc;
2509 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2510 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2513 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2514 // lowered. Return the operation, rather than a null SDValue.
2518 //! Lower i32 multiplication
2519 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2520 unsigned Opc) {
2521 switch (VT.getSimpleVT()) {
2523 cerr << "CellSPU: Unknown LowerMUL value type, got "
2524 << Op.getValueType().getMVTString()
2530 SDValue rA = Op.getOperand(0);
2531 SDValue rB = Op.getOperand(1);
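// Scalar version of the decomposition used in LowerVectorMUL above:
// lo32(a*b) = lo16(a)*lo16(b) + ((hi16(a)*lo16(b) + hi16(b)*lo16(a)) << 16),
// realized here with MPYU and two MPYH nodes.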
2533 return DAG.getNode(ISD::ADD, MVT::i32,
2534 DAG.getNode(ISD::ADD, MVT::i32,
2535 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2536 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2537 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2544 //! Custom lowering for CTPOP (count population)
2546 Custom lowering code that counts the number of ones in the input
2547 operand. SPU has such an instruction, but it counts the number of
2548 ones per byte, which then have to be accumulated.
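// For example, an i32 input yields four per-byte counts from CNTB; the i32
// case below folds them with two shift-and-add rounds (shift by 16, then by
// 8) and masks the sum with 0xff to produce the final population count.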
2550 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2551 MVT VT = Op.getValueType();
2552 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2554 switch (VT.getSimpleVT()) {
2556 assert(false && "Invalid value type!");
2558 SDValue N = Op.getOperand(0);
2559 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2561 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2562 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2564 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2568 MachineFunction &MF = DAG.getMachineFunction();
2569 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2571 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2573 SDValue N = Op.getOperand(0);
2574 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2575 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2576 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2578 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2579 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2581 // CNTB_result becomes the chain to which all of the virtual registers
2582 // CNTB_reg, SUM1_reg become associated:
2583 SDValue CNTB_result =
2584 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2586 SDValue CNTB_rescopy =
2587 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2589 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2591 return DAG.getNode(ISD::AND, MVT::i16,
2592 DAG.getNode(ISD::ADD, MVT::i16,
2593 DAG.getNode(ISD::SRL, MVT::i16,
2600 MachineFunction &MF = DAG.getMachineFunction();
2601 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2603 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2604 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2606 SDValue N = Op.getOperand(0);
2607 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2608 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2609 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2610 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2612 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2613 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2615 // CNTB_result becomes the chain to which all of the virtual registers
2616 // CNTB_reg, SUM1_reg become associated:
2617 SDValue CNTB_result =
2618 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2620 SDValue CNTB_rescopy =
2621 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2624 DAG.getNode(ISD::SRL, MVT::i32,
2625 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2628 DAG.getNode(ISD::ADD, MVT::i32,
2629 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2631 SDValue Sum1_rescopy =
2632 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2635 DAG.getNode(ISD::SRL, MVT::i32,
2636 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2639 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2640 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2642 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2652 /// LowerOperation - Provide custom lowering hooks for some operations.
2655 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2657 unsigned Opc = (unsigned) Op.getOpcode();
2658 MVT VT = Op.getValueType();
2662 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2663 cerr << "Op.getOpcode() = " << Opc << "\n";
2664 cerr << "*Op.getNode():\n";
2665 Op.getNode()->dump();
2671 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2673 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2674 case ISD::ConstantPool:
2675 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2676 case ISD::GlobalAddress:
2677 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2678 case ISD::JumpTable:
2679 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2681 return LowerConstant(Op, DAG);
2682 case ISD::ConstantFP:
2683 return LowerConstantFP(Op, DAG);
2685 return LowerBRCOND(Op, DAG);
2686 case ISD::FORMAL_ARGUMENTS:
2687 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2689 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2691 return LowerRET(Op, DAG, getTargetMachine());
2694 // i8, i64 math ops:
2695 case ISD::ZERO_EXTEND:
2696 case ISD::SIGN_EXTEND:
2697 case ISD::ANY_EXTEND:
2706 return LowerI8Math(Op, DAG, Opc);
2707 else if (VT == MVT::i64)
2708 return LowerI64Math(Op, DAG, Opc);
2712 // Vector-related lowering.
2713 case ISD::BUILD_VECTOR:
2714 return LowerBUILD_VECTOR(Op, DAG);
2715 case ISD::SCALAR_TO_VECTOR:
2716 return LowerSCALAR_TO_VECTOR(Op, DAG);
2717 case ISD::VECTOR_SHUFFLE:
2718 return LowerVECTOR_SHUFFLE(Op, DAG);
2719 case ISD::EXTRACT_VECTOR_ELT:
2720 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2721 case ISD::INSERT_VECTOR_ELT:
2722 return LowerINSERT_VECTOR_ELT(Op, DAG);
2724 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2728 return LowerByteImmed(Op, DAG);
2730 // Vector and i8 multiply:
2733 return LowerVectorMUL(Op, DAG);
2734 else if (VT == MVT::i8)
2735 return LowerI8Math(Op, DAG, Opc);
2737 return LowerMUL(Op, DAG, VT, Opc);
2740 if (VT == MVT::f32 || VT == MVT::v4f32)
2741 return LowerFDIVf32(Op, DAG);
2742 // else if (Op.getValueType() == MVT::f64)
2743 // return LowerFDIVf64(Op, DAG);
2745 assert(0 && "Calling FDIV on unsupported MVT");
2748 return LowerCTPOP(Op, DAG);
2754 //===----------------------------------------------------------------------===//
2755 // Target Optimization Hooks
2756 //===----------------------------------------------------------------------===//
2759 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2762 TargetMachine &TM = getTargetMachine();
2764 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2765 SelectionDAG &DAG = DCI.DAG;
2766 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2767 SDValue Result; // Initially, NULL result
2769 switch (N->getOpcode()) {
2772 SDValue Op1 = N->getOperand(1);
2774 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2775 SDValue Op01 = Op0.getOperand(1);
2776 if (Op01.getOpcode() == ISD::Constant
2777 || Op01.getOpcode() == ISD::TargetConstant) {
2778 // (add <const>, (SPUindirect <arg>, <const>)) ->
2779 // (SPUindirect <arg>, <const + const>)
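// e.g. (add (SPUindirect <arg>, 16), 4) folds to (SPUindirect <arg>, 20).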
2780 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2781 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2782 SDValue combinedConst =
2783 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2784 Op0.getValueType());
2786 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2787 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2788 DEBUG(cerr << "With: (SPUindirect <arg>, "
2789 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2790 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2791 Op0.getOperand(0), combinedConst);
2793 } else if (isa<ConstantSDNode>(Op0)
2794 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2795 SDValue Op11 = Op1.getOperand(1);
2796 if (Op11.getOpcode() == ISD::Constant
2797 || Op11.getOpcode() == ISD::TargetConstant) {
2798 // (add (SPUindirect <arg>, <const>), <const>) ->
2799 // (SPUindirect <arg>, <const + const>)
2800 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2801 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2802 SDValue combinedConst =
2803 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2804 Op0.getValueType());
2806 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2807 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2808 DEBUG(cerr << "With: (SPUindirect <arg>, "
2809 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2811 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2812 Op1.getOperand(0), combinedConst);
2817 case ISD::SIGN_EXTEND:
2818 case ISD::ZERO_EXTEND:
2819 case ISD::ANY_EXTEND: {
2820 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2821 N->getValueType(0) == Op0.getValueType()) {
2822 // (any_extend (SPUextract_elt0 <arg>)) ->
2823 // (SPUextract_elt0 <arg>)
2824 // Types must match, however...
2825 DEBUG(cerr << "Replace: ");
2826 DEBUG(N->dump(&DAG));
2827 DEBUG(cerr << "\nWith: ");
2828 DEBUG(Op0.getNode()->dump(&DAG));
2829 DEBUG(cerr << "\n");
2835 case SPUISD::IndirectAddr: {
2836 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2837 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2838 if (CN->getZExtValue() == 0) {
2839 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2840 // (SPUaform <addr>, 0)
2842 DEBUG(cerr << "Replace: ");
2843 DEBUG(N->dump(&DAG));
2844 DEBUG(cerr << "\nWith: ");
2845 DEBUG(Op0.getNode()->dump(&DAG));
2846 DEBUG(cerr << "\n");
2853 case SPUISD::SHLQUAD_L_BITS:
2854 case SPUISD::SHLQUAD_L_BYTES:
2855 case SPUISD::VEC_SHL:
2856 case SPUISD::VEC_SRL:
2857 case SPUISD::VEC_SRA:
2858 case SPUISD::ROTQUAD_RZ_BYTES:
2859 case SPUISD::ROTQUAD_RZ_BITS: {
2860 SDValue Op1 = N->getOperand(1);
2862 if (isa<ConstantSDNode>(Op1)) {
2863 // Kill degenerate vector shifts:
2864 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2866 if (CN->getZExtValue() == 0) {
2872 case SPUISD::PROMOTE_SCALAR: {
2873 switch (Op0.getOpcode()) {
2876 case ISD::ANY_EXTEND:
2877 case ISD::ZERO_EXTEND:
2878 case ISD::SIGN_EXTEND: {
2879 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2881 // but only if the SPUpromote_scalar and <arg> types match.
2882 SDValue Op00 = Op0.getOperand(0);
2883 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2884 SDValue Op000 = Op00.getOperand(0);
2885 if (Op000.getValueType() == N->getValueType(0)) {
2891 case SPUISD::EXTRACT_ELT0: {
2892 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2894 Result = Op0.getOperand(0);
2901 // Otherwise, return unchanged.
2903 if (Result.getNode()) {
2904 DEBUG(cerr << "\nReplace.SPU: ");
2905 DEBUG(N->dump(&DAG));
2906 DEBUG(cerr << "\nWith: ");
2907 DEBUG(Result.getNode()->dump(&DAG));
2908 DEBUG(cerr << "\n");
2915 //===----------------------------------------------------------------------===//
2916 // Inline Assembly Support
2917 //===----------------------------------------------------------------------===//
2919 /// getConstraintType - Given a constraint letter, return the type of
2920 /// constraint it is for this target.
2921 SPUTargetLowering::ConstraintType
2922 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2923 if (ConstraintLetter.size() == 1) {
2924 switch (ConstraintLetter[0]) {
2931 return C_RegisterClass;
2934 return TargetLowering::getConstraintType(ConstraintLetter);
2937 std::pair<unsigned, const TargetRegisterClass*>
2938 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2941 if (Constraint.size() == 1) {
2942 // GCC RS6000 Constraint Letters
2943 switch (Constraint[0]) {
2947 return std::make_pair(0U, SPU::R64CRegisterClass);
2948 return std::make_pair(0U, SPU::R32CRegisterClass);
2951 return std::make_pair(0U, SPU::R32FPRegisterClass);
2952 else if (VT == MVT::f64)
2953 return std::make_pair(0U, SPU::R64FPRegisterClass);
2956 return std::make_pair(0U, SPU::GPRCRegisterClass);
2960 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2963 //! Compute used/known bits for a SPU operand
2965 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2969 const SelectionDAG &DAG,
2970 unsigned Depth ) const {
2972 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2975 switch (Op.getOpcode()) {
2977 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2987 case SPUISD::PROMOTE_SCALAR: {
2988 SDValue Op0 = Op.getOperand(0);
2989 MVT Op0VT = Op0.getValueType();
2990 unsigned Op0VTBits = Op0VT.getSizeInBits();
2991 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2992 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2993 KnownOne |= APInt(Op0VTBits, InMask, false);
2997 case SPUISD::LDRESULT:
2998 case SPUISD::EXTRACT_ELT0:
2999 case SPUISD::EXTRACT_ELT0_CHAINED: {
3000 MVT OpVT = Op.getValueType();
3001 unsigned OpVTBits = OpVT.getSizeInBits();
3002 uint64_t InMask = OpVT.getIntegerVTBitMask();
3003 KnownZero |= APInt(OpVTBits, ~InMask, false);
3004 KnownOne |= APInt(OpVTBits, InMask, false);
3009 case EXTRACT_I1_ZEXT:
3010 case EXTRACT_I1_SEXT:
3011 case EXTRACT_I8_ZEXT:
3012 case EXTRACT_I8_SEXT:
3017 case SPUISD::SHLQUAD_L_BITS:
3018 case SPUISD::SHLQUAD_L_BYTES:
3019 case SPUISD::VEC_SHL:
3020 case SPUISD::VEC_SRL:
3021 case SPUISD::VEC_SRA:
3022 case SPUISD::VEC_ROTL:
3023 case SPUISD::VEC_ROTR:
3024 case SPUISD::ROTQUAD_RZ_BYTES:
3025 case SPUISD::ROTQUAD_RZ_BITS:
3026 case SPUISD::ROTBYTES_RIGHT_S:
3027 case SPUISD::ROTBYTES_LEFT:
3028 case SPUISD::ROTBYTES_LEFT_CHAINED:
3029 case SPUISD::SELECT_MASK:
3031 case SPUISD::FPInterp:
3032 case SPUISD::FPRecipEst:
3033 case SPUISD::SEXT32TO64:
3038 // LowerAsmOperandForConstraint
3040 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3041 char ConstraintLetter,
3043 std::vector<SDValue> &Ops,
3044 SelectionDAG &DAG) const {
3045 // Default, for the time being, to the base class handler
3046 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3050 /// isLegalAddressImmediate - Return true if the integer value can be used
3051 /// as the offset of the target addressing mode.
3052 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3053 const Type *Ty) const {
3054 // SPU's local store is 256K, so valid address immediates fit in 18 bits:
3055 return (V > -(1 << 18) && V < (1 << 18) - 1);
3058 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3059 return false;
3060 }
3062 bool
3063 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3064 // The SPU target isn't yet aware of offsets.
3065 return false;
3066 }