lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "llvm/ADT/VectorExtras.h"
  18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/SelectionDAG.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT::ValueType mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT::ValueType        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << MVT::getValueTypeString(VT)
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an A-form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDOperand &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc == ISD::JumpTable
  94             || Opc == ISD::ConstantPool
  95             || Opc == ISD::ExternalSymbol
  96             || Opc == ISD::TargetGlobalAddress
  97             || Opc == ISD::TargetGlobalTLSAddress
  98             || Opc == ISD::TargetJumpTable
  99             || Opc == ISD::TargetConstantPool
 100             || Opc == ISD::TargetExternalSymbol
 101             || Opc == SPUISD::AFormAddr);
 102   }
 103
 104   //! Predicate that returns true if the operand is an indirect target
 105   bool isIndirectOperand(const SDOperand &Op)
 106   {
 107     const unsigned Opc = Op.getOpcode();
 108     return (Opc == ISD::Register
 109             || Opc == SPUISD::LDRESULT);
 110   }
 111 }
 112
 113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 114   : TargetLowering(TM),
 115     SPUTM(TM)
 116 {
 117   // Fold away setcc operations if possible.
 118   setPow2DivIsCheap();
 119
 120   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 121   setUseUnderscoreSetJmp(true);
 122   setUseUnderscoreLongJmp(true);
 123
 124   // Set up the SPU's register classes:
 125   // NOTE: i8 register class is not registered because we cannot determine when
 126   // we need to zero or sign extend for custom-lowered loads and stores.
 127   // NOTE: Ignore the previous note. For now. :-)
 128   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 129   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 130   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 131   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 132   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 133   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 134   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 135
 136   // SPU has no sign or zero extended loads for i1, i8, i16:
 137   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
 138   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 139   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 140   setTruncStoreAction(MVT::i8, MVT::i1, Custom);
 141   setTruncStoreAction(MVT::i16, MVT::i1, Custom);
 142   setTruncStoreAction(MVT::i32, MVT::i1, Custom);
 143   setTruncStoreAction(MVT::i64, MVT::i1, Custom);
 144   setTruncStoreAction(MVT::i128, MVT::i1, Custom);
 145
 146   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 147   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 148   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 149   setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
 150   setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
 151   setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
 152   setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
 153   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
 154
 155   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 156   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 157   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 158
 159   // SPU constant load actions are custom lowered:
 160   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 161   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
 162   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 163
 164   // SPU's loads and stores have to be custom lowered:
 165   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 166        ++sctype) {
 167     setOperationAction(ISD::LOAD, sctype, Custom);
 168     setOperationAction(ISD::STORE, sctype, Custom);
 169   }
 170
 171   // Custom lower BRCOND for i1, i8 to "promote" the result to
 172   // i32 and i16, respectively.
 173   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 174
 175   // Expand the jumptable branches
 176   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 177   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 178   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 179
 180   // SPU has no intrinsics for these particular operations:
 181   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
 182   setOperationAction(ISD::MEMSET, MVT::Other, Expand);
 183   setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
 184
 185   // PowerPC has no SREM/UREM instructions
 186   setOperationAction(ISD::SREM, MVT::i32, Expand);
 187   setOperationAction(ISD::UREM, MVT::i32, Expand);
 188   setOperationAction(ISD::SREM, MVT::i64, Expand);
 189   setOperationAction(ISD::UREM, MVT::i64, Expand);
 190
 191   // We don't support sin/cos/sqrt/fmod
 192   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 193   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 194   setOperationAction(ISD::FREM , MVT::f64, Expand);
 195   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 196   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 197   setOperationAction(ISD::FREM , MVT::f32, Expand);
 198
 199   // If we're enabling GP optimizations, use hardware square root
 200   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 201   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 202
 203   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 204   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 205
 206   // SPU can do rotate right and left, so legalize it... but customize for i8
 207   // because instructions don't exist.
 208   setOperationAction(ISD::ROTR, MVT::i32,    Legal);
 209   setOperationAction(ISD::ROTR, MVT::i16,    Legal);
 210   setOperationAction(ISD::ROTR, MVT::i8,     Custom);
 211   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 212   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 213   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 214   // SPU has no native version of shift left/right for i8
 215   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 216   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 217   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 218
 219   // Custom lower i32 multiplications
 220   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 221
 222   // Need to custom handle (some) common i8 math ops
 223   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 224   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 225
 226   // SPU does not have BSWAP. It does have i32 support CTLZ.
 227   // CTPOP has to be custom lowered.
 228   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 229   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 230
 231   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 232   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 233   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 234   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 235
 236   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 237   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 238
 239   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 240
 241   // SPU does not have select or setcc
 242   setOperationAction(ISD::SELECT, MVT::i1,   Expand);
 243   setOperationAction(ISD::SELECT, MVT::i8,   Expand);
 244   setOperationAction(ISD::SELECT, MVT::i16,  Expand);
 245   setOperationAction(ISD::SELECT, MVT::i32,  Expand);
 246   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 247   setOperationAction(ISD::SELECT, MVT::f32,  Expand);
 248   setOperationAction(ISD::SELECT, MVT::f64,  Expand);
 249
 250   setOperationAction(ISD::SETCC, MVT::i1,   Expand);
 251   setOperationAction(ISD::SETCC, MVT::i8,   Expand);
 252   setOperationAction(ISD::SETCC, MVT::i16,  Expand);
 253   setOperationAction(ISD::SETCC, MVT::i32,  Expand);
 254   setOperationAction(ISD::SETCC, MVT::i64,  Expand);
 255   setOperationAction(ISD::SETCC, MVT::f32,  Expand);
 256   setOperationAction(ISD::SETCC, MVT::f64,  Expand);
 257
 258   // SPU has a legal FP -> signed INT instruction
 259   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 260   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 261   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 262   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 263
 264   // FDIV on SPU requires custom lowering
 265   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 266   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 267
 268   // SPU has [U|S]INT_TO_FP
 269   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 270   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 271   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 272   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 273   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 274   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 275   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 276   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 277
 278   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 279   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 280   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 281   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 282
 283   // We cannot sextinreg(i1).  Expand to shifts.
 284   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 285
 286   // Support label based line numbers.
 287   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 288   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 289
 290   // We want to legalize GlobalAddress and ConstantPool nodes into the
 291   // appropriate instructions to materialize the address.
 292   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 293   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 294   setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
 295   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 296   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 297   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 298   setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
 299   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 300
 301   // RET must be custom lowered, to meet ABI requirements
 302   setOperationAction(ISD::RET,           MVT::Other, Custom);
 303
 304   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 305   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 306
 307   // Use the default implementation.
 308   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 309   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 310   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 311   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 312   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 313   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 314   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 315
 316   // Cell SPU has instructions for converting between i64 and fp.
 317   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 318   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 319
 320   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 321   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 322
 323   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 324   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 325
 326   // First set operation action for all vector types to expand. Then we
 327   // will selectively turn on ones that can be effectively codegen'd.
 328   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 329   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 330   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 331   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 332   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 333   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 334
 335   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 336        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 337     // add/sub are legal for all supported vector VT's.
 338     setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 339     setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
 340     // mul has to be custom lowered.
 341     setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
 342
 343     setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
 344     setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
 345     setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
 346     setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
 347     setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
 348     setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);
 349
 350     // These operations need to be expanded:
 351     setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
 352     setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
 353     setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 354     setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 355     setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
 356
 357     // Custom lower build_vector, constant pool spills, insert and
 358     // extract vector elements:
 359     setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
 360     setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
 361     setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
 362     setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 363     setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 364     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
 365   }
 366
 367   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 368   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 369   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 370   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 371   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 372
 373   setSetCCResultType(MVT::i32);
 374   setShiftAmountType(MVT::i32);
 375   setSetCCResultContents(ZeroOrOneSetCCResult);
 376
 377   setStackPointerRegisterToSaveRestore(SPU::R1);
 378
 379   // We have target-specific dag combine patterns for the following nodes:
 380   // e.g., setTargetDAGCombine(ISD::SUB);
 381
 382   computeRegisterProperties();
 383 }
 384
 385 const char *
 386 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 387 {
 388   if (node_names.empty()) {
 389     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 390     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 391     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 392     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 393     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 394     node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
 395     node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
 396     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 397     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 398     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 399     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 400     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 401     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 402     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 403     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
 404     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 405     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 406     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 407     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 408     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 409     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 410     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 411     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 412     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 413     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 414     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 415     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 416     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 417     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
 418       "SPUISD::ROTBYTES_RIGHT_Z";
 419     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 420       "SPUISD::ROTBYTES_RIGHT_S";
 421     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 422     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 423       "SPUISD::ROTBYTES_LEFT_CHAINED";
 424     node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
 425     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 426     node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
 427     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 428     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 429     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 430   }
 431
 432   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 433
 434   return ((i != node_names.end()) ? i->second : 0);
 435 }
 436
 437 //===----------------------------------------------------------------------===//
 438 // Calling convention code:
 439 //===----------------------------------------------------------------------===//
 440
 441 #include "SPUGenCallingConv.inc"
 442
 443 //===----------------------------------------------------------------------===//
 444 //  LowerOperation implementation
 445 //===----------------------------------------------------------------------===//
 446
 447 /// Aligned load common code for CellSPU
 448 /*!
 449   \param[in] Op The SelectionDAG load or store operand
 450   \param[in] DAG The selection DAG
 451   \param[in] ST CellSPU subtarget information structure
 452   \param[in,out] alignment Caller initializes this to the load or store node's
 453   value from getAlignment(), may be updated while generating the aligned load
 454   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 455   offset (divisible by 16, modulo 16 == 0)
 456   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 457   offset of the preferred slot (modulo 16 != 0)
 458   \param[in,out] VT Caller initializes this value type to the the load or store
 459   node's loaded or stored value type; may be updated if an i1-extended load or
 460   store.
 461   \param[out] was16aligned true if the base pointer had 16-byte alignment,
 462   otherwise false. Can help to determine if the chunk needs to be rotated.
 463
 464  Both load and store lowering load a block of data aligned on a 16-byte
 465  boundary. This is the common aligned load code shared between both.
 466  */
 467 static SDOperand
 468 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
 469             LSBaseSDNode *LSN,
 470             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
 471             MVT::ValueType &VT, bool &was16aligned)
 472 {
 473   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 474   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 475   SDOperand basePtr = LSN->getBasePtr();
 476   SDOperand chain = LSN->getChain();
 477
 478   if (basePtr.getOpcode() == ISD::ADD) {
 479     SDOperand Op1 = basePtr.Val->getOperand(1);
 480
 481     if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
 482       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
 483
 484       alignOffs = (int) CN->getValue();
 485       prefSlotOffs = (int) (alignOffs & 0xf);
 486
 487       // Adjust the rotation amount to ensure that the final result ends up in
 488       // the preferred slot:
 489       prefSlotOffs -= vtm->prefslot_byte;
 490       basePtr = basePtr.getOperand(0);
 491
 492       // Loading from memory, can we adjust alignment?
 493       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
 494         SDOperand APtr = basePtr.getOperand(0);
 495         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
 496           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
 497           alignment = GSDN->getGlobal()->getAlignment();
 498         }
 499       }
 500     } else {
 501       alignOffs = 0;
 502       prefSlotOffs = -vtm->prefslot_byte;
 503     }
 504   } else {
 505     alignOffs = 0;
 506     prefSlotOffs = -vtm->prefslot_byte;
 507   }
 508
 509   if (alignment == 16) {
 510     // Realign the base pointer as a D-Form address:
 511     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
 512       basePtr = DAG.getNode(ISD::ADD, PtrVT,
 513                             basePtr,
 514                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
 515     }
 516
 517     // Emit the vector load:
 518     was16aligned = true;
 519     return DAG.getLoad(MVT::v16i8, chain, basePtr,
 520                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
 521                        LSN->isVolatile(), 16);
 522   }
 523
 524   // Unaligned load or we're using the "large memory" model, which means that
 525   // we have to be very pessimistic:
 526   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
 527     basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
 528   }
 529
 530   // Add the offset
 531   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
 532   was16aligned = false;
 533   return DAG.getLoad(MVT::v16i8, chain, basePtr,
 534                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
 535                      LSN->isVolatile(), 16);
 536 }
 537
 538 /// Custom lower loads for CellSPU
 539 /*!
 540  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 541  within a 16-byte block, we have to rotate to extract the requested element.
 542  */
 543 static SDOperand
 544 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 545   LoadSDNode *LN = cast<LoadSDNode>(Op);
 546   SDOperand the_chain = LN->getChain();
 547   MVT::ValueType VT = LN->getLoadedVT();
 548   MVT::ValueType OpVT = Op.Val->getValueType(0);
 549   ISD::LoadExtType ExtType = LN->getExtensionType();
 550   unsigned alignment = LN->getAlignment();
 551   SDOperand Ops[8];
 552
 553   switch (LN->getAddressingMode()) {
 554   case ISD::UNINDEXED: {
 555     int offset, rotamt;
 556     bool was16aligned;
 557     SDOperand result =
 558       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
 559
 560     if (result.Val == 0)
 561       return result;
 562
 563     the_chain = result.getValue(1);
 564     // Rotate the chunk if necessary
 565     if (rotamt < 0)
 566       rotamt += 16;
 567     if (rotamt != 0 || !was16aligned) {
 568       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 569
 570       Ops[0] = the_chain;
 571       Ops[1] = result;
 572       if (was16aligned) {
 573         Ops[2] = DAG.getConstant(rotamt, MVT::i16);
 574       } else {
 575         MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 576         LoadSDNode *LN1 = cast<LoadSDNode>(result);
 577         Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
 578                              DAG.getConstant(rotamt, PtrVT));
 579       }
 580
 581       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 582       the_chain = result.getValue(1);
 583     }
 584
 585     if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 586       SDVTList scalarvts;
 587       MVT::ValueType vecVT = MVT::v16i8;
 588
 589       // Convert the loaded v16i8 vector to the appropriate vector type
 590       // specified by the operand:
 591       if (OpVT == VT) {
 592         if (VT != MVT::i1)
 593           vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 594       } else
 595         vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
 596
 597       Ops[0] = the_chain;
 598       Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
 599       scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
 600       result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 601       the_chain = result.getValue(1);
 602     } else {
 603       // Handle the sign and zero-extending loads for i1 and i8:
 604       unsigned NewOpC;
 605
 606       if (ExtType == ISD::SEXTLOAD) {
 607         NewOpC = (OpVT == MVT::i1
 608                   ? SPUISD::EXTRACT_I1_SEXT
 609                   : SPUISD::EXTRACT_I8_SEXT);
 610       } else {
 611         assert(ExtType == ISD::ZEXTLOAD);
 612         NewOpC = (OpVT == MVT::i1
 613                   ? SPUISD::EXTRACT_I1_ZEXT
 614                   : SPUISD::EXTRACT_I8_ZEXT);
 615       }
 616
 617       result = DAG.getNode(NewOpC, OpVT, result);
 618     }
 619
 620     SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 621     SDOperand retops[3] = {
 622       result,
 623       the_chain,
 624       DAG.getConstant(alignment, MVT::i32)
 625     };
 626
 627     result = DAG.getNode(SPUISD::LDRESULT, retvts,
 628                          retops, sizeof(retops) / sizeof(retops[0]));
 629     return result;
 630   }
 631   case ISD::PRE_INC:
 632   case ISD::PRE_DEC:
 633   case ISD::POST_INC:
 634   case ISD::POST_DEC:
 635   case ISD::LAST_INDEXED_MODE:
 636     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 637             "UNINDEXED\n";
 638     cerr << (unsigned) LN->getAddressingMode() << "\n";
 639     abort();
 640     /*NOTREACHED*/
 641   }
 642
 643   return SDOperand();
 644 }
 645
 646 /// Custom lower stores for CellSPU
 647 /*!
 648  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 649  within a 16-byte block, we have to generate a shuffle to insert the
 650  requested element into its place, then store the resulting block.
 651  */
 652 static SDOperand
 653 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 654   StoreSDNode *SN = cast<StoreSDNode>(Op);
 655   SDOperand Value = SN->getValue();
 656   MVT::ValueType VT = Value.getValueType();
 657   MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
 658   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 659   unsigned alignment = SN->getAlignment();
 660
 661   switch (SN->getAddressingMode()) {
 662   case ISD::UNINDEXED: {
 663     int chunk_offset, slot_offset;
 664     bool was16aligned;
 665
 666     // The vector type we really want to load from the 16-byte chunk, except
 667     // in the case of MVT::i1, which has to be v16i8.
 668     unsigned vecVT, stVecVT = MVT::v16i8;
 669
 670     if (StVT != MVT::i1)
 671       stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
 672     vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 673
 674     SDOperand alignLoadVec =
 675       AlignedLoad(Op, DAG, ST, SN, alignment,
 676                   chunk_offset, slot_offset, VT, was16aligned);
 677
 678     if (alignLoadVec.Val == 0)
 679       return alignLoadVec;
 680
 681     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 682     SDOperand basePtr = LN->getBasePtr();
 683     SDOperand the_chain = alignLoadVec.getValue(1);
 684     SDOperand theValue = SN->getValue();
 685     SDOperand result;
 686
 687     if (StVT != VT
 688         && (theValue.getOpcode() == ISD::AssertZext
 689             || theValue.getOpcode() == ISD::AssertSext)) {
 690       // Drill down and get the value for zero- and sign-extended
 691       // quantities
 692       theValue = theValue.getOperand(0);
 693     }
 694
 695     chunk_offset &= 0xf;
 696
 697     SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
 698     SDOperand insertEltPtr;
 699     SDOperand insertEltOp;
 700
 701     // If the base pointer is already a D-form address, then just create
 702     // a new D-form address with a slot offset and the orignal base pointer.
 703     // Otherwise generate a D-form address with the slot offset relative
 704     // to the stack pointer, which is always aligned.
 705     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
 706     DEBUG(basePtr.Val->dump(&DAG));
 707     DEBUG(cerr << "\n");
 708
 709     if (basePtr.getOpcode() == SPUISD::DFormAddr) {
 710       // Hmmmm... do we ever actually hit this code?
 711       insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 712                                  basePtr.getOperand(0),
 713                                  insertEltOffs);
 714     } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
 715                (basePtr.getOpcode() == ISD::ADD
 716                 && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
 717       insertEltPtr = basePtr;
 718     } else {
 719       // $sp is always aligned, so use it instead of potentially loading an
 720       // address into a new register:
 721       insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 722                                  DAG.getRegister(SPU::R1, PtrVT),
 723                                  insertEltOffs);
 724     }
 725
 726     insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
 727     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 728                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 729                          alignLoadVec,
 730                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 731
 732     result = DAG.getStore(the_chain, result, basePtr,
 733                           LN->getSrcValue(), LN->getSrcValueOffset(),
 734                           LN->isVolatile(), LN->getAlignment());
 735
 736     return result;
 737     /*UNREACHED*/
 738   }
 739   case ISD::PRE_INC:
 740   case ISD::PRE_DEC:
 741   case ISD::POST_INC:
 742   case ISD::POST_DEC:
 743   case ISD::LAST_INDEXED_MODE:
 744     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 745             "UNINDEXED\n";
 746     cerr << (unsigned) SN->getAddressingMode() << "\n";
 747     abort();
 748     /*NOTREACHED*/
 749   }
 750
 751   return SDOperand();
 752 }
 753
 754 /// Generate the address of a constant pool entry.
 755 static SDOperand
 756 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 757   MVT::ValueType PtrVT = Op.getValueType();
 758   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 759   Constant *C = CP->getConstVal();
 760   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 761   SDOperand Zero = DAG.getConstant(0, PtrVT);
 762   const TargetMachine &TM = DAG.getTarget();
 763
 764   if (TM.getRelocationModel() == Reloc::Static) {
 765     if (!ST->usingLargeMem()) {
 766       // Just return the SDOperand with the constant pool address in it.
 767       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
 768     } else {
 769 #if 1
 770       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 771       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 772
 773       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 774 #else
 775       return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
 776 #endif
 777     }
 778   }
 779
 780   assert(0 &&
 781          "LowerConstantPool: Relocation model other than static not supported.");
 782   return SDOperand();
 783 }
 784
 785 static SDOperand
 786 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 787   MVT::ValueType PtrVT = Op.getValueType();
 788   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 789   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 790   SDOperand Zero = DAG.getConstant(0, PtrVT);
 791   const TargetMachine &TM = DAG.getTarget();
 792
 793   if (TM.getRelocationModel() == Reloc::Static) {
 794     return (!ST->usingLargeMem()
 795             ? DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero)
 796             : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
 797   }
 798
 799   assert(0 &&
 800          "LowerJumpTable: Relocation model other than static not supported.");
 801   return SDOperand();
 802 }
 803
 804 static SDOperand
 805 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 806   MVT::ValueType PtrVT = Op.getValueType();
 807   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 808   GlobalValue *GV = GSDN->getGlobal();
 809   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 810   const TargetMachine &TM = DAG.getTarget();
 811   SDOperand Zero = DAG.getConstant(0, PtrVT);
 812
 813   if (TM.getRelocationModel() == Reloc::Static) {
 814     return (!ST->usingLargeMem()
 815             ? DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero)
 816             : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
 817   } else {
 818     cerr << "LowerGlobalAddress: Relocation model other than static not "
 819          << "supported.\n";
 820     abort();
 821     /*NOTREACHED*/
 822   }
 823
 824   return SDOperand();
 825 }
 826
 827 //! Custom lower i64 integer constants
 828 /*!
 829  This code inserts all of the necessary juggling that needs to occur to load
 830  a 64-bit constant into a register.
 831  */
 832 static SDOperand
 833 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
 834   unsigned VT = Op.getValueType();
 835   ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
 836
 837   if (VT == MVT::i64) {
 838     SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
 839     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 840                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 841
 842   } else {
 843     cerr << "LowerConstant: unhandled constant type "
 844          << MVT::getValueTypeString(VT)
 845          << "\n";
 846     abort();
 847     /*NOTREACHED*/
 848   }
 849
 850   return SDOperand();
 851 }
 852
 853 //! Custom lower single precision floating point constants
 854 /*!
 855   "float" immediates can be lowered as if they were unsigned 32-bit integers.
 856   The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 857   target description.
 858  */
 859 static SDOperand
 860 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
 861   unsigned VT = Op.getValueType();
 862   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
 863
 864   assert((FP != 0) &&
 865          "LowerConstantFP: Node is not ConstantFPSDNode");
 866
 867   if (VT == MVT::f32) {
 868     float targetConst = FP->getValueAPF().convertToFloat();
 869     return DAG.getNode(SPUISD::SFPConstant, VT,
 870                        DAG.getTargetConstantFP(targetConst, VT));
 871   } else if (VT == MVT::f64) {
 872     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 873     return DAG.getNode(ISD::BIT_CONVERT, VT,
 874                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 875   }
 876
 877   return SDOperand();
 878 }
 879
 880 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
 881 static SDOperand
 882 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
 883 {
 884   SDOperand Cond = Op.getOperand(1);
 885   MVT::ValueType CondVT = Cond.getValueType();
 886   MVT::ValueType CondNVT;
 887
 888   if (CondVT == MVT::i1 || CondVT == MVT::i8) {
 889     CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
 890     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
 891                       Op.getOperand(0),
 892                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
 893                       Op.getOperand(2));
 894   } else
 895     return SDOperand();                // Unchanged
 896 }
 897
 898 static SDOperand
 899 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 900 {
 901   MachineFunction &MF = DAG.getMachineFunction();
 902   MachineFrameInfo *MFI = MF.getFrameInfo();
 903   MachineRegisterInfo &RegInfo = MF.getRegInfo();
 904   SmallVector<SDOperand, 8> ArgValues;
 905   SDOperand Root = Op.getOperand(0);
 906   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 907
 908   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 909   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 910
 911   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 912   unsigned ArgRegIdx = 0;
 913   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 914
 915   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 916
 917   // Add DAG nodes to load the arguments or copy them out of registers.
 918   for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 919     SDOperand ArgVal;
 920     bool needsLoad = false;
 921     MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
 922     unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
 923
 924     switch (ObjectVT) {
 925     default: {
 926       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 927            << MVT::getValueTypeString(ObjectVT)
 928            << "\n";
 929       abort();
 930     }
 931     case MVT::i8:
 932       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 933         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
 934         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 935         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 936         ++ArgRegIdx;
 937       } else {
 938         needsLoad = true;
 939       }
 940       break;
 941     case MVT::i16:
 942       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 943         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
 944         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 945         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 946         ++ArgRegIdx;
 947       } else {
 948         needsLoad = true;
 949       }
 950       break;
 951     case MVT::i32:
 952       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 953         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 954         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 955         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 956         ++ArgRegIdx;
 957       } else {
 958         needsLoad = true;
 959       }
 960       break;
 961     case MVT::i64:
 962       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 963         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
 964         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 965         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 966         ++ArgRegIdx;
 967       } else {
 968         needsLoad = true;
 969       }
 970       break;
 971     case MVT::f32:
 972       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 973         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
 974         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 975         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
 976         ++ArgRegIdx;
 977       } else {
 978         needsLoad = true;
 979       }
 980       break;
 981     case MVT::f64:
 982       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 983         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
 984         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 985         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
 986         ++ArgRegIdx;
 987       } else {
 988         needsLoad = true;
 989       }
 990       break;
 991     case MVT::v2f64:
 992     case MVT::v4f32:
 993     case MVT::v4i32:
 994     case MVT::v8i16:
 995     case MVT::v16i8:
 996       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 997         unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
 998         RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 999         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1000         ++ArgRegIdx;
1001       } else {
1002         needsLoad = true;
1003       }
1004       break;
1005     }
1006
1007     // We need to load the argument to a virtual register if we determined above
1008     // that we ran out of physical registers of the appropriate type
1009     if (needsLoad) {
1010       // If the argument is actually used, emit a load from the right stack
1011       // slot.
1012       if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1013         int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1014         SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1015         ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1016       } else {
1017         // Don't emit a dead load.
1018         ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1019       }
1020
1021       ArgOffset += StackSlotSize;
1022     }
1023
1024     ArgValues.push_back(ArgVal);
1025   }
1026
1027   // If the function takes variable number of arguments, make a frame index for
1028   // the start of the first vararg value... for expansion of llvm.va_start.
1029   if (isVarArg) {
1030     VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1031                                                ArgOffset);
1032     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1033     // If this function is vararg, store any remaining integer argument regs to
1034     // their spots on the stack so that they may be loaded by deferencing the
1035     // result of va_next.
1036     SmallVector<SDOperand, 8> MemOps;
1037     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1038       unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1039       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1040       SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1041       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1042       MemOps.push_back(Store);
1043       // Increment the address by four for the next argument to store
1044       SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1045       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1046     }
1047     if (!MemOps.empty())
1048       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1049   }
1050
1051   ArgValues.push_back(Root);
1052
1053   // Return the new list of results.
1054   std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1055                                     Op.Val->value_end());
1056   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1057 }
1058
1059 /// isLSAAddress - Return the immediate to use if the specified
1060 /// value is representable as a LSA address.
1061 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1062   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1063   if (!C) return 0;
1064
1065   int Addr = C->getValue();
1066   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1067       (Addr << 14 >> 14) != Addr)
1068     return 0;  // Top 14 bits have to be sext of immediate.
1069
1070   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1071 }
1072
1073 static
1074 SDOperand
1075 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1076   SDOperand Chain = Op.getOperand(0);
1077 #if 0
1078   bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1079   bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1080 #endif
1081   SDOperand Callee    = Op.getOperand(4);
1082   unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1083   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1084   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1085   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1086
1087   // Handy pointer type
1088   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1089
1090   // Accumulate how many bytes are to be pushed on the stack, including the
1091   // linkage area, and parameter passing area.  According to the SPU ABI,
1092   // we minimally need space for [LR] and [SP]
1093   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1094
1095   // Set up a copy of the stack pointer for use loading and storing any
1096   // arguments that may not fit in the registers available for argument
1097   // passing.
1098   SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1099
1100   // Figure out which arguments are going to go in registers, and which in
1101   // memory.
1102   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1103   unsigned ArgRegIdx = 0;
1104
1105   // Keep track of registers passing arguments
1106   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1107   // And the arguments passed on the stack
1108   SmallVector<SDOperand, 8> MemOpChains;
1109
1110   for (unsigned i = 0; i != NumOps; ++i) {
1111     SDOperand Arg = Op.getOperand(5+2*i);
1112
1113     // PtrOff will be used to store the current argument to the stack if a
1114     // register cannot be found for it.
1115     SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1116     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1117
1118     switch (Arg.getValueType()) {
1119     default: assert(0 && "Unexpected ValueType for argument!");
1120     case MVT::i32:
1121     case MVT::i64:
1122     case MVT::i128:
1123       if (ArgRegIdx != NumArgRegs) {
1124         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1125       } else {
1126         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1127         ArgOffset += StackSlotSize;
1128       }
1129       break;
1130     case MVT::f32:
1131     case MVT::f64:
1132       if (ArgRegIdx != NumArgRegs) {
1133         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1134       } else {
1135         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1136         ArgOffset += StackSlotSize;
1137       }
1138       break;
1139     case MVT::v4f32:
1140     case MVT::v4i32:
1141     case MVT::v8i16:
1142     case MVT::v16i8:
1143       if (ArgRegIdx != NumArgRegs) {
1144         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1145       } else {
1146         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1147         ArgOffset += StackSlotSize;
1148       }
1149       break;
1150     }
1151   }
1152
1153   // Update number of stack bytes actually used, insert a call sequence start
1154   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1155   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1156
1157   if (!MemOpChains.empty()) {
1158     // Adjust the stack pointer for the stack arguments.
1159     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1160                         &MemOpChains[0], MemOpChains.size());
1161   }
1162
1163   // Build a sequence of copy-to-reg nodes chained together with token chain
1164   // and flag operands which copy the outgoing args into the appropriate regs.
1165   SDOperand InFlag;
1166   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1167     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1168                              InFlag);
1169     InFlag = Chain.getValue(1);
1170   }
1171
1172   std::vector<MVT::ValueType> NodeTys;
1173   NodeTys.push_back(MVT::Other);   // Returns a chain
1174   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1175
1176   SmallVector<SDOperand, 8> Ops;
1177   unsigned CallOpc = SPUISD::CALL;
1178
1179   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1180   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1181   // node so that legalize doesn't hack it.
1182   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1183     GlobalValue *GV = G->getGlobal();
1184     unsigned CalleeVT = Callee.getValueType();
1185     SDOperand Zero = DAG.getConstant(0, PtrVT);
1186     SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1187
1188     if (!ST->usingLargeMem()) {
1189       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1190       // style calls, otherwise, external symbols are BRASL calls. This assumes
1191       // that declared/defined symbols are in the same compilation unit and can
1192       // be reached through PC-relative jumps.
1193       //
1194       // NOTE:
1195       // This may be an unsafe assumption for JIT and really large compilation
1196       // units.
1197       if (GV->isDeclaration()) {
1198         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1199       } else {
1200         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1201       }
1202     } else {
1203       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1204       // address pairs:
1205       Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
1206     }
1207   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1208     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1209   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1210     // If this is an absolute destination address that appears to be a legal
1211     // local store address, use the munged value.
1212     Callee = SDOperand(Dest, 0);
1213   }
1214
1215   Ops.push_back(Chain);
1216   Ops.push_back(Callee);
1217
1218   // Add argument registers to the end of the list so that they are known live
1219   // into the call.
1220   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1221     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1222                                   RegsToPass[i].second.getValueType()));
1223
1224   if (InFlag.Val)
1225     Ops.push_back(InFlag);
1226   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1227   InFlag = Chain.getValue(1);
1228
1229   SDOperand ResultVals[3];
1230   unsigned NumResults = 0;
1231   NodeTys.clear();
1232
1233   // If the call has results, copy the values out of the ret val registers.
1234   switch (Op.Val->getValueType(0)) {
1235   default: assert(0 && "Unexpected ret value!");
1236   case MVT::Other: break;
1237   case MVT::i32:
1238     if (Op.Val->getValueType(1) == MVT::i32) {
1239       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1240       ResultVals[0] = Chain.getValue(0);
1241       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1242                                  Chain.getValue(2)).getValue(1);
1243       ResultVals[1] = Chain.getValue(0);
1244       NumResults = 2;
1245       NodeTys.push_back(MVT::i32);
1246     } else {
1247       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1248       ResultVals[0] = Chain.getValue(0);
1249       NumResults = 1;
1250     }
1251     NodeTys.push_back(MVT::i32);
1252     break;
1253   case MVT::i64:
1254     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1255     ResultVals[0] = Chain.getValue(0);
1256     NumResults = 1;
1257     NodeTys.push_back(MVT::i64);
1258     break;
1259   case MVT::f32:
1260   case MVT::f64:
1261     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1262                                InFlag).getValue(1);
1263     ResultVals[0] = Chain.getValue(0);
1264     NumResults = 1;
1265     NodeTys.push_back(Op.Val->getValueType(0));
1266     break;
1267   case MVT::v2f64:
1268   case MVT::v4f32:
1269   case MVT::v4i32:
1270   case MVT::v8i16:
1271   case MVT::v16i8:
1272     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1273                                    InFlag).getValue(1);
1274     ResultVals[0] = Chain.getValue(0);
1275     NumResults = 1;
1276     NodeTys.push_back(Op.Val->getValueType(0));
1277     break;
1278   }
1279
1280   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1281                       DAG.getConstant(NumStackBytes, PtrVT));
1282   NodeTys.push_back(MVT::Other);
1283
1284   // If the function returns void, just return the chain.
1285   if (NumResults == 0)
1286     return Chain;
1287
1288   // Otherwise, merge everything together with a MERGE_VALUES node.
1289   ResultVals[NumResults++] = Chain;
1290   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1291                               ResultVals, NumResults);
1292   return Res.getValue(Op.ResNo);
1293 }
1294
1295 static SDOperand
1296 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1297   SmallVector<CCValAssign, 16> RVLocs;
1298   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1299   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1300   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1301   CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1302
1303   // If this is the first return lowered for this function, add the regs to the
1304   // liveout set for the function.
1305   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1306     for (unsigned i = 0; i != RVLocs.size(); ++i)
1307       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1308   }
1309
1310   SDOperand Chain = Op.getOperand(0);
1311   SDOperand Flag;
1312
1313   // Copy the result values into the output registers.
1314   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1315     CCValAssign &VA = RVLocs[i];
1316     assert(VA.isRegLoc() && "Can only return in registers!");
1317     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1318     Flag = Chain.getValue(1);
1319   }
1320
1321   if (Flag.Val)
1322     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1323   else
1324     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1325 }
1326
1327
1328 //===----------------------------------------------------------------------===//
1329 // Vector related lowering:
1330 //===----------------------------------------------------------------------===//
1331
1332 static ConstantSDNode *
1333 getVecImm(SDNode *N) {
1334   SDOperand OpVal(0, 0);
1335
1336   // Check to see if this buildvec has a single non-undef value in its elements.
1337   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1338     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1339     if (OpVal.Val == 0)
1340       OpVal = N->getOperand(i);
1341     else if (OpVal != N->getOperand(i))
1342       return 0;
1343   }
1344
1345   if (OpVal.Val != 0) {
1346     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1347       return CN;
1348     }
1349   }
1350
1351   return 0; // All UNDEF: use implicit def.; not Constant node
1352 }
1353
1354 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1355 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1356 /// constant
1357 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1358                               MVT::ValueType ValueType) {
1359   if (ConstantSDNode *CN = getVecImm(N)) {
1360     uint64_t Value = CN->getValue();
1361     if (Value <= 0x3ffff)
1362       return DAG.getConstant(Value, ValueType);
1363   }
1364
1365   return SDOperand();
1366 }
1367
1368 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1369 /// and the value fits into a signed 16-bit constant, and if so, return the
1370 /// constant
1371 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1372                               MVT::ValueType ValueType) {
1373   if (ConstantSDNode *CN = getVecImm(N)) {
1374     if (ValueType == MVT::i32) {
1375       int Value = (int) CN->getValue();
1376       int SExtValue = ((Value & 0xffff) << 16) >> 16;
1377
1378       if (Value == SExtValue)
1379         return DAG.getConstant(Value, ValueType);
1380     } else if (ValueType == MVT::i16) {
1381       short Value = (short) CN->getValue();
1382       int SExtValue = ((int) Value << 16) >> 16;
1383
1384       if (Value == (short) SExtValue)
1385         return DAG.getConstant(Value, ValueType);
1386     } else if (ValueType == MVT::i64) {
1387       int64_t Value = CN->getValue();
1388       int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1389
1390       if (Value == SExtValue)
1391         return DAG.getConstant(Value, ValueType);
1392     }
1393   }
1394
1395   return SDOperand();
1396 }
1397
1398 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1399 /// and the value fits into a signed 10-bit constant, and if so, return the
1400 /// constant
1401 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1402                               MVT::ValueType ValueType) {
1403   if (ConstantSDNode *CN = getVecImm(N)) {
1404     int Value = (int) CN->getValue();
1405     if ((ValueType == MVT::i32 && isS10Constant(Value))
1406         || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1407       return DAG.getConstant(Value, ValueType);
1408   }
1409
1410   return SDOperand();
1411 }
1412
1413 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1414 /// and the value fits into a signed 8-bit constant, and if so, return the
1415 /// constant.
1416 ///
1417 /// @note: The incoming vector is v16i8 because that's the only way we can load
1418 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1419 /// same value.
1420 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1421                              MVT::ValueType ValueType) {
1422   if (ConstantSDNode *CN = getVecImm(N)) {
1423     int Value = (int) CN->getValue();
1424     if (ValueType == MVT::i16
1425         && Value <= 0xffff                 /* truncated from uint64_t */
1426         && ((short) Value >> 8) == ((short) Value & 0xff))
1427       return DAG.getConstant(Value & 0xff, ValueType);
1428     else if (ValueType == MVT::i8
1429              && (Value & 0xff) == Value)
1430       return DAG.getConstant(Value, ValueType);
1431   }
1432
1433   return SDOperand();
1434 }
1435
1436 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1437 /// and the value fits into a signed 16-bit constant, and if so, return the
1438 /// constant
1439 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1440                                MVT::ValueType ValueType) {
1441   if (ConstantSDNode *CN = getVecImm(N)) {
1442     uint64_t Value = CN->getValue();
1443     if ((ValueType == MVT::i32
1444           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1445         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1446       return DAG.getConstant(Value >> 16, ValueType);
1447   }
1448
1449   return SDOperand();
1450 }
1451
1452 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1453 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1454   if (ConstantSDNode *CN = getVecImm(N)) {
1455     return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1456   }
1457
1458   return SDOperand();
1459 }
1460
1461 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1462 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1463   if (ConstantSDNode *CN = getVecImm(N)) {
1464     return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1465   }
1466
1467   return SDOperand();
1468 }
1469
1470 // If this is a vector of constants or undefs, get the bits.  A bit in
1471 // UndefBits is set if the corresponding element of the vector is an
1472 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1473 // zero.   Return true if this is not an array of constants, false if it is.
1474 //
1475 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1476                                        uint64_t UndefBits[2]) {
1477   // Start with zero'd results.
1478   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1479
1480   unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1481   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1482     SDOperand OpVal = BV->getOperand(i);
1483
1484     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1485     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1486
1487     uint64_t EltBits = 0;
1488     if (OpVal.getOpcode() == ISD::UNDEF) {
1489       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1490       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1491       continue;
1492     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1493       EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1494     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1495       const APFloat &apf = CN->getValueAPF();
1496       EltBits = (CN->getValueType(0) == MVT::f32
1497                  ? FloatToBits(apf.convertToFloat())
1498                  : DoubleToBits(apf.convertToDouble()));
1499     } else {
1500       // Nonconstant element.
1501       return true;
1502     }
1503
1504     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1505   }
1506
1507   //printf("%llx %llx  %llx %llx\n",
1508   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1509   return false;
1510 }
1511
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it, but never smaller than \a MinSplatBits.
/// For example, "0x01010101010101..." is a splat of 0x01, 0x0101, and
/// 0x01010101; with MinSplatBits == 8 we return SplatBits = 0x01 and
/// SplatSize = 1 byte, with MinSplatBits == 16 we return 0x0101 and 2 bytes.
///
/// \param Bits128      the 128-bit vector image as two 64-bit words
/// \param Undef128     mask of undefined bits; the matching bits in
///                     \a Bits128 are expected to be zero
/// \param MinSplatBits smallest splat width (in bits) the caller accepts; the
///                     vector must be a splat at exactly this granularity or
///                     finer for the function to succeed
/// \param SplatBits    [out] the replicated value, undefined bits read as zero
/// \param SplatUndef   [out] bits that are undefined in every replica
/// \param SplatSize    [out] splat width in bytes (1, 2, 4 or 8)
/// \return true if a splat was found; the outputs are only written then.
///
/// Undefined bits never prevent a match: two pieces are compared only on the
/// bits that are defined in both of them. At every width, including the
/// 64-bit one, the reported value is the OR of the two pieces, so that an
/// undefined piece does not hide the defined value of the other one (e.g.
/// the vector <undef, X> splats X, not 0).
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // See if the top 64-bits are the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  const uint64_t Bits64  = Bits128[0] | Bits128[1];
  const uint64_t Undef64 = Undef128[0] & Undef128[1];

  if (MinSplatBits >= 64) {
    // Use the merged halves: taking Bits128[0] alone would report zero
    // whenever the first half is undefined.
    SplatBits = Bits64;
    SplatUndef = Undef64;
    SplatSize = 8;
    return true;
  }

  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;

  const uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  const uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  if (MinSplatBits >= 32) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  // Check that the top 16-bits are the same as the lower 16-bits, ignoring
  // undefs.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32))
    return false;

  const uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  const uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if (MinSplatBits >= 16) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Check that the top 8-bits are the same as the lower 8-bits, ignoring
  // undefs.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16))
    return false;

  // We have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}
1575
1576 // If this is a case we can't handle, return null and let the default
1577 // expansion code take care of it.  If we CAN select this case, and if it
1578 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1579 // this case more efficiently than a constant pool load, lower it to the
1580 // sequence of ops that should be used.
1581 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1582   MVT::ValueType VT = Op.getValueType();
1583   // If this is a vector of constants or undefs, get the bits.  A bit in
1584   // UndefBits is set if the corresponding element of the vector is an
1585   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1586   // zero.
1587   uint64_t VectorBits[2];
1588   uint64_t UndefBits[2];
1589   uint64_t SplatBits, SplatUndef;
1590   int SplatSize;
1591   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1592       || !isConstantSplat(VectorBits, UndefBits,
1593                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1594                           SplatBits, SplatUndef, SplatSize))
1595     return SDOperand();   // Not a constant vector, not a splat.
1596
1597   switch (VT) {
1598   default:
1599   case MVT::v4f32: {
1600     uint32_t Value32 = SplatBits;
1601     assert(SplatSize == 4
1602            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1603     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1604     SDOperand T = DAG.getConstant(Value32, MVT::i32);
1605     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1606                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1607     break;
1608   }
1609   case MVT::v2f64: {
1610     uint64_t f64val = SplatBits;
1611     assert(SplatSize == 8
1612            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1613     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1614     SDOperand T = DAG.getConstant(f64val, MVT::i64);
1615     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1616                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1617     break;
1618   }
1619   case MVT::v16i8: {
1620    // 8-bit constants have to be expanded to 16-bits
1621    unsigned short Value16 = SplatBits | (SplatBits << 8);
1622    SDOperand Ops[8];
1623    for (int i = 0; i < 8; ++i)
1624      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1625    return DAG.getNode(ISD::BIT_CONVERT, VT,
1626                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1627   }
1628   case MVT::v8i16: {
1629     unsigned short Value16;
1630     if (SplatSize == 2)
1631       Value16 = (unsigned short) (SplatBits & 0xffff);
1632     else
1633       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1634     SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1635     SDOperand Ops[8];
1636     for (int i = 0; i < 8; ++i) Ops[i] = T;
1637     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1638   }
1639   case MVT::v4i32: {
1640     unsigned int Value = SplatBits;
1641     SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1642     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1643   }
1644   case MVT::v2i64: {
1645     uint64_t val = SplatBits;
1646     uint32_t upper = uint32_t(val >> 32);
1647     uint32_t lower = uint32_t(val);
1648
1649     if (val != 0) {
1650       SDOperand LO32;
1651       SDOperand HI32;
1652       SmallVector<SDOperand, 16> ShufBytes;
1653       SDOperand Result;
1654       bool upper_special, lower_special;
1655
1656       // NOTE: This code creates common-case shuffle masks that can be easily
1657       // detected as common expressions. It is not attempting to create highly
1658       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1659
1660       // Detect if the upper or lower half is a special shuffle mask pattern:
1661       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1662       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1663
1664       // Create lower vector if not a special pattern
1665       if (!lower_special) {
1666         SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1667         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669                                        LO32C, LO32C, LO32C, LO32C));
1670       }
1671
1672       // Create upper vector if not a special pattern
1673       if (!upper_special) {
1674         SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1675         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1676                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1677                                        HI32C, HI32C, HI32C, HI32C));
1678       }
1679
1680       // If either upper or lower are special, then the two input operands are
1681       // the same (basically, one of them is a "don't care")
1682       if (lower_special)
1683         LO32 = HI32;
1684       if (upper_special)
1685         HI32 = LO32;
1686       if (lower_special && upper_special) {
1687         // Unhappy situation... both upper and lower are special, so punt with
1688         // a target constant:
1689         SDOperand Zero = DAG.getConstant(0, MVT::i32);
1690         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1691                                   Zero, Zero);
1692       }
1693
1694       for (int i = 0; i < 4; ++i) {
1695         for (int j = 0; j < 4; ++j) {
1696           SDOperand V;
1697           bool process_upper, process_lower;
1698           uint64_t val = 0;
1699
1700           process_upper = (upper_special && (i & 1) == 0);
1701           process_lower = (lower_special && (i & 1) == 1);
1702
1703           if (process_upper || process_lower) {
1704             if ((process_upper && upper == 0)
1705                 || (process_lower && lower == 0))
1706               val = 0x80;
1707             else if ((process_upper && upper == 0xffffffff)
1708                      || (process_lower && lower == 0xffffffff))
1709               val = 0xc0;
1710             else if ((process_upper && upper == 0x80000000)
1711                      || (process_lower && lower == 0x80000000))
1712               val = (j == 0 ? 0xe0 : 0x80);
1713           } else
1714             val = i * 4 + j + ((i & 1) * 16);
1715
1716           ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1717         }
1718       }
1719
1720       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1721                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1722                                      &ShufBytes[0], ShufBytes.size()));
1723     } else {
1724       // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1725       SDOperand Zero = DAG.getConstant(0, MVT::i32);
1726       return DAG.getNode(ISD::BIT_CONVERT, VT,
1727                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1728                                      Zero, Zero, Zero, Zero));
1729     }
1730   }
1731   }
1732
1733   return SDOperand();
1734 }
1735
1736 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1737 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1738 /// permutation vector, V3, is monotonically increasing with one "exception"
1739 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1740 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1741 /// In either case, the net result is going to eventually invoke SHUFB to
1742 /// permute/shuffle the bytes from V1 and V2.
1743 /// \note
1744 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1745 /// control word for byte/halfword/word insertion. This takes care of a single
1746 /// element move from V2 into V1.
1747 /// \note
1748 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1749 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1750   SDOperand V1 = Op.getOperand(0);
1751   SDOperand V2 = Op.getOperand(1);
1752   SDOperand PermMask = Op.getOperand(2);
1753
1754   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1755
1756   // If we have a single element being moved from V1 to V2, this can be handled
1757   // using the C*[DX] compute mask instructions, but the vector elements have
1758   // to be monotonically increasing with one exception element.
1759   MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1760   unsigned EltsFromV2 = 0;
1761   unsigned V2Elt = 0;
1762   unsigned V2EltIdx0 = 0;
1763   unsigned CurrElt = 0;
1764   bool monotonic = true;
1765   if (EltVT == MVT::i8)
1766     V2EltIdx0 = 16;
1767   else if (EltVT == MVT::i16)
1768     V2EltIdx0 = 8;
1769   else if (EltVT == MVT::i32)
1770     V2EltIdx0 = 4;
1771   else
1772     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1773
1774   for (unsigned i = 0, e = PermMask.getNumOperands();
1775        EltsFromV2 <= 1 && monotonic && i != e;
1776        ++i) {
1777     unsigned SrcElt;
1778     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1779       SrcElt = 0;
1780     else
1781       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1782
1783     if (SrcElt >= V2EltIdx0) {
1784       ++EltsFromV2;
1785       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1786     } else if (CurrElt != SrcElt) {
1787       monotonic = false;
1788     }
1789
1790     ++CurrElt;
1791   }
1792
1793   if (EltsFromV2 == 1 && monotonic) {
1794     // Compute mask and shuffle
1795     MachineFunction &MF = DAG.getMachineFunction();
1796     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1797     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1798     MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1799     // Initialize temporary register to 0
1800     SDOperand InitTempReg =
1801       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1802     // Copy register's contents as index in INSERT_MASK:
1803     SDOperand ShufMaskOp =
1804       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1805                   DAG.getTargetConstant(V2Elt, MVT::i32),
1806                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1807     // Use shuffle mask in SHUFB synthetic instruction:
1808     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1809   } else {
1810     // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1811     unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1812
1813     SmallVector<SDOperand, 16> ResultMask;
1814     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1815       unsigned SrcElt;
1816       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1817         SrcElt = 0;
1818       else
1819         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1820
1821       for (unsigned j = 0; j != BytesPerElement; ++j) {
1822         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1823                                              MVT::i8));
1824       }
1825     }
1826
1827     SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1828                                       &ResultMask[0], ResultMask.size());
1829     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1830   }
1831 }
1832
1833 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1834   SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1835
1836   if (Op0.Val->getOpcode() == ISD::Constant) {
1837     // For a constant, build the appropriate constant vector, which will
1838     // eventually simplify to a vector register load.
1839
1840     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1841     SmallVector<SDOperand, 16> ConstVecValues;
1842     MVT::ValueType VT;
1843     size_t n_copies;
1844
1845     // Create a constant vector:
1846     switch (Op.getValueType()) {
1847     default: assert(0 && "Unexpected constant value type in "
1848                          "LowerSCALAR_TO_VECTOR");
1849     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1850     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1851     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1852     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1853     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1854     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1855     }
1856
1857     SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1858     for (size_t j = 0; j < n_copies; ++j)
1859       ConstVecValues.push_back(CValue);
1860
1861     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1862                        &ConstVecValues[0], ConstVecValues.size());
1863   } else {
1864     // Otherwise, copy the value from one register to another:
1865     switch (Op0.getValueType()) {
1866     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1867     case MVT::i8:
1868     case MVT::i16:
1869     case MVT::i32:
1870     case MVT::i64:
1871     case MVT::f32:
1872     case MVT::f64:
1873       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1874     }
1875   }
1876
1877   return SDOperand();
1878 }
1879
1880 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1881   switch (Op.getValueType()) {
1882   case MVT::v4i32: {
1883     SDOperand rA = Op.getOperand(0);
1884     SDOperand rB = Op.getOperand(1);
1885     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1886     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1887     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1888     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1889
1890     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1891     break;
1892   }
1893
1894   // Multiply two v8i16 vectors (pipeline friendly version):
1895   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1896   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1897   // c) Use SELB to select upper and lower halves from the intermediate results
1898   //
1899   // NOTE: We really want to move the FSMBI to earlier to actually get the
1900   // dual-issue. This code does manage to do this, even if it's a little on
1901   // the wacky side
1902   case MVT::v8i16: {
1903     MachineFunction &MF = DAG.getMachineFunction();
1904     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1905     SDOperand Chain = Op.getOperand(0);
1906     SDOperand rA = Op.getOperand(0);
1907     SDOperand rB = Op.getOperand(1);
1908     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1909     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1910
1911     SDOperand FSMBOp =
1912       DAG.getCopyToReg(Chain, FSMBIreg,
1913                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1914                                    DAG.getConstant(0xcccc, MVT::i32)));
1915
1916     SDOperand HHProd =
1917       DAG.getCopyToReg(FSMBOp, HiProdReg,
1918                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1919
1920     SDOperand HHProd_v4i32 =
1921       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1922                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1923
1924     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1925                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1926                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1927                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1928                                                HHProd_v4i32,
1929                                                DAG.getConstant(16, MVT::i16))),
1930                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1931   }
1932
1933   // This M00sE is N@stI! (apologies to Monty Python)
1934   //
1935   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1936   // is to break it all apart, sign extend, and reassemble the various
1937   // intermediate products.
1938   case MVT::v16i8: {
1939     MachineFunction &MF = DAG.getMachineFunction();
1940     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1941     SDOperand Chain = Op.getOperand(0);
1942     SDOperand rA = Op.getOperand(0);
1943     SDOperand rB = Op.getOperand(1);
1944     SDOperand c8 = DAG.getConstant(8, MVT::i8);
1945     SDOperand c16 = DAG.getConstant(16, MVT::i8);
1946
1947     unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1948     unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1949     unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1950
1951     SDOperand LLProd =
1952       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1953                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1954                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1955
1956     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1957
1958     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1959
1960     SDOperand LHProd =
1961       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1962                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1963
1964     SDOperand FSMBdef_2222 =
1965       DAG.getCopyToReg(Chain, FSMBreg_2222,
1966                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1967                                    DAG.getConstant(0x2222, MVT::i32)));
1968
1969     SDOperand FSMBuse_2222 =
1970       DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1971
1972     SDOperand LoProd_1 =
1973       DAG.getCopyToReg(Chain, LoProd_reg,
1974                        DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1975                                    FSMBuse_2222));
1976
1977     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1978
1979     SDOperand LoProd =
1980       DAG.getNode(ISD::AND, MVT::v4i32,
1981                   DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1982                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1983                               LoProdMask, LoProdMask,
1984                               LoProdMask, LoProdMask));
1985
1986     SDOperand rAH =
1987       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1988                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1989
1990     SDOperand rBH =
1991       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1993
1994     SDOperand HLProd =
1995       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1996                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1997                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1998
1999     SDOperand HHProd_1 =
2000       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2001                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2002                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2003                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2004                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2005
2006     SDOperand HHProd =
2007       DAG.getCopyToReg(Chain, HiProd_reg,
2008                        DAG.getNode(SPUISD::SELB, MVT::v8i16,
2009                                    HLProd,
2010                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2011                                    FSMBuse_2222));
2012
2013     SDOperand HiProd =
2014       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2015                   DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2016
2017     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2018                        DAG.getNode(ISD::OR, MVT::v4i32,
2019                                    LoProd, HiProd));
2020   }
2021
2022   default:
2023     cerr << "CellSPU: Unknown vector multiplication, got "
2024          << MVT::getValueTypeString(Op.getValueType())
2025          << "\n";
2026     abort();
2027     /*NOTREACHED*/
2028   }
2029
2030   return SDOperand();
2031 }
2032
2033 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2034   MachineFunction &MF = DAG.getMachineFunction();
2035   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2036
2037   SDOperand A = Op.getOperand(0);
2038   SDOperand B = Op.getOperand(1);
2039   unsigned VT = Op.getValueType();
2040
2041   unsigned VRegBR, VRegC;
2042
2043   if (VT == MVT::f32) {
2044     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2045     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2046   } else {
2047     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2048     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2049   }
2050   // TODO: make sure we're feeding FPInterp the right arguments
2051   // Right now: fi B, frest(B)
2052
2053   // Computes BRcpl =
2054   // (Floating Interpolate (FP Reciprocal Estimate B))
2055   SDOperand BRcpl =
2056       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2057                        DAG.getNode(SPUISD::FPInterp, VT, B,
2058                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2059
2060   // Computes A * BRcpl and stores in a temporary register
2061   SDOperand AxBRcpl =
2062       DAG.getCopyToReg(BRcpl, VRegC,
2063                  DAG.getNode(ISD::FMUL, VT, A,
2064                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2065   // What's the Chain variable do? It's magic!
2066   // TODO: set Chain = Op(0).getEntryNode()
2067
2068   return DAG.getNode(ISD::FADD, VT,
2069                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2070                 DAG.getNode(ISD::FMUL, VT,
2071                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2072                         DAG.getNode(ISD::FSUB, VT, A,
2073                             DAG.getNode(ISD::FMUL, VT, B,
2074                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2075 }
2076
2077 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2078   unsigned VT = Op.getValueType();
2079   SDOperand N = Op.getOperand(0);
2080   SDOperand Elt = Op.getOperand(1);
2081   SDOperand ShufMask[16];
2082   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2083
2084   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2085
2086   int EltNo = (int) C->getValue();
2087
2088   // sanity checks:
2089   if (VT == MVT::i8 && EltNo >= 16)
2090     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2091   else if (VT == MVT::i16 && EltNo >= 8)
2092     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2093   else if (VT == MVT::i32 && EltNo >= 4)
2094     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2095   else if (VT == MVT::i64 && EltNo >= 2)
2096     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2097
2098   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2099     // i32 and i64: Element 0 is the preferred slot
2100     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2101   }
2102
2103   // Need to generate shuffle mask and extract:
2104   int prefslot_begin = -1, prefslot_end = -1;
2105   int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2106
2107   switch (VT) {
2108   case MVT::i8: {
2109     prefslot_begin = prefslot_end = 3;
2110     break;
2111   }
2112   case MVT::i16: {
2113     prefslot_begin = 2; prefslot_end = 3;
2114     break;
2115   }
2116   case MVT::i32: {
2117     prefslot_begin = 0; prefslot_end = 3;
2118     break;
2119   }
2120   case MVT::i64: {
2121     prefslot_begin = 0; prefslot_end = 7;
2122     break;
2123   }
2124   }
2125
2126   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2127          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2128
2129   for (int i = 0; i < 16; ++i) {
2130     // zero fill uppper part of preferred slot, don't care about the
2131     // other slots:
2132     unsigned int mask_val;
2133
2134     if (i <= prefslot_end) {
2135       mask_val =
2136         ((i < prefslot_begin)
2137          ? 0x80
2138          : elt_byte + (i - prefslot_begin));
2139
2140       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2141     } else
2142       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2143   }
2144
2145   SDOperand ShufMaskVec =
2146     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2147                 &ShufMask[0],
2148                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2149
2150   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2151                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2152                                  N, N, ShufMaskVec));
2153
2154 }
2155
2156 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2157   SDOperand VecOp = Op.getOperand(0);
2158   SDOperand ValOp = Op.getOperand(1);
2159   SDOperand IdxOp = Op.getOperand(2);
2160   MVT::ValueType VT = Op.getValueType();
2161
2162   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2163   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2164
2165   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2166   // Use $2 because it's always 16-byte aligned and it's available:
2167   SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2168
2169   SDOperand result =
2170     DAG.getNode(SPUISD::SHUFB, VT,
2171                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2172                 VecOp,
2173                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2174                             DAG.getNode(ISD::ADD, PtrVT,
2175                                         PtrBase,
2176                                         DAG.getConstant(CN->getValue(),
2177                                                         PtrVT))));
2178
2179   return result;
2180 }
2181
2182 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2183   SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2184
2185   assert(Op.getValueType() == MVT::i8);
2186   switch (Opc) {
2187   default:
2188     assert(0 && "Unhandled i8 math operator");
2189     /*NOTREACHED*/
2190     break;
2191   case ISD::SUB: {
2192     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2193     // the result:
2194     SDOperand N1 = Op.getOperand(1);
2195     N0 = (N0.getOpcode() != ISD::Constant
2196           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2197           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2198     N1 = (N1.getOpcode() != ISD::Constant
2199           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2200           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2201     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202                        DAG.getNode(Opc, MVT::i16, N0, N1));
2203   }
2204   case ISD::ROTR:
2205   case ISD::ROTL: {
2206     SDOperand N1 = Op.getOperand(1);
2207     unsigned N1Opc;
2208     N0 = (N0.getOpcode() != ISD::Constant
2209           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2211     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2212     N1 = (N1.getOpcode() != ISD::Constant
2213           ? DAG.getNode(N1Opc, MVT::i16, N1)
2214           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2215     SDOperand ExpandArg =
2216       DAG.getNode(ISD::OR, MVT::i16, N0,
2217                   DAG.getNode(ISD::SHL, MVT::i16,
2218                               N0, DAG.getConstant(8, MVT::i16)));
2219     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2220                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2221   }
2222   case ISD::SRL:
2223   case ISD::SHL: {
2224     SDOperand N1 = Op.getOperand(1);
2225     unsigned N1Opc;
2226     N0 = (N0.getOpcode() != ISD::Constant
2227           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2228           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2229     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2230     N1 = (N1.getOpcode() != ISD::Constant
2231           ? DAG.getNode(N1Opc, MVT::i16, N1)
2232           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2233     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2234                        DAG.getNode(Opc, MVT::i16, N0, N1));
2235   }
2236   case ISD::SRA: {
2237     SDOperand N1 = Op.getOperand(1);
2238     unsigned N1Opc;
2239     N0 = (N0.getOpcode() != ISD::Constant
2240           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2241           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2242     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2243     N1 = (N1.getOpcode() != ISD::Constant
2244           ? DAG.getNode(N1Opc, MVT::i16, N1)
2245           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2246     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2247                        DAG.getNode(Opc, MVT::i16, N0, N1));
2248   }
2249   case ISD::MUL: {
2250     SDOperand N1 = Op.getOperand(1);
2251     unsigned N1Opc;
2252     N0 = (N0.getOpcode() != ISD::Constant
2253           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2254           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2255     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2256     N1 = (N1.getOpcode() != ISD::Constant
2257           ? DAG.getNode(N1Opc, MVT::i16, N1)
2258           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2259     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2260                        DAG.getNode(Opc, MVT::i16, N0, N1));
2261     break;
2262   }
2263   }
2264
2265   return SDOperand();
2266 }
2267
2268 //! Lower byte immediate operations for v16i8 vectors:
2269 static SDOperand
2270 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2271   SDOperand ConstVec;
2272   SDOperand Arg;
2273   MVT::ValueType VT = Op.getValueType();
2274
2275   ConstVec = Op.getOperand(0);
2276   Arg = Op.getOperand(1);
2277   if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2278     if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2279       ConstVec = ConstVec.getOperand(0);
2280     } else {
2281       ConstVec = Op.getOperand(1);
2282       Arg = Op.getOperand(0);
2283       if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284         ConstVec = ConstVec.getOperand(0);
2285       }
2286     }
2287   }
2288
2289   if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2290     uint64_t VectorBits[2];
2291     uint64_t UndefBits[2];
2292     uint64_t SplatBits, SplatUndef;
2293     int SplatSize;
2294
2295     if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2296         && isConstantSplat(VectorBits, UndefBits,
2297                            MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2298                            SplatBits, SplatUndef, SplatSize)) {
2299       SDOperand tcVec[16];
2300       SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2301       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2302
2303       // Turn the BUILD_VECTOR into a set of target constants:
2304       for (size_t i = 0; i < tcVecSize; ++i)
2305         tcVec[i] = tc;
2306
2307       return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2308                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2309     }
2310   }
2311
2312   return SDOperand();
2313 }
2314
2315 //! Lower i32 multiplication
2316 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2317                           unsigned Opc) {
2318   switch (VT) {
2319   default:
2320     cerr << "CellSPU: Unknown LowerMUL value type, got "
2321          << MVT::getValueTypeString(Op.getValueType())
2322          << "\n";
2323     abort();
2324     /*NOTREACHED*/
2325
2326   case MVT::i32: {
2327     SDOperand rA = Op.getOperand(0);
2328     SDOperand rB = Op.getOperand(1);
2329
2330     return DAG.getNode(ISD::ADD, MVT::i32,
2331                        DAG.getNode(ISD::ADD, MVT::i32,
2332                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2333                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2334                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2335   }
2336   }
2337
2338   return SDOperand();
2339 }
2340
2341 //! Custom lowering for CTPOP (count population)
2342 /*!
2343   Custom lowering code that counts the number ones in the input
2344   operand. SPU has such an instruction, but it counts the number of
2345   ones per byte, which then have to be accumulated.
2346 */
2347 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2348   unsigned VT = Op.getValueType();
2349   unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2350
2351   switch (VT) {
2352   case MVT::i8: {
2353     SDOperand N = Op.getOperand(0);
2354     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2355
2356     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2357     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2358
2359     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2360   }
2361
2362   case MVT::i16: {
2363     MachineFunction &MF = DAG.getMachineFunction();
2364     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2365
2366     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2367
2368     SDOperand N = Op.getOperand(0);
2369     SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2370     SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2371     SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2372
2373     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2374     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2375
2376     // CNTB_result becomes the chain to which all of the virtual registers
2377     // CNTB_reg, SUM1_reg become associated:
2378     SDOperand CNTB_result =
2379       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2380
2381     SDOperand CNTB_rescopy =
2382       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2383
2384     SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2385
2386     return DAG.getNode(ISD::AND, MVT::i16,
2387                        DAG.getNode(ISD::ADD, MVT::i16,
2388                                    DAG.getNode(ISD::SRL, MVT::i16,
2389                                                Tmp1, Shift1),
2390                                    Tmp1),
2391                        Mask0);
2392   }
2393
2394   case MVT::i32: {
2395     MachineFunction &MF = DAG.getMachineFunction();
2396     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2397
2398     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2399     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2400
2401     SDOperand N = Op.getOperand(0);
2402     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2403     SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2404     SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2405     SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2406
2407     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2408     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2409
2410     // CNTB_result becomes the chain to which all of the virtual registers
2411     // CNTB_reg, SUM1_reg become associated:
2412     SDOperand CNTB_result =
2413       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2414
2415     SDOperand CNTB_rescopy =
2416       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2417
2418     SDOperand Comp1 =
2419       DAG.getNode(ISD::SRL, MVT::i32,
2420                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2421
2422     SDOperand Sum1 =
2423       DAG.getNode(ISD::ADD, MVT::i32,
2424                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2425
2426     SDOperand Sum1_rescopy =
2427       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2428
2429     SDOperand Comp2 =
2430       DAG.getNode(ISD::SRL, MVT::i32,
2431                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2432                   Shift2);
2433     SDOperand Sum2 =
2434       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2435                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2436
2437     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2438   }
2439
2440   case MVT::i64:
2441     break;
2442   }
2443
2444   return SDOperand();
2445 }
2446
2447 /// LowerOperation - Provide custom lowering hooks for some operations.
2448 ///
2449 SDOperand
2450 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2451 {
2452   switch (Op.getOpcode()) {
2453   default: {
2454     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2455     cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2456     cerr << "*Op.Val:\n";
2457     Op.Val->dump();
2458     abort();
2459   }
2460   case ISD::LOAD:
2461   case ISD::SEXTLOAD:
2462   case ISD::ZEXTLOAD:
2463     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2464   case ISD::STORE:
2465     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2466   case ISD::ConstantPool:
2467     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2468   case ISD::GlobalAddress:
2469     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2470   case ISD::JumpTable:
2471     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2472   case ISD::Constant:
2473     return LowerConstant(Op, DAG);
2474   case ISD::ConstantFP:
2475     return LowerConstantFP(Op, DAG);
2476   case ISD::BRCOND:
2477     return LowerBRCOND(Op, DAG);
2478   case ISD::FORMAL_ARGUMENTS:
2479     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2480   case ISD::CALL:
2481     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2482   case ISD::RET:
2483     return LowerRET(Op, DAG, getTargetMachine());
2484
2485   // i8 math ops:
2486   case ISD::SUB:
2487   case ISD::ROTR:
2488   case ISD::ROTL:
2489   case ISD::SRL:
2490   case ISD::SHL:
2491   case ISD::SRA:
2492     return LowerI8Math(Op, DAG, Op.getOpcode());
2493
2494   // Vector-related lowering.
2495   case ISD::BUILD_VECTOR:
2496     return LowerBUILD_VECTOR(Op, DAG);
2497   case ISD::SCALAR_TO_VECTOR:
2498     return LowerSCALAR_TO_VECTOR(Op, DAG);
2499   case ISD::VECTOR_SHUFFLE:
2500     return LowerVECTOR_SHUFFLE(Op, DAG);
2501   case ISD::EXTRACT_VECTOR_ELT:
2502     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2503   case ISD::INSERT_VECTOR_ELT:
2504     return LowerINSERT_VECTOR_ELT(Op, DAG);
2505
2506   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2507   case ISD::AND:
2508   case ISD::OR:
2509   case ISD::XOR:
2510     return LowerByteImmed(Op, DAG);
2511
2512   // Vector and i8 multiply:
2513   case ISD::MUL:
2514     if (MVT::isVector(Op.getValueType()))
2515       return LowerVectorMUL(Op, DAG);
2516     else if (Op.getValueType() == MVT::i8)
2517       return LowerI8Math(Op, DAG, Op.getOpcode());
2518     else
2519       return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2520
2521   case ISD::FDIV:
2522     if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2523       return LowerFDIVf32(Op, DAG);
2524 //    else if (Op.getValueType() == MVT::f64)
2525 //      return LowerFDIVf64(Op, DAG);
2526     else
2527       assert(0 && "Calling FDIV on unsupported MVT");
2528
2529   case ISD::CTPOP:
2530     return LowerCTPOP(Op, DAG);
2531   }
2532
2533   return SDOperand();
2534 }
2535
2536 //===----------------------------------------------------------------------===//
2537 //  Other Lowering Code
2538 //===----------------------------------------------------------------------===//
2539
2540 MachineBasicBlock *
2541 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2542                                            MachineBasicBlock *BB)
2543 {
2544   return BB;
2545 }
2546
2547 //===----------------------------------------------------------------------===//
2548 // Target Optimization Hooks
2549 //===----------------------------------------------------------------------===//
2550
2551 SDOperand
2552 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2553 {
2554 #if 0
2555   TargetMachine &TM = getTargetMachine();
2556   SelectionDAG &DAG = DCI.DAG;
2557   SDOperand N0 = N->getOperand(0);      // everything has at least one operand
2558
2559   switch (N->getOpcode()) {
2560   default: break;
2561     // Do something creative here for ISD nodes that can be coalesced in unique
2562     // ways.
2563   }
2564 #endif
2565
2566   // Otherwise, return unchanged.
2567   return SDOperand();
2568 }
2569
2570 //===----------------------------------------------------------------------===//
2571 // Inline Assembly Support
2572 //===----------------------------------------------------------------------===//
2573
2574 /// getConstraintType - Given a constraint letter, return the type of
2575 /// constraint it is for this target.
2576 SPUTargetLowering::ConstraintType
2577 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2578   if (ConstraintLetter.size() == 1) {
2579     switch (ConstraintLetter[0]) {
2580     default: break;
2581     case 'b':
2582     case 'r':
2583     case 'f':
2584     case 'v':
2585     case 'y':
2586       return C_RegisterClass;
2587     }
2588   }
2589   return TargetLowering::getConstraintType(ConstraintLetter);
2590 }
2591
2592 std::pair<unsigned, const TargetRegisterClass*>
2593 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2594                                                 MVT::ValueType VT) const
2595 {
2596   if (Constraint.size() == 1) {
2597     // GCC RS6000 Constraint Letters
2598     switch (Constraint[0]) {
2599     case 'b':   // R1-R31
2600     case 'r':   // R0-R31
2601       if (VT == MVT::i64)
2602         return std::make_pair(0U, SPU::R64CRegisterClass);
2603       return std::make_pair(0U, SPU::R32CRegisterClass);
2604     case 'f':
2605       if (VT == MVT::f32)
2606         return std::make_pair(0U, SPU::R32FPRegisterClass);
2607       else if (VT == MVT::f64)
2608         return std::make_pair(0U, SPU::R64FPRegisterClass);
2609       break;
2610     case 'v':
2611       return std::make_pair(0U, SPU::GPRCRegisterClass);
2612     }
2613   }
2614
2615   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2616 }
2617
2618 void
2619 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2620                                                   uint64_t Mask,
2621                                                   uint64_t &KnownZero,
2622                                                   uint64_t &KnownOne,
2623                                                   const SelectionDAG &DAG,
2624                                                   unsigned Depth ) const {
2625   KnownZero = 0;
2626   KnownOne = 0;
2627 }
2628
2629 // LowerAsmOperandForConstraint
2630 void
2631 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2632                                                 char ConstraintLetter,
2633                                                 std::vector<SDOperand> &Ops,
2634                                                 SelectionDAG &DAG) {
2635   // Default, for the time being, to the base class handler
2636   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2637 }
2638
2639 /// isLegalAddressImmediate - Return true if the integer value can be used
2640 /// as the offset of the target addressing mode.
2641 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2642   // SPU's addresses are 256K:
2643   return (V > -(1 << 18) && V < (1 << 18) - 1);
2644 }
2645
2646 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2647   return false;
2648 }