lib/Target/CellSPU/SPUISelLowering.cpp

   1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "llvm/ADT/VectorExtras.h"
  18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  19 #include "llvm/CodeGen/CallingConvLower.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineFunction.h"
  22 #include "llvm/CodeGen/MachineInstrBuilder.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SSARegMap.h"
  25 #include "llvm/Constants.h"
  26 #include "llvm/Function.h"
  27 #include "llvm/Intrinsics.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/Support/MathExtras.h"
  30 #include "llvm/Target/TargetOptions.h"
  31
  32 #include <map>
  33
  34 using namespace llvm;
  35
  36 // Used in getTargetNodeName() below
  37 namespace {
  38   std::map<unsigned, const char *> node_names;
  39
  40   //! MVT::ValueType mapping to useful data for Cell SPU
  41   struct valtype_map_s {
  42     const MVT::ValueType        valtype;
  43     const int                   prefslot_byte;
  44   };
  45
  46   const valtype_map_s valtype_map[] = {
  47     { MVT::i1,   3 },
  48     { MVT::i8,   3 },
  49     { MVT::i16,  2 },
  50     { MVT::i32,  0 },
  51     { MVT::f32,  0 },
  52     { MVT::i64,  0 },
  53     { MVT::f64,  0 },
  54     { MVT::i128, 0 }
  55   };
  56
  57   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  58
  59   const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  60     const valtype_map_s *retval = 0;
  61
  62     for (size_t i = 0; i < n_valtype_map; ++i) {
  63       if (valtype_map[i].valtype == VT) {
  64         retval = valtype_map + i;
  65         break;
  66       }
  67     }
  68
  69 #ifndef NDEBUG
  70     if (retval == 0) {
  71       cerr << "getValueTypeMapEntry returns NULL for "
  72            << MVT::getValueTypeString(VT)
  73            << "\n";
  74       abort();
  75     }
  76 #endif
  77
  78     return retval;
  79   }
  80
  81   //! Predicate that returns true if operand is a memory target
  82   /*!
  83     \arg Op Operand to test
  84     \return true if the operand is a memory target (i.e., global
  85     address, external symbol, constant pool) or an existing D-Form
  86     address.
  87    */
  88   bool isMemoryOperand(const SDOperand &Op)
  89   {
  90     const unsigned Opc = Op.getOpcode();
  91     return (Opc == ISD::GlobalAddress
  92             || Opc == ISD::GlobalTLSAddress
  93             || Opc ==  ISD::FrameIndex
  94             || Opc == ISD::JumpTable
  95             || Opc == ISD::ConstantPool
  96             || Opc == ISD::ExternalSymbol
  97             || Opc == ISD::TargetGlobalAddress
  98             || Opc == ISD::TargetGlobalTLSAddress
  99             || Opc == ISD::TargetFrameIndex
 100             || Opc == ISD::TargetJumpTable
 101             || Opc == ISD::TargetConstantPool
 102             || Opc == ISD::TargetExternalSymbol
 103             || Opc == SPUISD::DFormAddr);
 104   }
 105 }
 106
 107 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 108   : TargetLowering(TM),
 109     SPUTM(TM)
 110 {
 111   // Fold away setcc operations if possible.
 112   setPow2DivIsCheap();
 113
 114   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 115   setUseUnderscoreSetJmp(true);
 116   setUseUnderscoreLongJmp(true);
 117
 118   // Set up the SPU's register classes:
 119   // NOTE: i8 register class is not registered because we cannot determine when
 120   // we need to zero or sign extend for custom-lowered loads and stores.
 121   // NOTE: Ignore the previous note. For now. :-)
 122   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 123   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 124   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 125   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 126   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 127   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 128   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 129
 130   // SPU has no sign or zero extended loads for i1, i8, i16:
 131   setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
 132   setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 133   setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 134   setStoreXAction(MVT::i1, Custom);
 135
 136   setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
 137   setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
 138   setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
 139   setStoreXAction(MVT::i8, Custom);
 140
 141   setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
 142   setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
 143   setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
 144
 145   // SPU constant load actions are custom lowered:
 146   setOperationAction(ISD::Constant,   MVT::i64, Custom);
 147   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
 148   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 149
 150   // SPU's loads and stores have to be custom lowered:
 151   for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
 152        ++sctype) {
 153     setOperationAction(ISD::LOAD, sctype, Custom);
 154     setOperationAction(ISD::STORE, sctype, Custom);
 155   }
 156
 157   // SPU supports BRCOND, although DAGCombine will convert BRCONDs
 158   // into BR_CCs. BR_CC instructions are custom selected in
 159   // SPUDAGToDAGISel.
 160   setOperationAction(ISD::BRCOND, MVT::Other, Legal);
 161
 162   // Expand the jumptable branches
 163   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 164   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 165   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 166
 167   // SPU has no intrinsics for these particular operations:
 168   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
 169   setOperationAction(ISD::MEMSET, MVT::Other, Expand);
 170   setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
 171
 172   // PowerPC has no SREM/UREM instructions
 173   setOperationAction(ISD::SREM, MVT::i32, Expand);
 174   setOperationAction(ISD::UREM, MVT::i32, Expand);
 175   setOperationAction(ISD::SREM, MVT::i64, Expand);
 176   setOperationAction(ISD::UREM, MVT::i64, Expand);
 177
 178   // We don't support sin/cos/sqrt/fmod
 179   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 180   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 181   setOperationAction(ISD::FREM , MVT::f64, Expand);
 182   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 183   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 184   setOperationAction(ISD::FREM , MVT::f32, Expand);
 185
 186   // If we're enabling GP optimizations, use hardware square root
 187   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 188   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 189
 190   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 191   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 192
 193   // SPU can do rotate right and left, so legalize it... but customize for i8
 194   // because instructions don't exist.
 195   setOperationAction(ISD::ROTR, MVT::i32,    Legal);
 196   setOperationAction(ISD::ROTR, MVT::i16,    Legal);
 197   setOperationAction(ISD::ROTR, MVT::i8,     Custom);
 198   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 199   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 200   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 201   // SPU has no native version of shift left/right for i8
 202   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 203   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 204   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 205
 206   // Custom lower i32 multiplications
 207   setOperationAction(ISD::MUL,  MVT::i32,    Custom);
 208
 209   // Need to custom handle (some) common i8 math ops
 210   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 211   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 212
 213   // SPU does not have BSWAP. It does have i32 support CTLZ.
 214   // CTPOP has to be custom lowered.
 215   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 216   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 217
 218   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 219   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 220   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 221   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 222
 223   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 224   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 225
 226   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 227
 228   // SPU does not have select or setcc
 229   setOperationAction(ISD::SELECT, MVT::i1,   Expand);
 230   setOperationAction(ISD::SELECT, MVT::i8,   Expand);
 231   setOperationAction(ISD::SELECT, MVT::i16,  Expand);
 232   setOperationAction(ISD::SELECT, MVT::i32,  Expand);
 233   setOperationAction(ISD::SELECT, MVT::i64,  Expand);
 234   setOperationAction(ISD::SELECT, MVT::f32,  Expand);
 235   setOperationAction(ISD::SELECT, MVT::f64,  Expand);
 236
 237   setOperationAction(ISD::SETCC, MVT::i1,   Expand);
 238   setOperationAction(ISD::SETCC, MVT::i8,   Expand);
 239   setOperationAction(ISD::SETCC, MVT::i16,  Expand);
 240   setOperationAction(ISD::SETCC, MVT::i32,  Expand);
 241   setOperationAction(ISD::SETCC, MVT::i64,  Expand);
 242   setOperationAction(ISD::SETCC, MVT::f32,  Expand);
 243   setOperationAction(ISD::SETCC, MVT::f64,  Expand);
 244
 245   // SPU has a legal FP -> signed INT instruction
 246   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
 247   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 248   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
 249   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 250
 251   // FDIV on SPU requires custom lowering
 252   setOperationAction(ISD::FDIV, MVT::f32, Custom);
 253   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
 254
 255   // SPU has [U|S]INT_TO_FP
 256   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
 257   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 258   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 259   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
 260   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 261   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
 262   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 263   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 264
 265   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 266   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 267   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 268   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 269
 270   // We cannot sextinreg(i1).  Expand to shifts.
 271   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 272
 273   // Support label based line numbers.
 274   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 275   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 276
 277   // We want to legalize GlobalAddress and ConstantPool nodes into the
 278   // appropriate instructions to materialize the address.
 279   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 280   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 281   setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
 282   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 283   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 284   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 285   setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
 286   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 287
 288   // RET must be custom lowered, to meet ABI requirements
 289   setOperationAction(ISD::RET,           MVT::Other, Custom);
 290
 291   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 292   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 293
 294   // Use the default implementation.
 295   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 296   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 297   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 298   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 299   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 300   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 301   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 302
 303   // Cell SPU has instructions for converting between i64 and fp.
 304   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 305   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 306
 307   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 308   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 309
 310   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 311   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 312
 313   // First set operation action for all vector types to expand. Then we
 314   // will selectively turn on ones that can be effectively codegen'd.
 315   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 316   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 317   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 318   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 319   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 320   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 321
 322   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 323        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 324     // add/sub are legal for all supported vector VT's.
 325     setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 326     setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
 327     // mul has to be custom lowered.
 328     setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
 329
 330     setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
 331     setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
 332     setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
 333     setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
 334     setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
 335     setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);
 336
 337     // These operations need to be expanded:
 338     setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
 339     setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
 340     setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 341     setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 342     setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
 343
 344     // Custom lower build_vector, constant pool spills, insert and
 345     // extract vector elements:
 346     setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
 347     setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
 348     setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
 349     setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 350     setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
 351     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
 352   }
 353
 354   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 355   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 356   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 357   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 358   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 359
 360   setSetCCResultType(MVT::i32);
 361   setShiftAmountType(MVT::i32);
 362   setSetCCResultContents(ZeroOrOneSetCCResult);
 363
 364   setStackPointerRegisterToSaveRestore(SPU::R1);
 365
 366   // We have target-specific dag combine patterns for the following nodes:
 367   // e.g., setTargetDAGCombine(ISD::SUB);
 368
 369   computeRegisterProperties();
 370 }
 371
 372 const char *
 373 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 374 {
 375   if (node_names.empty()) {
 376     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 377     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 378     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 379     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 380     node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
 381     node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
 382     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 383     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 384     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 385     node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
 386     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 387     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
 388     node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
 389     node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
 390     node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
 391     node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
 392     node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
 393     node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
 394     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
 395     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
 396     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
 397     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
 398     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
 399     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
 400     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
 401     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 402     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 403     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
 404       "SPUISD::ROTBYTES_RIGHT_Z";
 405     node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
 406       "SPUISD::ROTBYTES_RIGHT_S";
 407     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 408     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
 409       "SPUISD::ROTBYTES_LEFT_CHAINED";
 410     node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
 411     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 412     node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
 413     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
 414     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
 415     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
 416   }
 417
 418   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 419
 420   return ((i != node_names.end()) ? i->second : 0);
 421 }
 422
 423 //===----------------------------------------------------------------------===//
 424 // Calling convention code:
 425 //===----------------------------------------------------------------------===//
 426
 427 #include "SPUGenCallingConv.inc"
 428
 429 //===----------------------------------------------------------------------===//
 430 //  LowerOperation implementation
 431 //===----------------------------------------------------------------------===//
 432
 433 /// Custom lower loads for CellSPU
 434 /*!
 435  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 436  within a 16-byte block, we have to rotate to extract the requested element.
 437  */
 438 static SDOperand
 439 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 440   LoadSDNode *LN = cast<LoadSDNode>(Op);
 441   SDOperand basep = LN->getBasePtr();
 442   SDOperand the_chain = LN->getChain();
 443   MVT::ValueType BasepOpc = basep.Val->getOpcode();
 444   MVT::ValueType VT = LN->getLoadedVT();
 445   MVT::ValueType OpVT = Op.Val->getValueType(0);
 446   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 447   ISD::LoadExtType ExtType = LN->getExtensionType();
 448   unsigned alignment = LN->getAlignment();
 449   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 450   SDOperand Ops[8];
 451
 452   if (BasepOpc == ISD::FrameIndex) {
 453     // Loading from a frame index is always properly aligned. Always.
 454     return SDOperand();
 455   }
 456
 457   // For an extending load of an i1 variable, just call it i8 (or whatever we
 458   // were passed) and make it zero-extended:
 459   if (VT == MVT::i1) {
 460     VT = OpVT;
 461     ExtType = ISD::ZEXTLOAD;
 462   }
 463
 464   switch (LN->getAddressingMode()) {
 465   case ISD::UNINDEXED: {
 466     SDOperand result;
 467     SDOperand rot_op, rotamt;
 468     SDOperand ptrp;
 469     int c_offset;
 470     int c_rotamt;
 471
 472     // The vector type we really want to be when we load the 16-byte chunk
 473     MVT::ValueType vecVT, opVecVT;
 474
 475     vecVT = MVT::v16i8;
 476     if (VT != MVT::i1)
 477       vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 478     opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
 479
 480     if (basep.getOpcode() == ISD::ADD) {
 481       const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
 482
 483       assert(CN != NULL
 484              && "LowerLOAD: ISD::ADD operand 1 is not constant");
 485
 486       c_offset = (int) CN->getValue();
 487       c_rotamt = (int) (c_offset & 0xf);
 488
 489       // Adjust the rotation amount to ensure that the final result ends up in
 490       // the preferred slot:
 491       c_rotamt -= vtm->prefslot_byte;
 492       ptrp = basep.getOperand(0);
 493     } else {
 494       c_offset = 0;
 495       c_rotamt = -vtm->prefslot_byte;
 496       ptrp = basep;
 497     }
 498
 499     if (alignment == 16) {
 500       // 16-byte aligned load into preferred slot, no rotation
 501       if (c_rotamt == 0) {
 502         if (isMemoryOperand(ptrp))
 503           // Return unchanged
 504           return SDOperand();
 505         else {
 506           // Return modified D-Form address for pointer:
 507           ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 508                              ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
 509           if (VT == OpVT)
 510             return DAG.getLoad(VT, LN->getChain(), ptrp,
 511                                LN->getSrcValue(), LN->getSrcValueOffset(),
 512                                LN->isVolatile(), 16);
 513           else
 514             return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
 515                                   LN->getSrcValueOffset(), OpVT,
 516                                   LN->isVolatile(), 16);
 517         }
 518       } else {
 519         // Need to rotate...
 520         if (c_rotamt < 0)
 521           c_rotamt += 16;
 522         // Realign the base pointer, with a D-Form address
 523         if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
 524           basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
 525                               ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
 526         else
 527           basep = ptrp;
 528
 529         // Rotate the load:
 530         rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
 531                              LN->getSrcValue(), LN->getSrcValueOffset(),
 532                              LN->isVolatile(), 16);
 533         the_chain = rot_op.getValue(1);
 534         rotamt = DAG.getConstant(c_rotamt, MVT::i16);
 535
 536         SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
 537         Ops[0] = the_chain;
 538         Ops[1] = rot_op;
 539         Ops[2] = rotamt;
 540
 541         result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
 542         the_chain = result.getValue(1);
 543
 544         if (VT == OpVT || ExtType == ISD::EXTLOAD) {
 545           SDVTList scalarvts;
 546           Ops[0] = the_chain;
 547           Ops[1] = result;
 548           if (OpVT == VT) {
 549             scalarvts = DAG.getVTList(VT, MVT::Other);
 550           } else {
 551             scalarvts = DAG.getVTList(OpVT, MVT::Other);
 552           }
 553
 554           result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
 555                                result);
 556           Ops[0] = the_chain;
 557           Ops[1] = result;
 558           result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
 559           the_chain = result.getValue(1);
 560         } else {
 561           // Handle the sign and zero-extending loads for i1 and i8:
 562           unsigned NewOpC;
 563
 564           if (ExtType == ISD::SEXTLOAD) {
 565             NewOpC = (OpVT == MVT::i1
 566                       ? SPUISD::EXTRACT_I1_SEXT
 567                       : SPUISD::EXTRACT_I8_SEXT);
 568           } else {
 569       assert(ExtType == ISD::ZEXTLOAD);
 570             NewOpC = (OpVT == MVT::i1
 571                       ? SPUISD::EXTRACT_I1_ZEXT
 572                       : SPUISD::EXTRACT_I8_ZEXT);
 573           }
 574
 575           result = DAG.getNode(NewOpC, OpVT, result);
 576         }
 577
 578         SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 579         SDOperand retops[2] = { result, the_chain };
 580
 581         result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
 582         return result;
 583         /*UNREACHED*/
 584       }
 585     } else {
 586       // Misaligned 16-byte load:
 587       if (basep.getOpcode() == ISD::LOAD) {
 588         LN = cast<LoadSDNode>(basep);
 589         if (LN->getAlignment() == 16) {
 590           // We can verify that we're really loading from a 16-byte aligned
 591           // chunk. Encapsulate basep as a D-Form address and return a new
 592           // load:
 593           basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
 594                               DAG.getConstant(0, PtrVT));
 595           if (OpVT == VT)
 596             return DAG.getLoad(VT, LN->getChain(), basep,
 597                                LN->getSrcValue(), LN->getSrcValueOffset(),
 598                                LN->isVolatile(), 16);
 599           else
 600             return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
 601                                   LN->getSrcValue(), LN->getSrcValueOffset(),
 602                                   OpVT, LN->isVolatile(), 16);
 603         }
 604       }
 605
 606       // Catch all other cases where we can't guarantee that we have a
 607       // 16-byte aligned entity, which means resorting to an X-form
 608       // address scheme:
 609
 610       SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
 611       SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
 612       SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
 613
 614       ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
 615
 616       SDOperand alignLoad =
 617         DAG.getLoad(opVecVT, LN->getChain(), ptrp,
 618                     LN->getSrcValue(), LN->getSrcValueOffset(),
 619                     LN->isVolatile(), 16);
 620
 621       SDOperand insertEltOp =
 622         DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
 623
 624       result = DAG.getNode(SPUISD::SHUFB, opVecVT,
 625                            alignLoad,
 626                            alignLoad,
 627                            DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
 628
 629       result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
 630
 631       SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
 632       SDOperand retops[2] = { result, the_chain };
 633
 634       result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
 635       return result;
 636     }
 637     break;
 638   }
 639   case ISD::PRE_INC:
 640   case ISD::PRE_DEC:
 641   case ISD::POST_INC:
 642   case ISD::POST_DEC:
 643   case ISD::LAST_INDEXED_MODE:
 644     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 645             "UNINDEXED\n";
 646     cerr << (unsigned) LN->getAddressingMode() << "\n";
 647     abort();
 648     /*NOTREACHED*/
 649   }
 650
 651   return SDOperand();
 652 }
 653
 654 /// Custom lower stores for CellSPU
 655 /*!
 656  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 657  within a 16-byte block, we have to generate a shuffle to insert the
 658  requested element into its place, then store the resulting block.
 659  */
 660 static SDOperand
 661 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 662   StoreSDNode *SN = cast<StoreSDNode>(Op);
 663   SDOperand Value = SN->getValue();
 664   MVT::ValueType VT = Value.getValueType();
 665   MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
 666   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 667   SDOperand the_chain = SN->getChain();
 668   //unsigned alignment = SN->getAlignment();
 669   //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
 670
 671   switch (SN->getAddressingMode()) {
 672   case ISD::UNINDEXED: {
 673     SDOperand basep = SN->getBasePtr();
 674     SDOperand ptrOp;
 675     int offset;
 676
 677     if (basep.getOpcode() == ISD::FrameIndex) {
 678       // FrameIndex nodes are always properly aligned. Really.
 679       return SDOperand();
 680     }
 681
 682     if (basep.getOpcode() == ISD::ADD) {
 683       const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
 684       assert(CN != NULL
 685              && "LowerSTORE: ISD::ADD operand 1 is not constant");
 686       offset = unsigned(CN->getValue());
 687       ptrOp = basep.getOperand(0);
 688       DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
 689                  << offset
 690                  << "\n");
 691     } else {
 692       ptrOp = basep;
 693       offset = 0;
 694     }
 695
 696     // The vector type we really want to load from the 16-byte chunk, except
 697     // in the case of MVT::i1, which has to be v16i8.
 698     unsigned vecVT, stVecVT;
 699
 700     if (StVT != MVT::i1)
 701       stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
 702     else
 703       stVecVT = MVT::v16i8;
 704     vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
 705
 706     // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
 707     // the actual dform addr offs($reg).
 708     basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
 709                         DAG.getConstant((offset & ~0xf), PtrVT));
 710
 711     // Create the 16-byte aligned vector load
 712     SDOperand alignLoad =
 713       DAG.getLoad(vecVT, the_chain, basep,
 714                   SN->getSrcValue(), SN->getSrcValueOffset(),
 715                   SN->isVolatile(), 16);
 716     the_chain = alignLoad.getValue(1);
 717
 718     LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
 719     SDOperand theValue = SN->getValue();
 720     SDOperand result;
 721
 722     if (StVT != VT
 723         && (theValue.getOpcode() == ISD::AssertZext
 724             || theValue.getOpcode() == ISD::AssertSext)) {
 725       // Drill down and get the value for zero- and sign-extended
 726       // quantities
 727       theValue = theValue.getOperand(0);
 728     }
 729
 730     SDOperand insertEltOp =
 731       DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
 732                   DAG.getNode(SPUISD::DFormAddr, PtrVT,
 733                               ptrOp,
 734                               DAG.getConstant((offset & 0xf), PtrVT)));
 735
 736     result = DAG.getNode(SPUISD::SHUFB, vecVT,
 737                          DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
 738                          alignLoad,
 739                          DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
 740
 741     result = DAG.getStore(the_chain, result, basep,
 742                           LN->getSrcValue(), LN->getSrcValueOffset(),
 743                           LN->isVolatile(), LN->getAlignment());
 744
 745     return result;
 746     /*UNREACHED*/
 747   }
 748   case ISD::PRE_INC:
 749   case ISD::PRE_DEC:
 750   case ISD::POST_INC:
 751   case ISD::POST_DEC:
 752   case ISD::LAST_INDEXED_MODE:
 753     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 754             "UNINDEXED\n";
 755     cerr << (unsigned) SN->getAddressingMode() << "\n";
 756     abort();
 757     /*NOTREACHED*/
 758   }
 759
 760   return SDOperand();
 761 }
 762
 763 /// Generate the address of a constant pool entry.
 764 static SDOperand
 765 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 766   MVT::ValueType PtrVT = Op.getValueType();
 767   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 768   Constant *C = CP->getConstVal();
 769   SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 770   const TargetMachine &TM = DAG.getTarget();
 771   SDOperand Zero = DAG.getConstant(0, PtrVT);
 772
 773   if (TM.getRelocationModel() == Reloc::Static) {
 774     if (!ST->usingLargeMem()) {
 775       // Just return the SDOperand with the constant pool address in it.
 776       return CPI;
 777     } else {
 778       // Generate hi/lo address pair
 779       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
 780       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
 781
 782       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 783     }
 784   }
 785
 786   assert(0 &&
 787          "LowerConstantPool: Relocation model other than static not supported.");
 788   return SDOperand();
 789 }
 790
 791 static SDOperand
 792 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 793   MVT::ValueType PtrVT = Op.getValueType();
 794   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 795   SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 796   SDOperand Zero = DAG.getConstant(0, PtrVT);
 797   const TargetMachine &TM = DAG.getTarget();
 798
 799   if (TM.getRelocationModel() == Reloc::Static) {
 800     if (!ST->usingLargeMem()) {
 801       // Just return the SDOperand with the jump table address in it.
 802       return JTI;
 803     } else {
 804       // Generate hi/lo address pair
 805       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
 806       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
 807
 808       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 809     }
 810   }
 811
 812   assert(0 &&
 813          "LowerJumpTable: Relocation model other than static not supported.");
 814   return SDOperand();
 815 }
 816
 817 static SDOperand
 818 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 819   MVT::ValueType PtrVT = Op.getValueType();
 820   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 821   GlobalValue *GV = GSDN->getGlobal();
 822   SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 823   SDOperand Zero = DAG.getConstant(0, PtrVT);
 824   const TargetMachine &TM = DAG.getTarget();
 825
 826   if (TM.getRelocationModel() == Reloc::Static) {
 827     if (!ST->usingLargeMem()) {
 828       // Generate a local store address
 829       return GA;
 830     } else {
 831       // Generate hi/lo address pair
 832       SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
 833       SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
 834
 835       return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
 836     }
 837   } else {
 838     cerr << "LowerGlobalAddress: Relocation model other than static not "
 839          << "supported.\n";
 840     abort();
 841     /*NOTREACHED*/
 842   }
 843
 844   return SDOperand();
 845 }
 846
 847 //! Custom lower i64 integer constants
 848 /*!
 849  This code inserts all of the necessary juggling that needs to occur to load
 850  a 64-bit constant into a register.
 851  */
 852 static SDOperand
 853 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
 854   unsigned VT = Op.getValueType();
 855   ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
 856
 857   if (VT == MVT::i64) {
 858     SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
 859     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
 860                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
 861
 862   } else {
 863     cerr << "LowerConstant: unhandled constant type "
 864          << MVT::getValueTypeString(VT)
 865          << "\n";
 866     abort();
 867     /*NOTREACHED*/
 868   }
 869
 870   return SDOperand();
 871 }
 872
 873 //! Custom lower single precision floating point constants
 874 /*!
 875   "float" immediates can be lowered as if they were unsigned 32-bit integers.
 876   The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 877   target description.
 878  */
 879 static SDOperand
 880 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
 881   unsigned VT = Op.getValueType();
 882   ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
 883
 884   assert((FP != 0) &&
 885          "LowerConstantFP: Node is not ConstantFPSDNode");
 886
 887   if (VT == MVT::f32) {
 888     float targetConst = FP->getValueAPF().convertToFloat();
 889     return DAG.getNode(SPUISD::SFPConstant, VT,
 890                        DAG.getTargetConstantFP(targetConst, VT));
 891   } else if (VT == MVT::f64) {
 892     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 893     return DAG.getNode(ISD::BIT_CONVERT, VT,
 894                        LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
 895   }
 896
 897   return SDOperand();
 898 }
 899
 900 static SDOperand
 901 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
 902 {
 903   MachineFunction &MF = DAG.getMachineFunction();
 904   MachineFrameInfo *MFI = MF.getFrameInfo();
 905   SSARegMap *RegMap = MF.getSSARegMap();
 906   SmallVector<SDOperand, 8> ArgValues;
 907   SDOperand Root = Op.getOperand(0);
 908   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 909
 910   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
 911   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
 912
 913   unsigned ArgOffset = SPUFrameInfo::minStackSize();
 914   unsigned ArgRegIdx = 0;
 915   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
 916
 917   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 918
 919   // Add DAG nodes to load the arguments or copy them out of registers.
 920   for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 921     SDOperand ArgVal;
 922     bool needsLoad = false;
 923     MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
 924     unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
 925
 926     switch (ObjectVT) {
 927     default: {
 928       cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
 929            << MVT::getValueTypeString(ObjectVT)
 930            << "\n";
 931       abort();
 932     }
 933     case MVT::i8:
 934       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 935         unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
 936         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 937         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
 938         ++ArgRegIdx;
 939       } else {
 940         needsLoad = true;
 941       }
 942       break;
 943     case MVT::i16:
 944       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 945         unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
 946         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 947         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
 948         ++ArgRegIdx;
 949       } else {
 950         needsLoad = true;
 951       }
 952       break;
 953     case MVT::i32:
 954       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 955         unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
 956         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 957         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
 958         ++ArgRegIdx;
 959       } else {
 960         needsLoad = true;
 961       }
 962       break;
 963     case MVT::i64:
 964       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 965         unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
 966         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 967         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
 968         ++ArgRegIdx;
 969       } else {
 970         needsLoad = true;
 971       }
 972       break;
 973     case MVT::f32:
 974       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 975         unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
 976         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 977         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
 978         ++ArgRegIdx;
 979       } else {
 980         needsLoad = true;
 981       }
 982       break;
 983     case MVT::f64:
 984       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 985         unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
 986         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
 987         ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
 988         ++ArgRegIdx;
 989       } else {
 990         needsLoad = true;
 991       }
 992       break;
 993     case MVT::v2f64:
 994     case MVT::v4f32:
 995     case MVT::v4i32:
 996     case MVT::v8i16:
 997     case MVT::v16i8:
 998       if (!isVarArg && ArgRegIdx < NumArgRegs) {
 999         unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1000         MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001         ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1002         ++ArgRegIdx;
1003       } else {
1004         needsLoad = true;
1005       }
1006       break;
1007     }
1008
1009     // We need to load the argument to a virtual register if we determined above
1010     // that we ran out of physical registers of the appropriate type
1011     if (needsLoad) {
1012       // If the argument is actually used, emit a load from the right stack
1013       // slot.
1014       if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1015         int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1016         SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1017         ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1018       } else {
1019         // Don't emit a dead load.
1020         ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1021       }
1022
1023       ArgOffset += StackSlotSize;
1024     }
1025
1026     ArgValues.push_back(ArgVal);
1027   }
1028
1029   // If the function takes variable number of arguments, make a frame index for
1030   // the start of the first vararg value... for expansion of llvm.va_start.
1031   if (isVarArg) {
1032     VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1033                                                ArgOffset);
1034     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035     // If this function is vararg, store any remaining integer argument regs to
1036     // their spots on the stack so that they may be loaded by deferencing the
1037     // result of va_next.
1038     SmallVector<SDOperand, 8> MemOps;
1039     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1040       unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1041       MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1042       SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1043       SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1044       MemOps.push_back(Store);
1045       // Increment the address by four for the next argument to store
1046       SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1047       FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1048     }
1049     if (!MemOps.empty())
1050       Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1051   }
1052
1053   ArgValues.push_back(Root);
1054
1055   // Return the new list of results.
1056   std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1057                                     Op.Val->value_end());
1058   return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1059 }
1060
1061 /// isLSAAddress - Return the immediate to use if the specified
1062 /// value is representable as a LSA address.
1063 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1064   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1065   if (!C) return 0;
1066
1067   int Addr = C->getValue();
1068   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1069       (Addr << 14 >> 14) != Addr)
1070     return 0;  // Top 14 bits have to be sext of immediate.
1071
1072   return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1073 }
1074
1075 static
1076 SDOperand
1077 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1078   SDOperand Chain = Op.getOperand(0);
1079 #if 0
1080   bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1081   bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1082 #endif
1083   SDOperand Callee    = Op.getOperand(4);
1084   unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1085   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1086   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1087   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1088
1089   // Handy pointer type
1090   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1091
1092   // Accumulate how many bytes are to be pushed on the stack, including the
1093   // linkage area, and parameter passing area.  According to the SPU ABI,
1094   // we minimally need space for [LR] and [SP]
1095   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1096
1097   // Set up a copy of the stack pointer for use loading and storing any
1098   // arguments that may not fit in the registers available for argument
1099   // passing.
1100   SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1101
1102   // Figure out which arguments are going to go in registers, and which in
1103   // memory.
1104   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1105   unsigned ArgRegIdx = 0;
1106
1107   // Keep track of registers passing arguments
1108   std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1109   // And the arguments passed on the stack
1110   SmallVector<SDOperand, 8> MemOpChains;
1111
1112   for (unsigned i = 0; i != NumOps; ++i) {
1113     SDOperand Arg = Op.getOperand(5+2*i);
1114
1115     // PtrOff will be used to store the current argument to the stack if a
1116     // register cannot be found for it.
1117     SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1118     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1119
1120     switch (Arg.getValueType()) {
1121     default: assert(0 && "Unexpected ValueType for argument!");
1122     case MVT::i32:
1123     case MVT::i64:
1124     case MVT::i128:
1125       if (ArgRegIdx != NumArgRegs) {
1126         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1127       } else {
1128         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1129         ArgOffset += StackSlotSize;
1130       }
1131       break;
1132     case MVT::f32:
1133     case MVT::f64:
1134       if (ArgRegIdx != NumArgRegs) {
1135         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1136       } else {
1137         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1138         ArgOffset += StackSlotSize;
1139       }
1140       break;
1141     case MVT::v4f32:
1142     case MVT::v4i32:
1143     case MVT::v8i16:
1144     case MVT::v16i8:
1145       if (ArgRegIdx != NumArgRegs) {
1146         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1147       } else {
1148         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1149         ArgOffset += StackSlotSize;
1150       }
1151       break;
1152     }
1153   }
1154
1155   // Update number of stack bytes actually used, insert a call sequence start
1156   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1157   Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1158
1159   if (!MemOpChains.empty()) {
1160     // Adjust the stack pointer for the stack arguments.
1161     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1162                         &MemOpChains[0], MemOpChains.size());
1163   }
1164
1165   // Build a sequence of copy-to-reg nodes chained together with token chain
1166   // and flag operands which copy the outgoing args into the appropriate regs.
1167   SDOperand InFlag;
1168   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1169     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1170                              InFlag);
1171     InFlag = Chain.getValue(1);
1172   }
1173
1174   std::vector<MVT::ValueType> NodeTys;
1175   NodeTys.push_back(MVT::Other);   // Returns a chain
1176   NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1177
1178   SmallVector<SDOperand, 8> Ops;
1179   unsigned CallOpc = SPUISD::CALL;
1180
1181   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1182   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1183   // node so that legalize doesn't hack it.
1184   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1185     GlobalValue *GV = G->getGlobal();
1186     unsigned CalleeVT = Callee.getValueType();
1187
1188     // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1189     // style calls, otherwise, external symbols are BRASL calls.
1190     // NOTE:
1191     // This may be an unsafe assumption for JIT and really large compilation
1192     // units.
1193     if (GV->isDeclaration()) {
1194       Callee = DAG.getGlobalAddress(GV, CalleeVT);
1195     } else {
1196       Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1197                            DAG.getTargetGlobalAddress(GV, CalleeVT),
1198                            DAG.getConstant(0, PtrVT));
1199     }
1200   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1201     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1202   else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1203     // If this is an absolute destination address that appears to be a legal
1204     // local store address, use the munged value.
1205     Callee = SDOperand(Dest, 0);
1206
1207   Ops.push_back(Chain);
1208   Ops.push_back(Callee);
1209
1210   // Add argument registers to the end of the list so that they are known live
1211   // into the call.
1212   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1213     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1214                                   RegsToPass[i].second.getValueType()));
1215
1216   if (InFlag.Val)
1217     Ops.push_back(InFlag);
1218   Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1219   InFlag = Chain.getValue(1);
1220
1221   SDOperand ResultVals[3];
1222   unsigned NumResults = 0;
1223   NodeTys.clear();
1224
1225   // If the call has results, copy the values out of the ret val registers.
1226   switch (Op.Val->getValueType(0)) {
1227   default: assert(0 && "Unexpected ret value!");
1228   case MVT::Other: break;
1229   case MVT::i32:
1230     if (Op.Val->getValueType(1) == MVT::i32) {
1231       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1232       ResultVals[0] = Chain.getValue(0);
1233       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1234                                  Chain.getValue(2)).getValue(1);
1235       ResultVals[1] = Chain.getValue(0);
1236       NumResults = 2;
1237       NodeTys.push_back(MVT::i32);
1238     } else {
1239       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1240       ResultVals[0] = Chain.getValue(0);
1241       NumResults = 1;
1242     }
1243     NodeTys.push_back(MVT::i32);
1244     break;
1245   case MVT::i64:
1246     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1247     ResultVals[0] = Chain.getValue(0);
1248     NumResults = 1;
1249     NodeTys.push_back(MVT::i64);
1250     break;
1251   case MVT::f32:
1252   case MVT::f64:
1253     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1254                                InFlag).getValue(1);
1255     ResultVals[0] = Chain.getValue(0);
1256     NumResults = 1;
1257     NodeTys.push_back(Op.Val->getValueType(0));
1258     break;
1259   case MVT::v2f64:
1260   case MVT::v4f32:
1261   case MVT::v4i32:
1262   case MVT::v8i16:
1263   case MVT::v16i8:
1264     Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1265                                    InFlag).getValue(1);
1266     ResultVals[0] = Chain.getValue(0);
1267     NumResults = 1;
1268     NodeTys.push_back(Op.Val->getValueType(0));
1269     break;
1270   }
1271
1272   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1273                       DAG.getConstant(NumStackBytes, PtrVT));
1274   NodeTys.push_back(MVT::Other);
1275
1276   // If the function returns void, just return the chain.
1277   if (NumResults == 0)
1278     return Chain;
1279
1280   // Otherwise, merge everything together with a MERGE_VALUES node.
1281   ResultVals[NumResults++] = Chain;
1282   SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1283                               ResultVals, NumResults);
1284   return Res.getValue(Op.ResNo);
1285 }
1286
1287 static SDOperand
1288 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1289   SmallVector<CCValAssign, 16> RVLocs;
1290   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1291   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1292   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1293   CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1294
1295   // If this is the first return lowered for this function, add the regs to the
1296   // liveout set for the function.
1297   if (DAG.getMachineFunction().liveout_empty()) {
1298     for (unsigned i = 0; i != RVLocs.size(); ++i)
1299       DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1300   }
1301
1302   SDOperand Chain = Op.getOperand(0);
1303   SDOperand Flag;
1304
1305   // Copy the result values into the output registers.
1306   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1307     CCValAssign &VA = RVLocs[i];
1308     assert(VA.isRegLoc() && "Can only return in registers!");
1309     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1310     Flag = Chain.getValue(1);
1311   }
1312
1313   if (Flag.Val)
1314     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1315   else
1316     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1317 }
1318
1319
1320 //===----------------------------------------------------------------------===//
1321 // Vector related lowering:
1322 //===----------------------------------------------------------------------===//
1323
1324 static ConstantSDNode *
1325 getVecImm(SDNode *N) {
1326   SDOperand OpVal(0, 0);
1327
1328   // Check to see if this buildvec has a single non-undef value in its elements.
1329   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1330     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1331     if (OpVal.Val == 0)
1332       OpVal = N->getOperand(i);
1333     else if (OpVal != N->getOperand(i))
1334       return 0;
1335   }
1336
1337   if (OpVal.Val != 0) {
1338     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1339       return CN;
1340     }
1341   }
1342
1343   return 0; // All UNDEF: use implicit def.; not Constant node
1344 }
1345
1346 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1347 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1348 /// constant
1349 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1350                               MVT::ValueType ValueType) {
1351   if (ConstantSDNode *CN = getVecImm(N)) {
1352     uint64_t Value = CN->getValue();
1353     if (Value <= 0x3ffff)
1354       return DAG.getConstant(Value, ValueType);
1355   }
1356
1357   return SDOperand();
1358 }
1359
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
1362 /// constant
1363 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1364                               MVT::ValueType ValueType) {
1365   if (ConstantSDNode *CN = getVecImm(N)) {
1366     if (ValueType == MVT::i32) {
1367       int Value = (int) CN->getValue();
1368       int SExtValue = ((Value & 0xffff) << 16) >> 16;
1369
1370       if (Value == SExtValue)
1371         return DAG.getConstant(Value, ValueType);
1372     } else if (ValueType == MVT::i16) {
1373       short Value = (short) CN->getValue();
1374       int SExtValue = ((int) Value << 16) >> 16;
1375
1376       if (Value == (short) SExtValue)
1377         return DAG.getConstant(Value, ValueType);
1378     } else if (ValueType == MVT::i64) {
1379       int64_t Value = CN->getValue();
1380       int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1381
1382       if (Value == SExtValue)
1383         return DAG.getConstant(Value, ValueType);
1384     }
1385   }
1386
1387   return SDOperand();
1388 }
1389
1390 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1391 /// and the value fits into a signed 10-bit constant, and if so, return the
1392 /// constant
1393 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1394                               MVT::ValueType ValueType) {
1395   if (ConstantSDNode *CN = getVecImm(N)) {
1396     int Value = (int) CN->getValue();
1397     if ((ValueType == MVT::i32 && isS10Constant(Value))
1398         || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1399       return DAG.getConstant(Value, ValueType);
1400   }
1401
1402   return SDOperand();
1403 }
1404
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
1407 /// constant.
1408 ///
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1411 /// same value.
1412 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1413                              MVT::ValueType ValueType) {
1414   if (ConstantSDNode *CN = getVecImm(N)) {
1415     int Value = (int) CN->getValue();
1416     if (ValueType == MVT::i16
1417         && Value <= 0xffff                 /* truncated from uint64_t */
1418         && ((short) Value >> 8) == ((short) Value & 0xff))
1419       return DAG.getConstant(Value & 0xff, ValueType);
1420     else if (ValueType == MVT::i8
1421              && (Value & 0xff) == Value)
1422       return DAG.getConstant(Value, ValueType);
1423   }
1424
1425   return SDOperand();
1426 }
1427
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
1430 /// constant
1431 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1432                                MVT::ValueType ValueType) {
1433   if (ConstantSDNode *CN = getVecImm(N)) {
1434     uint64_t Value = CN->getValue();
1435     if ((ValueType == MVT::i32
1436           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1437         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438       return DAG.getConstant(Value >> 16, ValueType);
1439   }
1440
1441   return SDOperand();
1442 }
1443
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1445 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1446   if (ConstantSDNode *CN = getVecImm(N)) {
1447     return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1448   }
1449
1450   return SDOperand();
1451 }
1452
1453 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1454 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455   if (ConstantSDNode *CN = getVecImm(N)) {
1456     return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1457   }
1458
1459   return SDOperand();
1460 }
1461
1462 // If this is a vector of constants or undefs, get the bits.  A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1465 // zero.   Return true if this is not an array of constants, false if it is.
1466 //
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468                                        uint64_t UndefBits[2]) {
1469   // Start with zero'd results.
1470   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1471
1472   unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1473   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474     SDOperand OpVal = BV->getOperand(i);
1475
1476     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1477     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1478
1479     uint64_t EltBits = 0;
1480     if (OpVal.getOpcode() == ISD::UNDEF) {
1481       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1483       continue;
1484     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485       EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1486     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1487       const APFloat &apf = CN->getValueAPF();
1488       EltBits = (CN->getValueType(0) == MVT::f32
1489                  ? FloatToBits(apf.convertToFloat())
1490                  : DoubleToBits(apf.convertToDouble()));
1491     } else {
1492       // Nonconstant element.
1493       return true;
1494     }
1495
1496     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1497   }
1498
1499   //printf("%llx %llx  %llx %llx\n",
1500   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1501   return false;
1502 }
1503
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it.  For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509                             const uint64_t Undef128[2],
1510                             int MinSplatBits,
1511                             uint64_t &SplatBits, uint64_t &SplatUndef,
1512                             int &SplatSize) {
1513   // Don't let undefs prevent splats from matching.  See if the top 64-bits are
1514   // the same as the lower 64-bits, ignoring undefs.
1515   uint64_t Bits64  = Bits128[0] | Bits128[1];
1516   uint64_t Undef64 = Undef128[0] & Undef128[1];
1517   uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518   uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519   uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
1520   uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1521
1522   if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523     if (MinSplatBits < 64) {
1524
1525       // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1526       // undefs.
1527       if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528         if (MinSplatBits < 32) {
1529
1530           // If the top 16-bits are different than the lower 16-bits, ignoring
1531           // undefs, we have an i32 splat.
1532           if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533             if (MinSplatBits < 16) {
1534               // If the top 8-bits are different than the lower 8-bits, ignoring
1535               // undefs, we have an i16 splat.
1536               if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1537                 // Otherwise, we have an 8-bit splat.
1538                 SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1539                 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1540                 SplatSize = 1;
1541                 return true;
1542               }
1543             } else {
1544               SplatBits = Bits16;
1545               SplatUndef = Undef16;
1546               SplatSize = 2;
1547               return true;
1548             }
1549           }
1550         } else {
1551           SplatBits = Bits32;
1552           SplatUndef = Undef32;
1553           SplatSize = 4;
1554           return true;
1555         }
1556       }
1557     } else {
1558       SplatBits = Bits128[0];
1559       SplatUndef = Undef128[0];
1560       SplatSize = 8;
1561       return true;
1562     }
1563   }
1564
1565   return false;  // Can't be a splat if two pieces don't match.
1566 }
1567
1568 // If this is a case we can't handle, return null and let the default
1569 // expansion code take care of it.  If we CAN select this case, and if it
1570 // selects to a single instruction, return Op.  Otherwise, if we can codegen
1571 // this case more efficiently than a constant pool load, lower it to the
1572 // sequence of ops that should be used.
1573 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1574   MVT::ValueType VT = Op.getValueType();
1575   // If this is a vector of constants or undefs, get the bits.  A bit in
1576   // UndefBits is set if the corresponding element of the vector is an
1577   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1578   // zero.
1579   uint64_t VectorBits[2];
1580   uint64_t UndefBits[2];
1581   uint64_t SplatBits, SplatUndef;
1582   int SplatSize;
1583   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1584       || !isConstantSplat(VectorBits, UndefBits,
1585                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1586                           SplatBits, SplatUndef, SplatSize))
1587     return SDOperand();   // Not a constant vector, not a splat.
1588
1589   switch (VT) {
1590   default:
1591   case MVT::v4f32: {
1592     uint32_t Value32 = SplatBits;
1593     assert(SplatSize == 4
1594            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596     SDOperand T = DAG.getConstant(Value32, MVT::i32);
1597     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1599     break;
1600   }
1601   case MVT::v2f64: {
1602     uint64_t f64val = SplatBits;
1603     assert(SplatSize == 8
1604            && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1605     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606     SDOperand T = DAG.getConstant(f64val, MVT::i64);
1607     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1609     break;
1610   }
1611   case MVT::v16i8: {
1612    // 8-bit constants have to be expanded to 16-bits
1613    unsigned short Value16 = SplatBits | (SplatBits << 8);
1614    SDOperand Ops[8];
1615    for (int i = 0; i < 8; ++i)
1616      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617    return DAG.getNode(ISD::BIT_CONVERT, VT,
1618                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1619   }
1620   case MVT::v8i16: {
1621     unsigned short Value16;
1622     if (SplatSize == 2)
1623       Value16 = (unsigned short) (SplatBits & 0xffff);
1624     else
1625       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626     SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1627     SDOperand Ops[8];
1628     for (int i = 0; i < 8; ++i) Ops[i] = T;
1629     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1630   }
1631   case MVT::v4i32: {
1632     unsigned int Value = SplatBits;
1633     SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1634     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1635   }
1636   case MVT::v2i64: {
1637     uint64_t val = SplatBits;
1638     uint32_t upper = uint32_t(val >> 32);
1639     uint32_t lower = uint32_t(val);
1640
1641     if (val != 0) {
1642       SDOperand LO32;
1643       SDOperand HI32;
1644       SmallVector<SDOperand, 16> ShufBytes;
1645       SDOperand Result;
1646       bool upper_special, lower_special;
1647
1648       // NOTE: This code creates common-case shuffle masks that can be easily
1649       // detected as common expressions. It is not attempting to create highly
1650       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1651
1652       // Detect if the upper or lower half is a special shuffle mask pattern:
1653       upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1654       lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1655
1656       // Create lower vector if not a special pattern
1657       if (!lower_special) {
1658         SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1659         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1660                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1661                                        LO32C, LO32C, LO32C, LO32C));
1662       }
1663
1664       // Create upper vector if not a special pattern
1665       if (!upper_special) {
1666         SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1667         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669                                        HI32C, HI32C, HI32C, HI32C));
1670       }
1671
1672       // If either upper or lower are special, then the two input operands are
1673       // the same (basically, one of them is a "don't care")
1674       if (lower_special)
1675         LO32 = HI32;
1676       if (upper_special)
1677         HI32 = LO32;
1678       if (lower_special && upper_special) {
1679         // Unhappy situation... both upper and lower are special, so punt with
1680         // a target constant:
1681         SDOperand Zero = DAG.getConstant(0, MVT::i32);
1682         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1683                                   Zero, Zero);
1684       }
1685
1686       for (int i = 0; i < 4; ++i) {
1687         for (int j = 0; j < 4; ++j) {
1688           SDOperand V;
1689           bool process_upper, process_lower;
1690           uint64_t val = 0;
1691
1692           process_upper = (upper_special && (i & 1) == 0);
1693           process_lower = (lower_special && (i & 1) == 1);
1694
1695           if (process_upper || process_lower) {
1696             if ((process_upper && upper == 0)
1697                 || (process_lower && lower == 0))
1698               val = 0x80;
1699             else if ((process_upper && upper == 0xffffffff)
1700                      || (process_lower && lower == 0xffffffff))
1701               val = 0xc0;
1702             else if ((process_upper && upper == 0x80000000)
1703                      || (process_lower && lower == 0x80000000))
1704               val = (j == 0 ? 0xe0 : 0x80);
1705           } else
1706             val = i * 4 + j + ((i & 1) * 16);
1707
1708           ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1709         }
1710       }
1711
1712       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1713                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1714                                      &ShufBytes[0], ShufBytes.size()));
1715     } else {
1716       // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1717       SDOperand Zero = DAG.getConstant(0, MVT::i32);
1718       return DAG.getNode(ISD::BIT_CONVERT, VT,
1719                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720                                      Zero, Zero, Zero, Zero));
1721     }
1722   }
1723   }
1724
1725   return SDOperand();
1726 }
1727
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1735 /// \note
1736 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1739 /// \note
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1741 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1742   SDOperand V1 = Op.getOperand(0);
1743   SDOperand V2 = Op.getOperand(1);
1744   SDOperand PermMask = Op.getOperand(2);
1745
1746   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1747
1748   // If we have a single element being moved from V1 to V2, this can be handled
1749   // using the C*[DX] compute mask instructions, but the vector elements have
1750   // to be monotonically increasing with one exception element.
1751   MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1752   unsigned EltsFromV2 = 0;
1753   unsigned V2Elt = 0;
1754   unsigned V2EltIdx0 = 0;
1755   unsigned CurrElt = 0;
1756   bool monotonic = true;
1757   if (EltVT == MVT::i8)
1758     V2EltIdx0 = 16;
1759   else if (EltVT == MVT::i16)
1760     V2EltIdx0 = 8;
1761   else if (EltVT == MVT::i32)
1762     V2EltIdx0 = 4;
1763   else
1764     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1765
1766   for (unsigned i = 0, e = PermMask.getNumOperands();
1767        EltsFromV2 <= 1 && monotonic && i != e;
1768        ++i) {
1769     unsigned SrcElt;
1770     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1771       SrcElt = 0;
1772     else
1773       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1774
1775     if (SrcElt >= V2EltIdx0) {
1776       ++EltsFromV2;
1777       V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778     } else if (CurrElt != SrcElt) {
1779       monotonic = false;
1780     }
1781
1782     ++CurrElt;
1783   }
1784
1785   if (EltsFromV2 == 1 && monotonic) {
1786     // Compute mask and shuffle
1787     MachineFunction &MF = DAG.getMachineFunction();
1788     SSARegMap *RegMap = MF.getSSARegMap();
1789     unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1790     MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791     // Initialize temporary register to 0
1792     SDOperand InitTempReg =
1793       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794     // Copy register's contents as index in INSERT_MASK:
1795     SDOperand ShufMaskOp =
1796       DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1797                   DAG.getTargetConstant(V2Elt, MVT::i32),
1798                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799     // Use shuffle mask in SHUFB synthetic instruction:
1800     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1801   } else {
1802     // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1803     unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1804
1805     SmallVector<SDOperand, 16> ResultMask;
1806     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1807       unsigned SrcElt;
1808       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1809         SrcElt = 0;
1810       else
1811         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1812
1813       for (unsigned j = 0; j != BytesPerElement; ++j) {
1814         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1815                                              MVT::i8));
1816       }
1817     }
1818
1819     SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1820                                       &ResultMask[0], ResultMask.size());
1821     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1822   }
1823 }
1824
1825 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1826   SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1827
1828   if (Op0.Val->getOpcode() == ISD::Constant) {
1829     // For a constant, build the appropriate constant vector, which will
1830     // eventually simplify to a vector register load.
1831
1832     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1833     SmallVector<SDOperand, 16> ConstVecValues;
1834     MVT::ValueType VT;
1835     size_t n_copies;
1836
1837     // Create a constant vector:
1838     switch (Op.getValueType()) {
1839     default: assert(0 && "Unexpected constant value type in "
1840                          "LowerSCALAR_TO_VECTOR");
1841     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1842     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1843     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1844     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1845     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1846     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1847     }
1848
1849     SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1850     for (size_t j = 0; j < n_copies; ++j)
1851       ConstVecValues.push_back(CValue);
1852
1853     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1854                        &ConstVecValues[0], ConstVecValues.size());
1855   } else {
1856     // Otherwise, copy the value from one register to another:
1857     switch (Op0.getValueType()) {
1858     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1859     case MVT::i8:
1860     case MVT::i16:
1861     case MVT::i32:
1862     case MVT::i64:
1863     case MVT::f32:
1864     case MVT::f64:
1865       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1866     }
1867   }
1868
1869   return SDOperand();
1870 }
1871
1872 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1873   switch (Op.getValueType()) {
1874   case MVT::v4i32: {
1875     SDOperand rA = Op.getOperand(0);
1876     SDOperand rB = Op.getOperand(1);
1877     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1878     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1879     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1880     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1881
1882     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1883     break;
1884   }
1885
1886   // Multiply two v8i16 vectors (pipeline friendly version):
1887   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1888   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1889   // c) Use SELB to select upper and lower halves from the intermediate results
1890   //
1891   // NOTE: We really want to move the FSMBI to earlier to actually get the
1892   // dual-issue. This code does manage to do this, even if it's a little on
1893   // the wacky side
1894   case MVT::v8i16: {
1895     MachineFunction &MF = DAG.getMachineFunction();
1896     SSARegMap *RegMap = MF.getSSARegMap();
1897     SDOperand Chain = Op.getOperand(0);
1898     SDOperand rA = Op.getOperand(0);
1899     SDOperand rB = Op.getOperand(1);
1900     unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1901     unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1902
1903     SDOperand FSMBOp =
1904       DAG.getCopyToReg(Chain, FSMBIreg,
1905                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1906                                    DAG.getConstant(0xcccc, MVT::i32)));
1907
1908     SDOperand HHProd =
1909       DAG.getCopyToReg(FSMBOp, HiProdReg,
1910                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1911
1912     SDOperand HHProd_v4i32 =
1913       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1914                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1915
1916     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1917                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1918                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1919                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1920                                                HHProd_v4i32,
1921                                                DAG.getConstant(16, MVT::i16))),
1922                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1923   }
1924
1925   // This M00sE is N@stI! (apologies to Monty Python)
1926   //
1927   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1928   // is to break it all apart, sign extend, and reassemble the various
1929   // intermediate products.
1930   case MVT::v16i8: {
1931     MachineFunction &MF = DAG.getMachineFunction();
1932     SSARegMap *RegMap = MF.getSSARegMap();
1933     SDOperand Chain = Op.getOperand(0);
1934     SDOperand rA = Op.getOperand(0);
1935     SDOperand rB = Op.getOperand(1);
1936     SDOperand c8 = DAG.getConstant(8, MVT::i8);
1937     SDOperand c16 = DAG.getConstant(16, MVT::i8);
1938
1939     unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1940     unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1941     unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1942
1943     SDOperand LLProd =
1944       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1945                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1946                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1947
1948     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1949
1950     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1951
1952     SDOperand LHProd =
1953       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1954                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1955
1956     SDOperand FSMBdef_2222 =
1957       DAG.getCopyToReg(Chain, FSMBreg_2222,
1958                        DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1959                                    DAG.getConstant(0x2222, MVT::i32)));
1960
1961     SDOperand FSMBuse_2222 =
1962       DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1963
1964     SDOperand LoProd_1 =
1965       DAG.getCopyToReg(Chain, LoProd_reg,
1966                        DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1967                                    FSMBuse_2222));
1968
1969     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1970
1971     SDOperand LoProd =
1972       DAG.getNode(ISD::AND, MVT::v4i32,
1973                   DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1974                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1975                               LoProdMask, LoProdMask,
1976                               LoProdMask, LoProdMask));
1977
1978     SDOperand rAH =
1979       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1981
1982     SDOperand rBH =
1983       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1984                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1985
1986     SDOperand HLProd =
1987       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1989                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1990
1991     SDOperand HHProd_1 =
1992       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1993                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1994                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1995                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1996                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1997
1998     SDOperand HHProd =
1999       DAG.getCopyToReg(Chain, HiProd_reg,
2000                        DAG.getNode(SPUISD::SELB, MVT::v8i16,
2001                                    HLProd,
2002                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2003                                    FSMBuse_2222));
2004
2005     SDOperand HiProd =
2006       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2007                   DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2008
2009     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2010                        DAG.getNode(ISD::OR, MVT::v4i32,
2011                                    LoProd, HiProd));
2012   }
2013
2014   default:
2015     cerr << "CellSPU: Unknown vector multiplication, got "
2016          << MVT::getValueTypeString(Op.getValueType())
2017          << "\n";
2018     abort();
2019     /*NOTREACHED*/
2020   }
2021
2022   return SDOperand();
2023 }
2024
2025 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2026   MachineFunction &MF = DAG.getMachineFunction();
2027   SSARegMap *RegMap = MF.getSSARegMap();
2028
2029   SDOperand A = Op.getOperand(0);
2030   SDOperand B = Op.getOperand(1);
2031   unsigned VT = Op.getValueType();
2032
2033   unsigned VRegBR, VRegC;
2034
2035   if (VT == MVT::f32) {
2036     VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2037     VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2038   } else {
2039     VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2040     VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2041   }
2042   // TODO: make sure we're feeding FPInterp the right arguments
2043   // Right now: fi B, frest(B)
2044
2045   // Computes BRcpl =
2046   // (Floating Interpolate (FP Reciprocal Estimate B))
2047   SDOperand BRcpl =
2048       DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2049                        DAG.getNode(SPUISD::FPInterp, VT, B,
2050                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2051
2052   // Computes A * BRcpl and stores in a temporary register
2053   SDOperand AxBRcpl =
2054       DAG.getCopyToReg(BRcpl, VRegC,
2055                  DAG.getNode(ISD::FMUL, VT, A,
2056                         DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2057   // What's the Chain variable do? It's magic!
2058   // TODO: set Chain = Op(0).getEntryNode()
2059
2060   return DAG.getNode(ISD::FADD, VT,
2061                 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2062                 DAG.getNode(ISD::FMUL, VT,
2063                         DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2064                         DAG.getNode(ISD::FSUB, VT, A,
2065                             DAG.getNode(ISD::FMUL, VT, B,
2066                             DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2067 }
2068
2069 // Expands double-precision FDIV
2070 // Expects two doubles as inputs X and Y, does a floating point
2071 // reciprocal estimate, and three iterations of Newton-Raphson
2072 // to increase accuracy.
2073 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2074 //  MachineFunction &MF = DAG.getMachineFunction();
2075 //  SSARegMap *RegMap = MF.getSSARegMap();
2076 //
2077 //  SDOperand X = Op.getOperand(0);
2078 //  SDOperand Y = Op.getOperand(1);
2079 //}
2080
2081 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2082   unsigned VT = Op.getValueType();
2083   SDOperand N = Op.getOperand(0);
2084   SDOperand Elt = Op.getOperand(1);
2085   SDOperand ShufMask[16];
2086   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2087
2088   assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2089
2090   int EltNo = (int) C->getValue();
2091
2092   // sanity checks:
2093   if (VT == MVT::i8 && EltNo >= 16)
2094     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2095   else if (VT == MVT::i16 && EltNo >= 8)
2096     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2097   else if (VT == MVT::i32 && EltNo >= 4)
2098     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2099   else if (VT == MVT::i64 && EltNo >= 2)
2100     assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2101
2102   if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2103     // i32 and i64: Element 0 is the preferred slot
2104     return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2105   }
2106
2107   // Need to generate shuffle mask and extract:
2108   int prefslot_begin = -1, prefslot_end = -1;
2109   int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2110
2111   switch (VT) {
2112   case MVT::i8: {
2113     prefslot_begin = prefslot_end = 3;
2114     break;
2115   }
2116   case MVT::i16: {
2117     prefslot_begin = 2; prefslot_end = 3;
2118     break;
2119   }
2120   case MVT::i32: {
2121     prefslot_begin = 0; prefslot_end = 3;
2122     break;
2123   }
2124   case MVT::i64: {
2125     prefslot_begin = 0; prefslot_end = 7;
2126     break;
2127   }
2128   }
2129
2130   assert(prefslot_begin != -1 && prefslot_end != -1 &&
2131          "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2132
2133   for (int i = 0; i < 16; ++i) {
2134     // zero fill uppper part of preferred slot, don't care about the
2135     // other slots:
2136     unsigned int mask_val;
2137
2138     if (i <= prefslot_end) {
2139       mask_val =
2140         ((i < prefslot_begin)
2141          ? 0x80
2142          : elt_byte + (i - prefslot_begin));
2143
2144       ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2145     } else
2146       ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2147   }
2148
2149   SDOperand ShufMaskVec =
2150     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2151                 &ShufMask[0],
2152                 sizeof(ShufMask) / sizeof(ShufMask[0]));
2153
2154   return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2155                      DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2156                                  N, N, ShufMaskVec));
2157
2158 }
2159
2160 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2161   SDOperand VecOp = Op.getOperand(0);
2162   SDOperand ValOp = Op.getOperand(1);
2163   SDOperand IdxOp = Op.getOperand(2);
2164   MVT::ValueType VT = Op.getValueType();
2165
2166   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2167   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2168
2169   MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2170   // Use $2 because it's always 16-byte aligned and it's available:
2171   SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2172
2173   SDOperand result =
2174     DAG.getNode(SPUISD::SHUFB, VT,
2175                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2176                 VecOp,
2177                 DAG.getNode(SPUISD::INSERT_MASK, VT,
2178                             DAG.getNode(ISD::ADD, PtrVT,
2179                                         PtrBase,
2180                                         DAG.getConstant(CN->getValue(),
2181                                                         PtrVT))));
2182
2183   return result;
2184 }
2185
2186 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2187   SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2188
2189   assert(Op.getValueType() == MVT::i8);
2190   switch (Opc) {
2191   default:
2192     assert(0 && "Unhandled i8 math operator");
2193     /*NOTREACHED*/
2194     break;
2195   case ISD::SUB: {
2196     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2197     // the result:
2198     SDOperand N1 = Op.getOperand(1);
2199     N0 = (N0.getOpcode() != ISD::Constant
2200           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2201           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2202     N1 = (N1.getOpcode() != ISD::Constant
2203           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2204           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2205     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2206                        DAG.getNode(Opc, MVT::i16, N0, N1));
2207   }
2208   case ISD::ROTR:
2209   case ISD::ROTL: {
2210     SDOperand N1 = Op.getOperand(1);
2211     unsigned N1Opc;
2212     N0 = (N0.getOpcode() != ISD::Constant
2213           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2214           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2215     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2216     N1 = (N1.getOpcode() != ISD::Constant
2217           ? DAG.getNode(N1Opc, MVT::i16, N1)
2218           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2219     SDOperand ExpandArg =
2220       DAG.getNode(ISD::OR, MVT::i16, N0,
2221                   DAG.getNode(ISD::SHL, MVT::i16,
2222                               N0, DAG.getConstant(8, MVT::i16)));
2223     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2225   }
2226   case ISD::SRL:
2227   case ISD::SHL: {
2228     SDOperand N1 = Op.getOperand(1);
2229     unsigned N1Opc;
2230     N0 = (N0.getOpcode() != ISD::Constant
2231           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2232           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2233     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2234     N1 = (N1.getOpcode() != ISD::Constant
2235           ? DAG.getNode(N1Opc, MVT::i16, N1)
2236           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2237     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238                        DAG.getNode(Opc, MVT::i16, N0, N1));
2239   }
2240   case ISD::SRA: {
2241     SDOperand N1 = Op.getOperand(1);
2242     unsigned N1Opc;
2243     N0 = (N0.getOpcode() != ISD::Constant
2244           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2245           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2246     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2247     N1 = (N1.getOpcode() != ISD::Constant
2248           ? DAG.getNode(N1Opc, MVT::i16, N1)
2249           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2250     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2251                        DAG.getNode(Opc, MVT::i16, N0, N1));
2252   }
2253   case ISD::MUL: {
2254     SDOperand N1 = Op.getOperand(1);
2255     unsigned N1Opc;
2256     N0 = (N0.getOpcode() != ISD::Constant
2257           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2258           : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2259     N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2260     N1 = (N1.getOpcode() != ISD::Constant
2261           ? DAG.getNode(N1Opc, MVT::i16, N1)
2262           : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2263     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2264                        DAG.getNode(Opc, MVT::i16, N0, N1));
2265     break;
2266   }
2267   }
2268
2269   return SDOperand();
2270 }
2271
2272 //! Lower byte immediate operations for v16i8 vectors:
2273 static SDOperand
2274 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2275   SDOperand ConstVec;
2276   SDOperand Arg;
2277   MVT::ValueType VT = Op.getValueType();
2278
2279   ConstVec = Op.getOperand(0);
2280   Arg = Op.getOperand(1);
2281   if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2282     if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2283       ConstVec = ConstVec.getOperand(0);
2284     } else {
2285       ConstVec = Op.getOperand(1);
2286       Arg = Op.getOperand(0);
2287       if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2288         ConstVec = ConstVec.getOperand(0);
2289       }
2290     }
2291   }
2292
2293   if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2294     uint64_t VectorBits[2];
2295     uint64_t UndefBits[2];
2296     uint64_t SplatBits, SplatUndef;
2297     int SplatSize;
2298
2299     if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2300         && isConstantSplat(VectorBits, UndefBits,
2301                            MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2302                            SplatBits, SplatUndef, SplatSize)) {
2303       SDOperand tcVec[16];
2304       SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2305       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2306
2307       // Turn the BUILD_VECTOR into a set of target constants:
2308       for (size_t i = 0; i < tcVecSize; ++i)
2309         tcVec[i] = tc;
2310
2311       return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2312                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2313     }
2314   }
2315
2316   return SDOperand();
2317 }
2318
2319 //! Lower i32 multiplication
2320 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2321                           unsigned Opc) {
2322   switch (VT) {
2323   default:
2324     cerr << "CellSPU: Unknown LowerMUL value type, got "
2325          << MVT::getValueTypeString(Op.getValueType())
2326          << "\n";
2327     abort();
2328     /*NOTREACHED*/
2329
2330   case MVT::i32: {
2331     SDOperand rA = Op.getOperand(0);
2332     SDOperand rB = Op.getOperand(1);
2333
2334     return DAG.getNode(ISD::ADD, MVT::i32,
2335                        DAG.getNode(ISD::ADD, MVT::i32,
2336                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2337                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2338                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2339   }
2340   }
2341
2342   return SDOperand();
2343 }
2344
2345 //! Custom lowering for CTPOP (count population)
2346 /*!
2347   Custom lowering code that counts the number ones in the input
2348   operand. SPU has such an instruction, but it counts the number of
2349   ones per byte, which then have to be accumulated.
2350 */
2351 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2352   unsigned VT = Op.getValueType();
2353   unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2354
2355   switch (VT) {
2356   case MVT::i8: {
2357     SDOperand N = Op.getOperand(0);
2358     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2359
2360     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2361     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2362
2363     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2364   }
2365
2366   case MVT::i16: {
2367     MachineFunction &MF = DAG.getMachineFunction();
2368     SSARegMap *RegMap = MF.getSSARegMap();
2369
2370     unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2371
2372     SDOperand N = Op.getOperand(0);
2373     SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2374     SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2375     SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2376
2377     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2378     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2379
2380     // CNTB_result becomes the chain to which all of the virtual registers
2381     // CNTB_reg, SUM1_reg become associated:
2382     SDOperand CNTB_result =
2383       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2384
2385     SDOperand CNTB_rescopy =
2386       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2387
2388     SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2389
2390     return DAG.getNode(ISD::AND, MVT::i16,
2391                        DAG.getNode(ISD::ADD, MVT::i16,
2392                                    DAG.getNode(ISD::SRL, MVT::i16,
2393                                                Tmp1, Shift1),
2394                                    Tmp1),
2395                        Mask0);
2396   }
2397
2398   case MVT::i32: {
2399     MachineFunction &MF = DAG.getMachineFunction();
2400     SSARegMap *RegMap = MF.getSSARegMap();
2401
2402     unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2403     unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2404
2405     SDOperand N = Op.getOperand(0);
2406     SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2407     SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2408     SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2409     SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2410
2411     SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2412     SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2413
2414     // CNTB_result becomes the chain to which all of the virtual registers
2415     // CNTB_reg, SUM1_reg become associated:
2416     SDOperand CNTB_result =
2417       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2418
2419     SDOperand CNTB_rescopy =
2420       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2421
2422     SDOperand Comp1 =
2423       DAG.getNode(ISD::SRL, MVT::i32,
2424                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2425
2426     SDOperand Sum1 =
2427       DAG.getNode(ISD::ADD, MVT::i32,
2428                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2429
2430     SDOperand Sum1_rescopy =
2431       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2432
2433     SDOperand Comp2 =
2434       DAG.getNode(ISD::SRL, MVT::i32,
2435                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2436                   Shift2);
2437     SDOperand Sum2 =
2438       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2439                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2440
2441     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2442   }
2443
2444   case MVT::i64:
2445     break;
2446   }
2447
2448   return SDOperand();
2449 }
2450
2451 /// LowerOperation - Provide custom lowering hooks for some operations.
2452 ///
2453 SDOperand
2454 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2455 {
2456   switch (Op.getOpcode()) {
2457   default: {
2458     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2459     cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2460     cerr << "*Op.Val:\n";
2461     Op.Val->dump();
2462     abort();
2463   }
2464   case ISD::LOAD:
2465   case ISD::SEXTLOAD:
2466   case ISD::ZEXTLOAD:
2467     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2468   case ISD::STORE:
2469     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2470   case ISD::ConstantPool:
2471     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2472   case ISD::GlobalAddress:
2473     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2474   case ISD::JumpTable:
2475     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2476   case ISD::Constant:
2477     return LowerConstant(Op, DAG);
2478   case ISD::ConstantFP:
2479     return LowerConstantFP(Op, DAG);
2480   case ISD::FORMAL_ARGUMENTS:
2481       return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2482   case ISD::CALL:
2483     return LowerCALL(Op, DAG);
2484   case ISD::RET:
2485     return LowerRET(Op, DAG, getTargetMachine());
2486
2487   // i8 math ops:
2488   case ISD::SUB:
2489   case ISD::ROTR:
2490   case ISD::ROTL:
2491   case ISD::SRL:
2492   case ISD::SHL:
2493   case ISD::SRA:
2494     return LowerI8Math(Op, DAG, Op.getOpcode());
2495
2496   // Vector-related lowering.
2497   case ISD::BUILD_VECTOR:
2498     return LowerBUILD_VECTOR(Op, DAG);
2499   case ISD::SCALAR_TO_VECTOR:
2500     return LowerSCALAR_TO_VECTOR(Op, DAG);
2501   case ISD::VECTOR_SHUFFLE:
2502     return LowerVECTOR_SHUFFLE(Op, DAG);
2503   case ISD::EXTRACT_VECTOR_ELT:
2504     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2505   case ISD::INSERT_VECTOR_ELT:
2506     return LowerINSERT_VECTOR_ELT(Op, DAG);
2507
2508   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2509   case ISD::AND:
2510   case ISD::OR:
2511   case ISD::XOR:
2512     return LowerByteImmed(Op, DAG);
2513
2514   // Vector and i8 multiply:
2515   case ISD::MUL:
2516     if (MVT::isVector(Op.getValueType()))
2517       return LowerVectorMUL(Op, DAG);
2518     else if (Op.getValueType() == MVT::i8)
2519       return LowerI8Math(Op, DAG, Op.getOpcode());
2520     else
2521       return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2522
2523   case ISD::FDIV:
2524     if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2525       return LowerFDIVf32(Op, DAG);
2526 //    else if (Op.getValueType() == MVT::f64)
2527 //      return LowerFDIVf64(Op, DAG);
2528     else
2529       assert(0 && "Calling FDIV on unsupported MVT");
2530
2531   case ISD::CTPOP:
2532     return LowerCTPOP(Op, DAG);
2533   }
2534
2535   return SDOperand();
2536 }
2537
2538 //===----------------------------------------------------------------------===//
2539 //  Other Lowering Code
2540 //===----------------------------------------------------------------------===//
2541
2542 MachineBasicBlock *
2543 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2544                                            MachineBasicBlock *BB)
2545 {
2546   return BB;
2547 }
2548
2549 //===----------------------------------------------------------------------===//
2550 // Target Optimization Hooks
2551 //===----------------------------------------------------------------------===//
2552
2553 SDOperand
2554 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2555 {
2556 #if 0
2557   TargetMachine &TM = getTargetMachine();
2558   SelectionDAG &DAG = DCI.DAG;
2559 #endif
2560   SDOperand N0 = N->getOperand(0);      // everything has at least one operand
2561
2562   switch (N->getOpcode()) {
2563   default: break;
2564
2565   // Look for obvious optimizations for shift left:
2566   // a) Replace 0 << V with 0
2567   // b) Replace V << 0 with V
2568   //
2569   // N.B: llvm will generate an undef node if the shift amount is greater than
2570   // 15 (e.g.: V << 16), which will naturally trigger an assert.
2571   case SPU::SHLIr32:
2572   case SPU::SHLHIr16:
2573   case SPU::SHLQBIIvec:
2574   case SPU::ROTHIr16:
2575   case SPU::ROTHIr16_i32:
2576   case SPU::ROTIr32:
2577   case SPU::ROTIr32_i16:
2578   case SPU::ROTQBYIvec:
2579   case SPU::ROTQBYBIvec:
2580   case SPU::ROTQBIIvec:
2581   case SPU::ROTHMIr16:
2582   case SPU::ROTMIr32:
2583   case SPU::ROTQMBYIvec: {
2584     if (N0.getOpcode() == ISD::Constant) {
2585       if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2586         if (C->getValue() == 0)         // 0 << V -> 0.
2587           return N0;
2588       }
2589     }
2590     SDOperand N1 = N->getOperand(1);
2591     if (N1.getOpcode() == ISD::Constant) {
2592       if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2593         if (C->getValue() == 0)         // V << 0 -> V
2594           return N1;
2595       }
2596     }
2597     break;
2598   }
2599   }
2600
2601   return SDOperand();
2602 }
2603
2604 //===----------------------------------------------------------------------===//
2605 // Inline Assembly Support
2606 //===----------------------------------------------------------------------===//
2607
2608 /// getConstraintType - Given a constraint letter, return the type of
2609 /// constraint it is for this target.
2610 SPUTargetLowering::ConstraintType
2611 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2612   if (ConstraintLetter.size() == 1) {
2613     switch (ConstraintLetter[0]) {
2614     default: break;
2615     case 'b':
2616     case 'r':
2617     case 'f':
2618     case 'v':
2619     case 'y':
2620       return C_RegisterClass;
2621     }
2622   }
2623   return TargetLowering::getConstraintType(ConstraintLetter);
2624 }
2625
2626 std::pair<unsigned, const TargetRegisterClass*>
2627 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2628                                                 MVT::ValueType VT) const
2629 {
2630   if (Constraint.size() == 1) {
2631     // GCC RS6000 Constraint Letters
2632     switch (Constraint[0]) {
2633     case 'b':   // R1-R31
2634     case 'r':   // R0-R31
2635       if (VT == MVT::i64)
2636         return std::make_pair(0U, SPU::R64CRegisterClass);
2637       return std::make_pair(0U, SPU::R32CRegisterClass);
2638     case 'f':
2639       if (VT == MVT::f32)
2640         return std::make_pair(0U, SPU::R32FPRegisterClass);
2641       else if (VT == MVT::f64)
2642         return std::make_pair(0U, SPU::R64FPRegisterClass);
2643       break;
2644     case 'v':
2645       return std::make_pair(0U, SPU::GPRCRegisterClass);
2646     }
2647   }
2648
2649   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2650 }
2651
2652 void
2653 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2654                                                   uint64_t Mask,
2655                                                   uint64_t &KnownZero,
2656                                                   uint64_t &KnownOne,
2657                                                   const SelectionDAG &DAG,
2658                                                   unsigned Depth ) const {
2659   KnownZero = 0;
2660   KnownOne = 0;
2661 }
2662
2663 // LowerAsmOperandForConstraint
2664 void
2665 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2666                                                 char ConstraintLetter,
2667                                                 std::vector<SDOperand> &Ops,
2668                                                 SelectionDAG &DAG) {
2669   // Default, for the time being, to the base class handler
2670   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2671 }
2672
2673 /// isLegalAddressImmediate - Return true if the integer value can be used
2674 /// as the offset of the target addressing mode.
2675 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2676   // SPU's addresses are 256K:
2677   return (V > -(1 << 18) && V < (1 << 18) - 1);
2678 }
2679
2680 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2681   return false;
2682 }