lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/Target/TargetLoweringObjectFile.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // File-local state and helpers; node_names is used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! Pairs a value type with its preferred-slot byte offset for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48   //! Value types that getValueTypeMapEntry() is able to look up
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61   //! Linear search of valtype_map; yields the entry whose valtype equals VT
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71     // A miss is reported only when NDEBUG is not defined; otherwise 0 is returned
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       std::string msg;
  75       raw_string_ostream Msg(msg);
  76       Msg << "getValueTypeMapEntry returns NULL for "
  77            << VT.getEVTString();
  78       llvm_report_error(Msg.str());
  79     }
  80 #endif
  81
  82     return retval;
  83   }
  84
  85   //! Expand a library call into an actual call DAG node
  86   /*!
  87    \note
  88    This code is taken from SelectionDAGLegalize, since it is not exposed as
  89    part of the LLVM SelectionDAG API.
  90    */
  91   // Returns the first member of the pair from TLI.LowerCallTo(); Hi is unused here.
  92   SDValue
  93   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  94                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  95     // The input chain to this libcall is the entry node of the function.
  96     // Legalizing the call will automatically add the previous call to the
  97     // dependence.
  98     SDValue InChain = DAG.getEntryNode();
  99
 100     TargetLowering::ArgListTy Args;
 101     TargetLowering::ArgListEntry Entry;
 102     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {  // one argument per operand of Op
 103       EVT ArgVT = Op.getOperand(i).getValueType();
 104       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 105       Entry.Node = Op.getOperand(i);
 106       Entry.Ty = ArgTy;
 107       Entry.isSExt = isSigned;
 108       Entry.isZExt = !isSigned;  // exactly one of isSExt/isZExt is set
 109       Args.push_back(Entry);
 110     }
 111     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 112                                            TLI.getPointerTy());
 113     // The call's return type is taken from result 0 of Op's node.
 114     // Splice the libcall in wherever FindInputOutputChains tells us to.
 115     const Type *RetTy =
 116                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 117     std::pair<SDValue, SDValue> CallInfo =
 118             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 119                             0, TLI.getLibcallCallingConv(LC), false,
 120                             /*isReturnValueUsed=*/true,
 121                             Callee, Args, DAG,
 122                             Op.getDebugLoc());
 123
 124     return CallInfo.first;
 125   }
 126 }
 127 /// Set up register classes, libcall names and per-type operation actions for SPU.
 128 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 129   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 130     SPUTM(TM) {
 131   // Flag pow2 division as cheap, per the setter's name -- TODO confirm intent.
 132   setPow2DivIsCheap();
 133
 134   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 135   setUseUnderscoreSetJmp(true);
 136   setUseUnderscoreLongJmp(true);
 137
 138   // Set RTLIB libcall names as used by SPU:
 139   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 140
 141   // Set up the SPU's register classes:
 142   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 143   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 144   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 145   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 146   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 147   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 148   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 149
 150   // No native extending loads: i1 is promoted here, wider ints go Custom below.
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 152   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 154   // Floating-point extending loads are expanded:
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 156   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 157   // Truncating stores out of i128, and f64 -> f32, are expanded:
 158   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 159   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 160   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 161   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 162
 163   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 164
 165   // FP constants: f32 is Legal, f64 is custom lowered:
 166   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 167   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 168
 169   // SPU's loads and stores have to be custom lowered:
 170   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 171        ++sctype) {  // half-open range: MVT::i128 itself is not visited
 172     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 173
 174     setOperationAction(ISD::LOAD,   VT, Custom);
 175     setOperationAction(ISD::STORE,  VT, Custom);
 176     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 177     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 178     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 179     // Truncating stores from VT to each lower-numbered type, down to i8, expand.
 180     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 181       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 182       setTruncStoreAction(VT, StoreVT, Expand);
 183     }
 184   }
 185   // Same load/store treatment for the FP types in the half-open range [f32, f64):
 186   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 187        ++sctype) {
 188     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 189
 190     setOperationAction(ISD::LOAD,   VT, Custom);
 191     setOperationAction(ISD::STORE,  VT, Custom);
 192     // Truncating stores to lower-numbered FP types (no iterations when VT is f32).
 193     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 194       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 195       setTruncStoreAction(VT, StoreVT, Expand);
 196     }
 197   }
 198
 199   // Expand the jumptable branches and BR_CC
 200   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 201   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 202
 203   // Custom lower SELECT_CC for most cases, but expand by default
 204   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 206   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 207   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 208   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 209
 210   // SPU has no intrinsics for these particular operations:
 211   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 212
 213   // SPU has no division/remainder instructions
 214   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 215   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 216   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 217   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 218   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 219   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 220   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 221   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 222   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 223   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 224   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 225   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 226   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 227   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 228   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 229   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 230   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 231   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 232   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 233   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 234   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 235   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 236   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 237   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 238   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 239   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 240   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 241   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 242   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 243   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 244
 245   // We don't support sin/cos/sqrt/fmod
 246   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 248   setOperationAction(ISD::FREM , MVT::f64, Expand);
 249   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 250   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 251   setOperationAction(ISD::FREM , MVT::f32, Expand);
 252
 253   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 254   // for f32!)
 255   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 256   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 257   // fcopysign is expanded as well:
 258   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 259   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 260
 261   // SPU can do rotate right and left, so legalize it... but customize for i8
 262   // because instructions don't exist.
 263
 264   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 265   //        .td files.
 266   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 267   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 268   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 269
 270   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 271   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 272   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 273
 274   // SPU has no native version of shift left/right for i8
 275   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 276   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 277   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 278
 279   // Make these operations legal and handle them during instruction selection:
 280   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 281   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 282   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 283
 284   // i8 multiplication is custom lowered; i32 and i64 multiplies are Legal
 285   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 286   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 287   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 288
 289   // Expand double-width multiplication
 290   // FIXME: It would probably be reasonable to support some of these operations
 291   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 292   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 293   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 294   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 295   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 296   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 297   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 298   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 299   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 300   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 301   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 302   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 303   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 304   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 305   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 306   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 307
 308   // i8 add/sub need custom handling; the i64 forms are marked Legal
 309   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 310   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 311   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 312   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 313
 314   // SPU does not have BSWAP. It does have i32 support CTLZ.
 315   // CTPOP has to be custom lowered (and is expanded for i128).
 316   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 317   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 318
 319   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 321   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 322   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 323   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 324
 325   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 327   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 328   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 329   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 330
 331   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 332   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 333   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 334   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 335   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 336
 337   // SPU has a version of select that implements (a&~c)|(b&c), just like
 338   // select ought to work:
 339   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 340   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 341   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 342   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 343   // Integer SETCC is Legal; f64 SETCC is custom lowered:
 344   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 345   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 346   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 347   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 348   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 349
 350   // Custom lower i128 -> i64 truncates
 351   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 352
 353   // Custom lower i32/i64 -> i128 sign extend
 354   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 355
 356   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 357   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 358   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 359   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 360   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 361   // to expand to a libcall, hence the custom lowering:
 362   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 363   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);  // set again (Promote) below
 364   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);  // set again (Custom) below
 365   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 366   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 367   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 368
 369   // f64 FDIV is expanded to a libcall
 370   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 371
 372   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 373   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 378   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 379   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 380   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 381
 382   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 383   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 384   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 385   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 386
 387   // We cannot sextinreg(i1).  Expand to shifts.
 388   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 389
 390   // We want to legalize GlobalAddress and ConstantPool nodes into the
 391   // appropriate instructions to materialize the address.
 392   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 393        ++sctype) {  // half-open range: MVT::f128 itself is not visited
 394     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 395
 396     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 397     setOperationAction(ISD::ConstantPool,   VT, Custom);
 398     setOperationAction(ISD::JumpTable,      VT, Custom);
 399   }
 400
 401   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 402   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 403
 404   // Use the default implementation.
 405   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 406   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 407   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 408   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 409   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 410   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 411   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 412   // The calls below revisit FP_TO_SINT/SINT_TO_FP i64 and FP_TO_UINT i32 set above.
 413   // Cell SPU has instructions for converting between i64 and fp.
 414   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 415   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 416
 417   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 418   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 419
 420   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 421   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 422
 423   // Register the vector types that live in VECREG. The loop further below
 424   // then assigns operation actions for every vector value type.
 425   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 428   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 429   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 430   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 431
 432   // "Odd size" vector classes that we're willing to support:
 433   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 434
 435   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 436        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 437     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 438
 439     // add/sub are legal for all supported vector VT's.
 440     setOperationAction(ISD::ADD,     VT, Legal);
 441     setOperationAction(ISD::SUB,     VT, Legal);
 442     // mul is marked Legal as well.
 443     setOperationAction(ISD::MUL,     VT, Legal);
 444
 445     setOperationAction(ISD::AND,     VT, Legal);
 446     setOperationAction(ISD::OR,      VT, Legal);
 447     setOperationAction(ISD::XOR,     VT, Legal);
 448     setOperationAction(ISD::LOAD,    VT, Legal);
 449     setOperationAction(ISD::SELECT,  VT, Legal);
 450     setOperationAction(ISD::STORE,   VT, Legal);
 451
 452     // These operations need to be expanded:
 453     setOperationAction(ISD::SDIV,    VT, Expand);
 454     setOperationAction(ISD::SREM,    VT, Expand);
 455     setOperationAction(ISD::UDIV,    VT, Expand);
 456     setOperationAction(ISD::UREM,    VT, Expand);
 457
 458     // Custom lower build_vector, constant pool spills, insert and
 459     // extract vector elements:
 460     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 461     setOperationAction(ISD::ConstantPool, VT, Custom);
 462     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 463     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 464     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 465     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 466   }
 467   // After the loop, v16i8 bitwise ops are switched from Legal to Custom:
 468   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 469   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 470   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 471   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 472
 473   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 474   // Shift amounts are i32; boolean contents are zero-or-negative-one.
 475   setShiftAmountType(MVT::i32);
 476   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 477
 478   setStackPointerRegisterToSaveRestore(SPU::R1);
 479
 480   // We have target-specific dag combine patterns for the following nodes:
 481   setTargetDAGCombine(ISD::ADD);
 482   setTargetDAGCombine(ISD::ZERO_EXTEND);
 483   setTargetDAGCombine(ISD::SIGN_EXTEND);
 484   setTargetDAGCombine(ISD::ANY_EXTEND);
 485
 486   computeRegisterProperties();
 487
 488   // Set pre-RA register scheduler default to BURR, which produces slightly
 489   // better code than the default (could also be TDRR, but TargetLowering.h
 490   // needs a mod to support that model):
 491   setSchedulingPreference(SchedulingForRegPressure);
 492 }
 493
 494 const char *
 495 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 496 {
 497   if (node_names.empty()) {
 498     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 499     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 500     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 501     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 502     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 503     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 504     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 505     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 506     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 507     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 508     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 509     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 510     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 511     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 512     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 513     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 514     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 515     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 516     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 517             "SPUISD::ROTBYTES_LEFT_BITS";
 518     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 519     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 520     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 521     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 522     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 523   }
 524
 525   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 526
 527   return ((i != node_names.end()) ? i->second : 0);
 528 }
 529 /// getFunctionAlignment - Log2 of the function alignment; the argument is ignored.
 530 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 531   enum { Log2Alignment = 3 };  // i.e. 2^3 = 8
 532   return Log2Alignment;
 533 }
 534
 535 //===----------------------------------------------------------------------===//
 536 // Return the Cell SPU's SETCC result type
 537 //===----------------------------------------------------------------------===//
 538
 539 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 540   // i8, i16 and i32 keep their own type as the SETCC result; all others use i32
 541   if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32)
 542     return VT.getSimpleVT().SimpleTy;
 543   return MVT::i32;
 544 }
 545
 546 //===----------------------------------------------------------------------===//
 547 // Calling convention code:
 548 //===----------------------------------------------------------------------===//
 549
 550 #include "SPUGenCallingConv.inc"
 551
 552 //===----------------------------------------------------------------------===//
 553 //  LowerOperation implementation
 554 //===----------------------------------------------------------------------===//
 555
 556 /// Custom lower loads for CellSPU
 557 /*!
 558  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 559  within a 16-byte block, we have to rotate to extract the requested element.
 560  The result is an SPUISD::LDRESULT node carrying (value, chain); \a ST is unused.
 561  For extending loads, we also want to ensure that the following sequence is
 562  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 563
 564 \verbatim
 565 %1  v16i8,ch = load
 566 %2  v16i8,ch = rotate %1
 567 %3  v4f32,ch = bitconvert %2
 568 %4  f32      = vec2prefslot %3
 569 %5  f64      = fp_extend %4
 570 \endverbatim
 571 */
 572 static SDValue
 573 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 574   LoadSDNode *LN = cast<LoadSDNode>(Op);
 575   SDValue the_chain = LN->getChain();
 576   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 577   EVT InVT = LN->getMemoryVT();
 578   EVT OutVT = Op.getValueType();
 579   ISD::LoadExtType ExtType = LN->getExtensionType();
 580   unsigned alignment = LN->getAlignment();
 581   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 582   DebugLoc dl = Op.getDebugLoc();
 583   // Only UNINDEXED loads are lowered; the indexed modes are reported as errors.
 584   switch (LN->getAddressingMode()) {
 585   case ISD::UNINDEXED: {
 586     SDValue result;
 587     SDValue basePtr = LN->getBasePtr();
 588     SDValue rotate;
 589     // rotate: byte rotation that moves the element into its preferred slot.
 590     if (alignment == 16) {
 591       ConstantSDNode *CN;
 592
 593       // Special cases for a known aligned load to simplify the base pointer
 594       // and the rotation amount:
 595       if (basePtr.getOpcode() == ISD::ADD
 596           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 597         // Known offset into basePtr
 598         int64_t offset = CN->getSExtValue();
 599         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 600         // Wrap a negative rotate count around the 16-byte block.
 601         if (rotamt < 0)
 602           rotamt += 16;
 603
 604         rotate = DAG.getConstant(rotamt, MVT::i16);
 605
 606         // Simplify the base pointer for this case:
 607         basePtr = basePtr.getOperand(0);
 608         if ((offset & ~0xf) > 0) {  // NOTE(review): a non-positive aligned part is not re-added -- confirm
 609           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 610                                 basePtr,
 611                                 DAG.getConstant((offset & ~0xf), PtrVT));
 612         }
 613       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 614                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 615                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 616                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 617         // Plain aligned a-form address: rotate into preferred slot
 618         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 619         int64_t rotamt = -vtm->prefslot_byte;
 620         if (rotamt < 0)
 621           rotamt += 16;
 622         rotate = DAG.getConstant(rotamt, MVT::i16);
 623       } else {
 624         // Offset the rotate amount by the basePtr and the preferred slot
 625         // byte offset
 626         int64_t rotamt = -vtm->prefslot_byte;
 627         if (rotamt < 0)
 628           rotamt += 16;
 629         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 630                              basePtr,
 631                              DAG.getConstant(rotamt, PtrVT));
 632       }
 633     } else {
 634       // Unaligned load: must be more pessimistic about addressing modes:
 635       if (basePtr.getOpcode() == ISD::ADD) {
 636         MachineFunction &MF = DAG.getMachineFunction();
 637         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 638         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 639         SDValue Flag;
 640
 641         SDValue Op0 = basePtr.getOperand(0);
 642         SDValue Op1 = basePtr.getOperand(1);
 643
 644         if (isa<ConstantSDNode>(Op1)) {
 645           // Convert the (add <ptr>, <const>) to an indirect address contained
 646           // in a register. Note that this is done because we need to avoid
 647           // creating a 0(reg) d-form address due to the SPU's block loads.
 648           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 649           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 650           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 651         } else {
 652           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 653           // will likely be lowered as a reg(reg) x-form address.
 654           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 655         }
 656       } else {
 657         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 658                               basePtr,
 659                               DAG.getConstant(0, PtrVT));
 660       }
 661
 662       // Offset the rotate amount by the basePtr and the preferred slot
 663       // byte offset
 664       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 665                            basePtr,
 666                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 667     }
 668
 669     // Re-emit as a v16i8 vector load
 670     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 671                          LN->getSrcValue(), LN->getSrcValueOffset(),
 672                          LN->isVolatile(), 16);
 673
 674     // Update the chain
 675     the_chain = result.getValue(1);
 676
 677     // Rotate into the preferred slot:
 678     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 679                          result.getValue(0), rotate);
 680
 681     // Convert the loaded v16i8 vector to the appropriate vector type
 682     // specified by the operand:
 683     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 684                                  InVT, (128 / InVT.getSizeInBits()));
 685     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 686                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 687
 688     // Handle extending loads by extending the scalar result:
 689     if (ExtType == ISD::SEXTLOAD) {
 690       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 691     } else if (ExtType == ISD::ZEXTLOAD) {
 692       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 693     } else if (ExtType == ISD::EXTLOAD) {
 694       unsigned NewOpc = ISD::ANY_EXTEND;
 695
 696       if (OutVT.isFloatingPoint())
 697         NewOpc = ISD::FP_EXTEND;
 698
 699       result = DAG.getNode(NewOpc, dl, OutVT, result);
 700     }
 701     // Package the value and the updated chain into a two-result LDRESULT node.
 702     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 703     SDValue retops[2] = {
 704       result,
 705       the_chain
 706     };
 707
 708     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 709                          retops, sizeof(retops) / sizeof(retops[0]));
 710     return result;
 711   }
 712   case ISD::PRE_INC:
 713   case ISD::PRE_DEC:
 714   case ISD::POST_INC:
 715   case ISD::POST_DEC:
 716   case ISD::LAST_INDEXED_MODE:
 717     {
 718       std::string msg;
 719       raw_string_ostream Msg(msg);
 720       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 721             "UNINDEXED\n";
 722       Msg << (unsigned) LN->getAddressingMode();
 723       llvm_report_error(Msg.str());
 724       /*NOTREACHED*/
 725     }
 726   }
 727   // Fallback: a default-constructed SDValue when no case above returned.
 728   return SDValue();
 729 }
 730
 731 /// Custom lower stores for CellSPU
 732 /*!
 733  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 734  within a 16-byte block, we have to generate a shuffle to insert the
 735  requested element into its place, then store the resulting block.
 736  */
 737 static SDValue
 738 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 739   StoreSDNode *SN = cast<StoreSDNode>(Op);
 740   SDValue Value = SN->getValue();
 741   EVT VT = Value.getValueType();
 742   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 743   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 744   DebugLoc dl = Op.getDebugLoc();
 745   unsigned alignment = SN->getAlignment();
 746
 747   switch (SN->getAddressingMode()) {
 748   case ISD::UNINDEXED: {
 749     // The vector type we really want to load from the 16-byte chunk.
 750     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 751                                  VT, (128 / VT.getSizeInBits())),
 752         stVecVT = EVT::getVectorVT(*DAG.getContext(),
 753                                    StVT, (128 / StVT.getSizeInBits()));
 754
 755     SDValue alignLoadVec;
 756     SDValue basePtr = SN->getBasePtr();
 757     SDValue the_chain = SN->getChain();
 758     SDValue insertEltOffs;
 759
 760     if (alignment == 16) {
 761       ConstantSDNode *CN;
 762
 763       // Special cases for a known aligned load to simplify the base pointer
 764       // and insertion byte:
 765       if (basePtr.getOpcode() == ISD::ADD
 766           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 767         // Known offset into basePtr
 768         int64_t offset = CN->getSExtValue();
 769
 770         // Simplify the base pointer for this case:
 771         basePtr = basePtr.getOperand(0);
 772         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 773                                     basePtr,
 774                                     DAG.getConstant((offset & 0xf), PtrVT));
 775
 776         if ((offset & ~0xf) > 0) {
 777           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 778                                 basePtr,
 779                                 DAG.getConstant((offset & ~0xf), PtrVT));
 780         }
 781       } else {
 782         // Otherwise, assume it's at byte 0 of basePtr
 783         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 784                                     basePtr,
 785                                     DAG.getConstant(0, PtrVT));
 786       }
 787     } else {
 788       // Unaligned load: must be more pessimistic about addressing modes:
 789       if (basePtr.getOpcode() == ISD::ADD) {
 790         MachineFunction &MF = DAG.getMachineFunction();
 791         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 792         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 793         SDValue Flag;
 794
 795         SDValue Op0 = basePtr.getOperand(0);
 796         SDValue Op1 = basePtr.getOperand(1);
 797
 798         if (isa<ConstantSDNode>(Op1)) {
 799           // Convert the (add <ptr>, <const>) to an indirect address contained
 800           // in a register. Note that this is done because we need to avoid
 801           // creating a 0(reg) d-form address due to the SPU's block loads.
 802           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 803           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 804           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 805         } else {
 806           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 807           // will likely be lowered as a reg(reg) x-form address.
 808           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 809         }
 810       } else {
 811         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 812                               basePtr,
 813                               DAG.getConstant(0, PtrVT));
 814       }
 815
 816       // Insertion point is solely determined by basePtr's contents
 817       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 818                                   basePtr,
 819                                   DAG.getConstant(0, PtrVT));
 820     }
 821
 822     // Re-emit as a v16i8 vector load
 823     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 824                                SN->getSrcValue(), SN->getSrcValueOffset(),
 825                                SN->isVolatile(), 16);
 826
 827     // Update the chain
 828     the_chain = alignLoadVec.getValue(1);
 829
 830     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 831     SDValue theValue = SN->getValue();
 832     SDValue result;
 833
 834     if (StVT != VT
 835         && (theValue.getOpcode() == ISD::AssertZext
 836             || theValue.getOpcode() == ISD::AssertSext)) {
 837       // Drill down and get the value for zero- and sign-extended
 838       // quantities
 839       theValue = theValue.getOperand(0);
 840     }
 841
 842     // If the base pointer is already a D-form address, then just create
 843     // a new D-form address with a slot offset and the orignal base pointer.
 844     // Otherwise generate a D-form address with the slot offset relative
 845     // to the stack pointer, which is always aligned.
 846 #if !defined(NDEBUG)
 847       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 848         errs() << "CellSPU LowerSTORE: basePtr = ";
 849         basePtr.getNode()->dump(&DAG);
 850         errs() << "\n";
 851       }
 852 #endif
 853
 854     SDValue insertEltOp =
 855             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 856     SDValue vectorizeOp =
 857             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 858
 859     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 860                          vectorizeOp, alignLoadVec,
 861                          DAG.getNode(ISD::BIT_CONVERT, dl,
 862                                      MVT::v4i32, insertEltOp));
 863
 864     result = DAG.getStore(the_chain, dl, result, basePtr,
 865                           LN->getSrcValue(), LN->getSrcValueOffset(),
 866                           LN->isVolatile(), LN->getAlignment());
 867
 868 #if 0 && !defined(NDEBUG)
 869     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 870       const SDValue &currentRoot = DAG.getRoot();
 871
 872       DAG.setRoot(result);
 873       errs() << "------- CellSPU:LowerStore result:\n";
 874       DAG.dump();
 875       errs() << "-------\n";
 876       DAG.setRoot(currentRoot);
 877     }
 878 #endif
 879
 880     return result;
 881     /*UNREACHED*/
 882   }
 883   case ISD::PRE_INC:
 884   case ISD::PRE_DEC:
 885   case ISD::POST_INC:
 886   case ISD::POST_DEC:
 887   case ISD::LAST_INDEXED_MODE:
 888     {
 889       std::string msg;
 890       raw_string_ostream Msg(msg);
 891       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 892             "UNINDEXED\n";
 893       Msg << (unsigned) SN->getAddressingMode();
 894       llvm_report_error(Msg.str());
 895       /*NOTREACHED*/
 896     }
 897   }
 898
 899   return SDValue();
 900 }
 901
 902 //! Generate the address of a constant pool entry.
 903 static SDValue
 904 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 905   EVT PtrVT = Op.getValueType();
 906   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 907   Constant *C = CP->getConstVal();
 908   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 909   SDValue Zero = DAG.getConstant(0, PtrVT);
 910   const TargetMachine &TM = DAG.getTarget();
 911   // FIXME there is no actual debug info here
 912   DebugLoc dl = Op.getDebugLoc();
 913
 914   if (TM.getRelocationModel() == Reloc::Static) {
 915     if (!ST->usingLargeMem()) {
 916       // Just return the SDValue with the constant pool address in it.
 917       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 918     } else {
 919       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 920       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 921       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 922     }
 923   }
 924
 925   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 926                    " not supported.");
 927   return SDValue();
 928 }
 929
 930 //! Alternate entry point for generating the address of a constant pool entry
 931 SDValue
 932 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 933   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 934 }
 935
 936 static SDValue
 937 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 938   EVT PtrVT = Op.getValueType();
 939   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 940   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 941   SDValue Zero = DAG.getConstant(0, PtrVT);
 942   const TargetMachine &TM = DAG.getTarget();
 943   // FIXME there is no actual debug info here
 944   DebugLoc dl = Op.getDebugLoc();
 945
 946   if (TM.getRelocationModel() == Reloc::Static) {
 947     if (!ST->usingLargeMem()) {
 948       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 949     } else {
 950       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 951       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 952       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 953     }
 954   }
 955
 956   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 957                    " not supported.");
 958   return SDValue();
 959 }
 960
 961 static SDValue
 962 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 963   EVT PtrVT = Op.getValueType();
 964   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 965   GlobalValue *GV = GSDN->getGlobal();
 966   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 967   const TargetMachine &TM = DAG.getTarget();
 968   SDValue Zero = DAG.getConstant(0, PtrVT);
 969   // FIXME there is no actual debug info here
 970   DebugLoc dl = Op.getDebugLoc();
 971
 972   if (TM.getRelocationModel() == Reloc::Static) {
 973     if (!ST->usingLargeMem()) {
 974       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 975     } else {
 976       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 977       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 978       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 979     }
 980   } else {
 981     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 982                       "not supported.");
 983     /*NOTREACHED*/
 984   }
 985
 986   return SDValue();
 987 }
 988
 989 //! Custom lower double precision floating point constants
 990 static SDValue
 991 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 992   EVT VT = Op.getValueType();
 993   // FIXME there is no actual debug info here
 994   DebugLoc dl = Op.getDebugLoc();
 995
 996   if (VT == MVT::f64) {
 997     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 998
 999     assert((FP != 0) &&
1000            "LowerConstantFP: Node is not ConstantFPSDNode");
1001
1002     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1003     SDValue T = DAG.getConstant(dbits, MVT::i64);
1004     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1005     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1006                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1007   }
1008
1009   return SDValue();
1010 }
1011
1012 SDValue
1013 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1014                                         CallingConv::ID CallConv, bool isVarArg,
1015                                         const SmallVectorImpl<ISD::InputArg>
1016                                           &Ins,
1017                                         DebugLoc dl, SelectionDAG &DAG,
1018                                         SmallVectorImpl<SDValue> &InVals) {
1019
1020   MachineFunction &MF = DAG.getMachineFunction();
1021   MachineFrameInfo *MFI = MF.getFrameInfo();
1022   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1023
1024   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1025   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1026
1027   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1028   unsigned ArgRegIdx = 0;
1029   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1030
1031   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1032
1033   // Add DAG nodes to load the arguments or copy them out of registers.
1034   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1035     EVT ObjectVT = Ins[ArgNo].VT;
1036     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1037     SDValue ArgVal;
1038
1039     if (ArgRegIdx < NumArgRegs) {
1040       const TargetRegisterClass *ArgRegClass;
1041
1042       switch (ObjectVT.getSimpleVT().SimpleTy) {
1043       default: {
1044         std::string msg;
1045         raw_string_ostream Msg(msg);
1046         Msg << "LowerFormalArguments Unhandled argument type: "
1047              << ObjectVT.getEVTString();
1048         llvm_report_error(Msg.str());
1049       }
1050       case MVT::i8:
1051         ArgRegClass = &SPU::R8CRegClass;
1052         break;
1053       case MVT::i16:
1054         ArgRegClass = &SPU::R16CRegClass;
1055         break;
1056       case MVT::i32:
1057         ArgRegClass = &SPU::R32CRegClass;
1058         break;
1059       case MVT::i64:
1060         ArgRegClass = &SPU::R64CRegClass;
1061         break;
1062       case MVT::i128:
1063         ArgRegClass = &SPU::GPRCRegClass;
1064         break;
1065       case MVT::f32:
1066         ArgRegClass = &SPU::R32FPRegClass;
1067         break;
1068       case MVT::f64:
1069         ArgRegClass = &SPU::R64FPRegClass;
1070         break;
1071       case MVT::v2f64:
1072       case MVT::v4f32:
1073       case MVT::v2i64:
1074       case MVT::v4i32:
1075       case MVT::v8i16:
1076       case MVT::v16i8:
1077         ArgRegClass = &SPU::VECREGRegClass;
1078         break;
1079       }
1080
1081       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1082       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1083       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1084       ++ArgRegIdx;
1085     } else {
1086       // We need to load the argument to a virtual register if we determined
1087       // above that we ran out of physical registers of the appropriate type
1088       // or we're forced to do vararg
1089       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1090       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1091       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
1092       ArgOffset += StackSlotSize;
1093     }
1094
1095     InVals.push_back(ArgVal);
1096     // Update the chain
1097     Chain = ArgVal.getOperand(0);
1098   }
1099
1100   // vararg handling:
1101   if (isVarArg) {
1102     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1103     // We will spill (79-3)+1 registers to the stack
1104     SmallVector<SDValue, 79-3+1> MemOps;
1105
1106     // Create the frame slot
1107
1108     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1109       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1110                                                  true, false);
1111       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1112       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1113       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
1114       Chain = Store.getOperand(0);
1115       MemOps.push_back(Store);
1116
1117       // Increment address by stack slot size for the next stored argument
1118       ArgOffset += StackSlotSize;
1119     }
1120     if (!MemOps.empty())
1121       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1122                           &MemOps[0], MemOps.size());
1123   }
1124
1125   return Chain;
1126 }
1127
1128 /// isLSAAddress - Return the immediate to use if the specified
1129 /// value is representable as a LSA address.
1130 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1131   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1132   if (!C) return 0;
1133
1134   int Addr = C->getZExtValue();
1135   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1136       (Addr << 14 >> 14) != Addr)
1137     return 0;  // Top 14 bits have to be sext of immediate.
1138
1139   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1140 }
1141
1142 SDValue
1143 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1144                              CallingConv::ID CallConv, bool isVarArg,
1145                              bool isTailCall,
1146                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1147                              const SmallVectorImpl<ISD::InputArg> &Ins,
1148                              DebugLoc dl, SelectionDAG &DAG,
1149                              SmallVectorImpl<SDValue> &InVals) {
1150
1151   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1152   unsigned NumOps     = Outs.size();
1153   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1154   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1155   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1156
1157   // Handy pointer type
1158   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1159
1160   // Accumulate how many bytes are to be pushed on the stack, including the
1161   // linkage area, and parameter passing area.  According to the SPU ABI,
1162   // we minimally need space for [LR] and [SP]
1163   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1164
1165   // Set up a copy of the stack pointer for use loading and storing any
1166   // arguments that may not fit in the registers available for argument
1167   // passing.
1168   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1169
1170   // Figure out which arguments are going to go in registers, and which in
1171   // memory.
1172   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1173   unsigned ArgRegIdx = 0;
1174
1175   // Keep track of registers passing arguments
1176   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1177   // And the arguments passed on the stack
1178   SmallVector<SDValue, 8> MemOpChains;
1179
1180   for (unsigned i = 0; i != NumOps; ++i) {
1181     SDValue Arg = Outs[i].Val;
1182
1183     // PtrOff will be used to store the current argument to the stack if a
1184     // register cannot be found for it.
1185     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1186     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1187
1188     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1189     default: llvm_unreachable("Unexpected ValueType for argument!");
1190     case MVT::i8:
1191     case MVT::i16:
1192     case MVT::i32:
1193     case MVT::i64:
1194     case MVT::i128:
1195       if (ArgRegIdx != NumArgRegs) {
1196         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1197       } else {
1198         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1199         ArgOffset += StackSlotSize;
1200       }
1201       break;
1202     case MVT::f32:
1203     case MVT::f64:
1204       if (ArgRegIdx != NumArgRegs) {
1205         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1206       } else {
1207         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1208         ArgOffset += StackSlotSize;
1209       }
1210       break;
1211     case MVT::v2i64:
1212     case MVT::v2f64:
1213     case MVT::v4f32:
1214     case MVT::v4i32:
1215     case MVT::v8i16:
1216     case MVT::v16i8:
1217       if (ArgRegIdx != NumArgRegs) {
1218         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1219       } else {
1220         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1221         ArgOffset += StackSlotSize;
1222       }
1223       break;
1224     }
1225   }
1226
1227   // Update number of stack bytes actually used, insert a call sequence start
1228   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1229   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1230                                                             true));
1231
1232   if (!MemOpChains.empty()) {
1233     // Adjust the stack pointer for the stack arguments.
1234     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1235                         &MemOpChains[0], MemOpChains.size());
1236   }
1237
1238   // Build a sequence of copy-to-reg nodes chained together with token chain
1239   // and flag operands which copy the outgoing args into the appropriate regs.
1240   SDValue InFlag;
1241   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1242     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1243                              RegsToPass[i].second, InFlag);
1244     InFlag = Chain.getValue(1);
1245   }
1246
1247   SmallVector<SDValue, 8> Ops;
1248   unsigned CallOpc = SPUISD::CALL;
1249
1250   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1251   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1252   // node so that legalize doesn't hack it.
1253   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1254     GlobalValue *GV = G->getGlobal();
1255     EVT CalleeVT = Callee.getValueType();
1256     SDValue Zero = DAG.getConstant(0, PtrVT);
1257     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1258
1259     if (!ST->usingLargeMem()) {
1260       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1261       // style calls, otherwise, external symbols are BRASL calls. This assumes
1262       // that declared/defined symbols are in the same compilation unit and can
1263       // be reached through PC-relative jumps.
1264       //
1265       // NOTE:
1266       // This may be an unsafe assumption for JIT and really large compilation
1267       // units.
1268       if (GV->isDeclaration()) {
1269         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1270       } else {
1271         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1272       }
1273     } else {
1274       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1275       // address pairs:
1276       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1277     }
1278   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1279     EVT CalleeVT = Callee.getValueType();
1280     SDValue Zero = DAG.getConstant(0, PtrVT);
1281     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1282         Callee.getValueType());
1283
1284     if (!ST->usingLargeMem()) {
1285       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1286     } else {
1287       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1288     }
1289   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1290     // If this is an absolute destination address that appears to be a legal
1291     // local store address, use the munged value.
1292     Callee = SDValue(Dest, 0);
1293   }
1294
1295   Ops.push_back(Chain);
1296   Ops.push_back(Callee);
1297
1298   // Add argument registers to the end of the list so that they are known live
1299   // into the call.
1300   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1301     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1302                                   RegsToPass[i].second.getValueType()));
1303
1304   if (InFlag.getNode())
1305     Ops.push_back(InFlag);
1306   // Returns a chain and a flag for retval copy to use.
1307   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1308                       &Ops[0], Ops.size());
1309   InFlag = Chain.getValue(1);
1310
1311   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1312                              DAG.getIntPtrConstant(0, true), InFlag);
1313   if (!Ins.empty())
1314     InFlag = Chain.getValue(1);
1315
1316   // If the function returns void, just return the chain.
1317   if (Ins.empty())
1318     return Chain;
1319
1320   // If the call has results, copy the values out of the ret val registers.
1321   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1322   default: llvm_unreachable("Unexpected ret value!");
1323   case MVT::Other: break;
1324   case MVT::i32:
1325     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1326       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1327                                  MVT::i32, InFlag).getValue(1);
1328       InVals.push_back(Chain.getValue(0));
1329       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1330                                  Chain.getValue(2)).getValue(1);
1331       InVals.push_back(Chain.getValue(0));
1332     } else {
1333       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1334                                  InFlag).getValue(1);
1335       InVals.push_back(Chain.getValue(0));
1336     }
1337     break;
1338   case MVT::i64:
1339     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1340                                InFlag).getValue(1);
1341     InVals.push_back(Chain.getValue(0));
1342     break;
1343   case MVT::i128:
1344     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1345                                InFlag).getValue(1);
1346     InVals.push_back(Chain.getValue(0));
1347     break;
1348   case MVT::f32:
1349   case MVT::f64:
1350     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1351                                InFlag).getValue(1);
1352     InVals.push_back(Chain.getValue(0));
1353     break;
1354   case MVT::v2f64:
1355   case MVT::v2i64:
1356   case MVT::v4f32:
1357   case MVT::v4i32:
1358   case MVT::v8i16:
1359   case MVT::v16i8:
1360     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1361                                    InFlag).getValue(1);
1362     InVals.push_back(Chain.getValue(0));
1363     break;
1364   }
1365
1366   return Chain;
1367 }
1368
1369 SDValue
1370 SPUTargetLowering::LowerReturn(SDValue Chain,
1371                                CallingConv::ID CallConv, bool isVarArg,
1372                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1373                                DebugLoc dl, SelectionDAG &DAG) {
1374
1375   SmallVector<CCValAssign, 16> RVLocs;
1376   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1377                  RVLocs, *DAG.getContext());
1378   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1379
1380   // If this is the first return lowered for this function, add the regs to the
1381   // liveout set for the function.
1382   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1383     for (unsigned i = 0; i != RVLocs.size(); ++i)
1384       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1385   }
1386
1387   SDValue Flag;
1388
1389   // Copy the result values into the output registers.
1390   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1391     CCValAssign &VA = RVLocs[i];
1392     assert(VA.isRegLoc() && "Can only return in registers!");
1393     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1394                              Outs[i].Val, Flag);
1395     Flag = Chain.getValue(1);
1396   }
1397
1398   if (Flag.getNode())
1399     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1400   else
1401     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1402 }
1403
1404
1405 //===----------------------------------------------------------------------===//
1406 // Vector related lowering:
1407 //===----------------------------------------------------------------------===//
1408
1409 static ConstantSDNode *
1410 getVecImm(SDNode *N) {
1411   SDValue OpVal(0, 0);
1412
1413   // Check to see if this buildvec has a single non-undef value in its elements.
1414   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1415     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1416     if (OpVal.getNode() == 0)
1417       OpVal = N->getOperand(i);
1418     else if (OpVal != N->getOperand(i))
1419       return 0;
1420   }
1421
1422   if (OpVal.getNode() != 0) {
1423     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1424       return CN;
1425     }
1426   }
1427
1428   return 0;
1429 }
1430
1431 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1432 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1433 /// constant
1434 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1435                               EVT ValueType) {
1436   if (ConstantSDNode *CN = getVecImm(N)) {
1437     uint64_t Value = CN->getZExtValue();
1438     if (ValueType == MVT::i64) {
1439       uint64_t UValue = CN->getZExtValue();
1440       uint32_t upper = uint32_t(UValue >> 32);
1441       uint32_t lower = uint32_t(UValue);
1442       if (upper != lower)
1443         return SDValue();
1444       Value = Value >> 32;
1445     }
1446     if (Value <= 0x3ffff)
1447       return DAG.getTargetConstant(Value, ValueType);
1448   }
1449
1450   return SDValue();
1451 }
1452
1453 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1454 /// and the value fits into a signed 16-bit constant, and if so, return the
1455 /// constant
1456 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1457                               EVT ValueType) {
1458   if (ConstantSDNode *CN = getVecImm(N)) {
1459     int64_t Value = CN->getSExtValue();
1460     if (ValueType == MVT::i64) {
1461       uint64_t UValue = CN->getZExtValue();
1462       uint32_t upper = uint32_t(UValue >> 32);
1463       uint32_t lower = uint32_t(UValue);
1464       if (upper != lower)
1465         return SDValue();
1466       Value = Value >> 32;
1467     }
1468     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1469       return DAG.getTargetConstant(Value, ValueType);
1470     }
1471   }
1472
1473   return SDValue();
1474 }
1475
1476 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1477 /// and the value fits into a signed 10-bit constant, and if so, return the
1478 /// constant
1479 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1480                               EVT ValueType) {
1481   if (ConstantSDNode *CN = getVecImm(N)) {
1482     int64_t Value = CN->getSExtValue();
1483     if (ValueType == MVT::i64) {
1484       uint64_t UValue = CN->getZExtValue();
1485       uint32_t upper = uint32_t(UValue >> 32);
1486       uint32_t lower = uint32_t(UValue);
1487       if (upper != lower)
1488         return SDValue();
1489       Value = Value >> 32;
1490     }
1491     if (isS10Constant(Value))
1492       return DAG.getTargetConstant(Value, ValueType);
1493   }
1494
1495   return SDValue();
1496 }
1497
1498 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1499 /// and the value fits into a signed 8-bit constant, and if so, return the
1500 /// constant.
1501 ///
1502 /// @note: The incoming vector is v16i8 because that's the only way we can load
1503 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1504 /// same value.
1505 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1506                              EVT ValueType) {
1507   if (ConstantSDNode *CN = getVecImm(N)) {
1508     int Value = (int) CN->getZExtValue();
1509     if (ValueType == MVT::i16
1510         && Value <= 0xffff                 /* truncated from uint64_t */
1511         && ((short) Value >> 8) == ((short) Value & 0xff))
1512       return DAG.getTargetConstant(Value & 0xff, ValueType);
1513     else if (ValueType == MVT::i8
1514              && (Value & 0xff) == Value)
1515       return DAG.getTargetConstant(Value, ValueType);
1516   }
1517
1518   return SDValue();
1519 }
1520
1521 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1522 /// and the value fits into a signed 16-bit constant, and if so, return the
1523 /// constant
1524 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1525                                EVT ValueType) {
1526   if (ConstantSDNode *CN = getVecImm(N)) {
1527     uint64_t Value = CN->getZExtValue();
1528     if ((ValueType == MVT::i32
1529           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1530         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1531       return DAG.getTargetConstant(Value >> 16, ValueType);
1532   }
1533
1534   return SDValue();
1535 }
1536
1537 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1538 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1539   if (ConstantSDNode *CN = getVecImm(N)) {
1540     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1541   }
1542
1543   return SDValue();
1544 }
1545
1546 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1547 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1548   if (ConstantSDNode *CN = getVecImm(N)) {
1549     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1550   }
1551
1552   return SDValue();
1553 }
1554
1555 //! Lower a BUILD_VECTOR instruction creatively:
1556 static SDValue
1557 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1558   EVT VT = Op.getValueType();
1559   EVT EltVT = VT.getVectorElementType();
1560   DebugLoc dl = Op.getDebugLoc();
1561   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1562   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1563   unsigned minSplatBits = EltVT.getSizeInBits();
1564
1565   if (minSplatBits < 16)
1566     minSplatBits = 16;
1567
1568   APInt APSplatBits, APSplatUndef;
1569   unsigned SplatBitSize;
1570   bool HasAnyUndefs;
1571
1572   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1573                             HasAnyUndefs, minSplatBits)
1574       || minSplatBits < SplatBitSize)
1575     return SDValue();   // Wasn't a constant vector or splat exceeded min
1576
1577   uint64_t SplatBits = APSplatBits.getZExtValue();
1578
1579   switch (VT.getSimpleVT().SimpleTy) {
1580   default: {
1581     std::string msg;
1582     raw_string_ostream Msg(msg);
1583     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1584          << VT.getEVTString();
1585     llvm_report_error(Msg.str());
1586     /*NOTREACHED*/
1587   }
1588   case MVT::v4f32: {
1589     uint32_t Value32 = uint32_t(SplatBits);
1590     assert(SplatBitSize == 32
1591            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1592     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1593     SDValue T = DAG.getConstant(Value32, MVT::i32);
1594     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1595                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1596     break;
1597   }
1598   case MVT::v2f64: {
1599     uint64_t f64val = uint64_t(SplatBits);
1600     assert(SplatBitSize == 64
1601            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1602     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1603     SDValue T = DAG.getConstant(f64val, MVT::i64);
1604     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1605                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1606     break;
1607   }
1608   case MVT::v16i8: {
1609    // 8-bit constants have to be expanded to 16-bits
1610    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1611    SmallVector<SDValue, 8> Ops;
1612
1613    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1614    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1615                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1616   }
1617   case MVT::v8i16: {
1618     unsigned short Value16 = SplatBits;
1619     SDValue T = DAG.getConstant(Value16, EltVT);
1620     SmallVector<SDValue, 8> Ops;
1621
1622     Ops.assign(8, T);
1623     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1624   }
1625   case MVT::v4i32: {
1626     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1627     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1628   }
1629   case MVT::v2i32: {
1630     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1631     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1632   }
1633   case MVT::v2i64: {
1634     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1635   }
1636   }
1637
1638   return SDValue();
1639 }
1640
1641 /*!
1642  */
1643 SDValue
1644 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1645                      DebugLoc dl) {
1646   uint32_t upper = uint32_t(SplatVal >> 32);
1647   uint32_t lower = uint32_t(SplatVal);
1648
1649   if (upper == lower) {
1650     // Magic constant that can be matched by IL, ILA, et. al.
1651     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1652     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1653                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1654                                    Val, Val, Val, Val));
1655   } else {
1656     bool upper_special, lower_special;
1657
1658     // NOTE: This code creates common-case shuffle masks that can be easily
1659     // detected as common expressions. It is not attempting to create highly
1660     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1661
1662     // Detect if the upper or lower half is a special shuffle mask pattern:
1663     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1664     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1665
1666     // Both upper and lower are special, lower to a constant pool load:
1667     if (lower_special && upper_special) {
1668       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1669       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1670                          SplatValCN, SplatValCN);
1671     }
1672
1673     SDValue LO32;
1674     SDValue HI32;
1675     SmallVector<SDValue, 16> ShufBytes;
1676     SDValue Result;
1677
1678     // Create lower vector if not a special pattern
1679     if (!lower_special) {
1680       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1681       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1682                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1683                                      LO32C, LO32C, LO32C, LO32C));
1684     }
1685
1686     // Create upper vector if not a special pattern
1687     if (!upper_special) {
1688       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1689       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1690                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1691                                      HI32C, HI32C, HI32C, HI32C));
1692     }
1693
1694     // If either upper or lower are special, then the two input operands are
1695     // the same (basically, one of them is a "don't care")
1696     if (lower_special)
1697       LO32 = HI32;
1698     if (upper_special)
1699       HI32 = LO32;
1700
1701     for (int i = 0; i < 4; ++i) {
1702       uint64_t val = 0;
1703       for (int j = 0; j < 4; ++j) {
1704         SDValue V;
1705         bool process_upper, process_lower;
1706         val <<= 8;
1707         process_upper = (upper_special && (i & 1) == 0);
1708         process_lower = (lower_special && (i & 1) == 1);
1709
1710         if (process_upper || process_lower) {
1711           if ((process_upper && upper == 0)
1712                   || (process_lower && lower == 0))
1713             val |= 0x80;
1714           else if ((process_upper && upper == 0xffffffff)
1715                   || (process_lower && lower == 0xffffffff))
1716             val |= 0xc0;
1717           else if ((process_upper && upper == 0x80000000)
1718                   || (process_lower && lower == 0x80000000))
1719             val |= (j == 0 ? 0xe0 : 0x80);
1720         } else
1721           val |= i * 4 + j + ((i & 1) * 16);
1722       }
1723
1724       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1725     }
1726
1727     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1728                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1729                                    &ShufBytes[0], ShufBytes.size()));
1730   }
1731 }
1732
1733 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1734 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1735 /// permutation vector, V3, is monotonically increasing with one "exception"
1736 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1737 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1738 /// In either case, the net result is going to eventually invoke SHUFB to
1739 /// permute/shuffle the bytes from V1 and V2.
1740 /// \note
1741 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1742 /// control word for byte/halfword/word insertion. This takes care of a single
1743 /// element move from V2 into V1.
1744 /// \note
1745 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1746 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1747   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1748   SDValue V1 = Op.getOperand(0);
1749   SDValue V2 = Op.getOperand(1);
1750   DebugLoc dl = Op.getDebugLoc();
1751
1752   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1753
1754   // If we have a single element being moved from V1 to V2, this can be handled
1755   // using the C*[DX] compute mask instructions, but the vector elements have
1756   // to be monotonically increasing with one exception element.
1757   EVT VecVT = V1.getValueType();
1758   EVT EltVT = VecVT.getVectorElementType();
1759   unsigned EltsFromV2 = 0;
1760   unsigned V2Elt = 0;
1761   unsigned V2EltIdx0 = 0;
1762   unsigned CurrElt = 0;
1763   unsigned MaxElts = VecVT.getVectorNumElements();
1764   unsigned PrevElt = 0;
1765   unsigned V0Elt = 0;
1766   bool monotonic = true;
1767   bool rotate = true;
1768
1769   if (EltVT == MVT::i8) {
1770     V2EltIdx0 = 16;
1771   } else if (EltVT == MVT::i16) {
1772     V2EltIdx0 = 8;
1773   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1774     V2EltIdx0 = 4;
1775   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1776     V2EltIdx0 = 2;
1777   } else
1778     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1779
1780   for (unsigned i = 0; i != MaxElts; ++i) {
1781     if (SVN->getMaskElt(i) < 0)
1782       continue;
1783
1784     unsigned SrcElt = SVN->getMaskElt(i);
1785
1786     if (monotonic) {
1787       if (SrcElt >= V2EltIdx0) {
1788         if (1 >= (++EltsFromV2)) {
1789           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790         }
1791       } else if (CurrElt != SrcElt) {
1792         monotonic = false;
1793       }
1794
1795       ++CurrElt;
1796     }
1797
1798     if (rotate) {
1799       if (PrevElt > 0 && SrcElt < MaxElts) {
1800         if ((PrevElt == SrcElt - 1)
1801             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1802           PrevElt = SrcElt;
1803           if (SrcElt == 0)
1804             V0Elt = i;
1805         } else {
1806           rotate = false;
1807         }
1808       } else if (PrevElt == 0) {
1809         // First time through, need to keep track of previous element
1810         PrevElt = SrcElt;
1811       } else {
1812         // This isn't a rotation, takes elements from vector 2
1813         rotate = false;
1814       }
1815     }
1816   }
1817
1818   if (EltsFromV2 == 1 && monotonic) {
1819     // Compute mask and shuffle
1820     MachineFunction &MF = DAG.getMachineFunction();
1821     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1822     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1823     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1824     // Initialize temporary register to 0
1825     SDValue InitTempReg =
1826       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1827     // Copy register's contents as index in SHUFFLE_MASK:
1828     SDValue ShufMaskOp =
1829       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1830                   DAG.getTargetConstant(V2Elt, MVT::i32),
1831                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1832     // Use shuffle mask in SHUFB synthetic instruction:
1833     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1834                        ShufMaskOp);
1835   } else if (rotate) {
1836     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1837
1838     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1839                        V1, DAG.getConstant(rotamt, MVT::i16));
1840   } else {
1841    // Convert the SHUFFLE_VECTOR mask's input element units to the
1842    // actual bytes.
1843     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1844
1845     SmallVector<SDValue, 16> ResultMask;
1846     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1847       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1848
1849       for (unsigned j = 0; j < BytesPerElement; ++j)
1850         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1851     }
1852
1853     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1854                                     &ResultMask[0], ResultMask.size());
1855     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1856   }
1857 }
1858
1859 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1860   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1861   DebugLoc dl = Op.getDebugLoc();
1862
1863   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1864     // For a constant, build the appropriate constant vector, which will
1865     // eventually simplify to a vector register load.
1866
1867     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1868     SmallVector<SDValue, 16> ConstVecValues;
1869     EVT VT;
1870     size_t n_copies;
1871
1872     // Create a constant vector:
1873     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1874     default: llvm_unreachable("Unexpected constant value type in "
1875                               "LowerSCALAR_TO_VECTOR");
1876     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1877     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1878     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1879     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1880     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1881     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1882     }
1883
1884     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1885     for (size_t j = 0; j < n_copies; ++j)
1886       ConstVecValues.push_back(CValue);
1887
1888     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1889                        &ConstVecValues[0], ConstVecValues.size());
1890   } else {
1891     // Otherwise, copy the value from one register to another:
1892     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1893     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1894     case MVT::i8:
1895     case MVT::i16:
1896     case MVT::i32:
1897     case MVT::i64:
1898     case MVT::f32:
1899     case MVT::f64:
1900       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1901     }
1902   }
1903
1904   return SDValue();
1905 }
1906
1907 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1908   EVT VT = Op.getValueType();
1909   SDValue N = Op.getOperand(0);
1910   SDValue Elt = Op.getOperand(1);
1911   DebugLoc dl = Op.getDebugLoc();
1912   SDValue retval;
1913
1914   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1915     // Constant argument:
1916     int EltNo = (int) C->getZExtValue();
1917
1918     // sanity checks:
1919     if (VT == MVT::i8 && EltNo >= 16)
1920       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1921     else if (VT == MVT::i16 && EltNo >= 8)
1922       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1923     else if (VT == MVT::i32 && EltNo >= 4)
1924       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1925     else if (VT == MVT::i64 && EltNo >= 2)
1926       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1927
1928     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1929       // i32 and i64: Element 0 is the preferred slot
1930       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1931     }
1932
1933     // Need to generate shuffle mask and extract:
1934     int prefslot_begin = -1, prefslot_end = -1;
1935     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1936
1937     switch (VT.getSimpleVT().SimpleTy) {
1938     default:
1939       assert(false && "Invalid value type!");
1940     case MVT::i8: {
1941       prefslot_begin = prefslot_end = 3;
1942       break;
1943     }
1944     case MVT::i16: {
1945       prefslot_begin = 2; prefslot_end = 3;
1946       break;
1947     }
1948     case MVT::i32:
1949     case MVT::f32: {
1950       prefslot_begin = 0; prefslot_end = 3;
1951       break;
1952     }
1953     case MVT::i64:
1954     case MVT::f64: {
1955       prefslot_begin = 0; prefslot_end = 7;
1956       break;
1957     }
1958     }
1959
1960     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1961            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1962
1963     unsigned int ShufBytes[16] = {
1964       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1965     };
1966     for (int i = 0; i < 16; ++i) {
1967       // zero fill uppper part of preferred slot, don't care about the
1968       // other slots:
1969       unsigned int mask_val;
1970       if (i <= prefslot_end) {
1971         mask_val =
1972           ((i < prefslot_begin)
1973            ? 0x80
1974            : elt_byte + (i - prefslot_begin));
1975
1976         ShufBytes[i] = mask_val;
1977       } else
1978         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1979     }
1980
1981     SDValue ShufMask[4];
1982     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1983       unsigned bidx = i * 4;
1984       unsigned int bits = ((ShufBytes[bidx] << 24) |
1985                            (ShufBytes[bidx+1] << 16) |
1986                            (ShufBytes[bidx+2] << 8) |
1987                            ShufBytes[bidx+3]);
1988       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1989     }
1990
1991     SDValue ShufMaskVec =
1992       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1993                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1994
1995     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1996                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1997                                      N, N, ShufMaskVec));
1998   } else {
1999     // Variable index: Rotate the requested element into slot 0, then replicate
2000     // slot 0 across the vector
2001     EVT VecVT = N.getValueType();
2002     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2003       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2004                         "vector type!");
2005     }
2006
2007     // Make life easier by making sure the index is zero-extended to i32
2008     if (Elt.getValueType() != MVT::i32)
2009       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2010
2011     // Scale the index to a bit/byte shift quantity
2012     APInt scaleFactor =
2013             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2014     unsigned scaleShift = scaleFactor.logBase2();
2015     SDValue vecShift;
2016
2017     if (scaleShift > 0) {
2018       // Scale the shift factor:
2019       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2020                         DAG.getConstant(scaleShift, MVT::i32));
2021     }
2022
2023     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2024
2025     // Replicate the bytes starting at byte 0 across the entire vector (for
2026     // consistency with the notion of a unified register set)
2027     SDValue replicate;
2028
2029     switch (VT.getSimpleVT().SimpleTy) {
2030     default:
2031       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2032                         "type");
2033       /*NOTREACHED*/
2034     case MVT::i8: {
2035       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2036       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2037                               factor, factor, factor, factor);
2038       break;
2039     }
2040     case MVT::i16: {
2041       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2042       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2043                               factor, factor, factor, factor);
2044       break;
2045     }
2046     case MVT::i32:
2047     case MVT::f32: {
2048       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2049       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2050                               factor, factor, factor, factor);
2051       break;
2052     }
2053     case MVT::i64:
2054     case MVT::f64: {
2055       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2056       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2057       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2058                               loFactor, hiFactor, loFactor, hiFactor);
2059       break;
2060     }
2061     }
2062
2063     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2064                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2065                                      vecShift, vecShift, replicate));
2066   }
2067
2068   return retval;
2069 }
2070
2071 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2072   SDValue VecOp = Op.getOperand(0);
2073   SDValue ValOp = Op.getOperand(1);
2074   SDValue IdxOp = Op.getOperand(2);
2075   DebugLoc dl = Op.getDebugLoc();
2076   EVT VT = Op.getValueType();
2077
2078   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2079   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2080
2081   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2082   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2083   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2084                                 DAG.getRegister(SPU::R1, PtrVT),
2085                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2086   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2087
2088   SDValue result =
2089     DAG.getNode(SPUISD::SHUFB, dl, VT,
2090                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2091                 VecOp,
2092                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2093
2094   return result;
2095 }
2096
2097 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2098                            const TargetLowering &TLI)
2099 {
2100   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2101   DebugLoc dl = Op.getDebugLoc();
2102   EVT ShiftVT = TLI.getShiftAmountTy();
2103
2104   assert(Op.getValueType() == MVT::i8);
2105   switch (Opc) {
2106   default:
2107     llvm_unreachable("Unhandled i8 math operator");
2108     /*NOTREACHED*/
2109     break;
2110   case ISD::ADD: {
2111     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2112     // the result:
2113     SDValue N1 = Op.getOperand(1);
2114     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2115     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2116     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2117                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2118
2119   }
2120
2121   case ISD::SUB: {
2122     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2123     // the result:
2124     SDValue N1 = Op.getOperand(1);
2125     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2126     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2127     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2128                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2129   }
2130   case ISD::ROTR:
2131   case ISD::ROTL: {
2132     SDValue N1 = Op.getOperand(1);
2133     EVT N1VT = N1.getValueType();
2134
2135     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2136     if (!N1VT.bitsEq(ShiftVT)) {
2137       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2138                        ? ISD::ZERO_EXTEND
2139                        : ISD::TRUNCATE;
2140       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2141     }
2142
2143     // Replicate lower 8-bits into upper 8:
2144     SDValue ExpandArg =
2145       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2146                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2147                               N0, DAG.getConstant(8, MVT::i32)));
2148
2149     // Truncate back down to i8
2150     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2151                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2152   }
2153   case ISD::SRL:
2154   case ISD::SHL: {
2155     SDValue N1 = Op.getOperand(1);
2156     EVT N1VT = N1.getValueType();
2157
2158     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2159     if (!N1VT.bitsEq(ShiftVT)) {
2160       unsigned N1Opc = ISD::ZERO_EXTEND;
2161
2162       if (N1.getValueType().bitsGT(ShiftVT))
2163         N1Opc = ISD::TRUNCATE;
2164
2165       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2166     }
2167
2168     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2169                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2170   }
2171   case ISD::SRA: {
2172     SDValue N1 = Op.getOperand(1);
2173     EVT N1VT = N1.getValueType();
2174
2175     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2176     if (!N1VT.bitsEq(ShiftVT)) {
2177       unsigned N1Opc = ISD::SIGN_EXTEND;
2178
2179       if (N1VT.bitsGT(ShiftVT))
2180         N1Opc = ISD::TRUNCATE;
2181       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2182     }
2183
2184     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2185                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2186   }
2187   case ISD::MUL: {
2188     SDValue N1 = Op.getOperand(1);
2189
2190     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2191     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2192     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2193                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2194     break;
2195   }
2196   }
2197
2198   return SDValue();
2199 }
2200
2201 //! Lower byte immediate operations for v16i8 vectors:
2202 static SDValue
2203 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2204   SDValue ConstVec;
2205   SDValue Arg;
2206   EVT VT = Op.getValueType();
2207   DebugLoc dl = Op.getDebugLoc();
2208
2209   ConstVec = Op.getOperand(0);
2210   Arg = Op.getOperand(1);
2211   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2212     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2213       ConstVec = ConstVec.getOperand(0);
2214     } else {
2215       ConstVec = Op.getOperand(1);
2216       Arg = Op.getOperand(0);
2217       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2218         ConstVec = ConstVec.getOperand(0);
2219       }
2220     }
2221   }
2222
2223   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2224     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2225     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2226
2227     APInt APSplatBits, APSplatUndef;
2228     unsigned SplatBitSize;
2229     bool HasAnyUndefs;
2230     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2231
2232     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2233                               HasAnyUndefs, minSplatBits)
2234         && minSplatBits <= SplatBitSize) {
2235       uint64_t SplatBits = APSplatBits.getZExtValue();
2236       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2237
2238       SmallVector<SDValue, 16> tcVec;
2239       tcVec.assign(16, tc);
2240       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2241                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2242     }
2243   }
2244
2245   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2246   // lowered.  Return the operation, rather than a null SDValue.
2247   return Op;
2248 }
2249
2250 //! Custom lowering for CTPOP (count population)
2251 /*!
2252   Custom lowering code that counts the number ones in the input
2253   operand. SPU has such an instruction, but it counts the number of
2254   ones per byte, which then have to be accumulated.
2255 */
2256 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2257   EVT VT = Op.getValueType();
2258   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2259                                VT, (128 / VT.getSizeInBits()));
2260   DebugLoc dl = Op.getDebugLoc();
2261
2262   switch (VT.getSimpleVT().SimpleTy) {
2263   default:
2264     assert(false && "Invalid value type!");
2265   case MVT::i8: {
2266     SDValue N = Op.getOperand(0);
2267     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2268
2269     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2270     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2271
2272     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2273   }
2274
2275   case MVT::i16: {
2276     MachineFunction &MF = DAG.getMachineFunction();
2277     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2278
2279     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2280
2281     SDValue N = Op.getOperand(0);
2282     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2283     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2284     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2285
2286     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2287     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2288
2289     // CNTB_result becomes the chain to which all of the virtual registers
2290     // CNTB_reg, SUM1_reg become associated:
2291     SDValue CNTB_result =
2292       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2293
2294     SDValue CNTB_rescopy =
2295       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2296
2297     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2298
2299     return DAG.getNode(ISD::AND, dl, MVT::i16,
2300                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2301                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2302                                                Tmp1, Shift1),
2303                                    Tmp1),
2304                        Mask0);
2305   }
2306
2307   case MVT::i32: {
2308     MachineFunction &MF = DAG.getMachineFunction();
2309     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2310
2311     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2312     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2313
2314     SDValue N = Op.getOperand(0);
2315     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2316     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2317     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2318     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2319
2320     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2321     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2322
2323     // CNTB_result becomes the chain to which all of the virtual registers
2324     // CNTB_reg, SUM1_reg become associated:
2325     SDValue CNTB_result =
2326       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2327
2328     SDValue CNTB_rescopy =
2329       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2330
2331     SDValue Comp1 =
2332       DAG.getNode(ISD::SRL, dl, MVT::i32,
2333                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2334                   Shift1);
2335
2336     SDValue Sum1 =
2337       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2338                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2339
2340     SDValue Sum1_rescopy =
2341       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2342
2343     SDValue Comp2 =
2344       DAG.getNode(ISD::SRL, dl, MVT::i32,
2345                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2346                   Shift2);
2347     SDValue Sum2 =
2348       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2349                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2350
2351     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2352   }
2353
2354   case MVT::i64:
2355     break;
2356   }
2357
2358   return SDValue();
2359 }
2360
2361 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2362 /*!
2363  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2364  All conversions to i64 are expanded to a libcall.
2365  */
2366 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2367                               SPUTargetLowering &TLI) {
2368   EVT OpVT = Op.getValueType();
2369   SDValue Op0 = Op.getOperand(0);
2370   EVT Op0VT = Op0.getValueType();
2371
2372   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2373       || OpVT == MVT::i64) {
2374     // Convert f32 / f64 to i32 / i64 via libcall.
2375     RTLIB::Libcall LC =
2376             (Op.getOpcode() == ISD::FP_TO_SINT)
2377              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2378              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2379     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2380     SDValue Dummy;
2381     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2382   }
2383
2384   return Op;
2385 }
2386
2387 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2388 /*!
2389  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2390  All conversions from i64 are expanded to a libcall.
2391  */
2392 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2393                               SPUTargetLowering &TLI) {
2394   EVT OpVT = Op.getValueType();
2395   SDValue Op0 = Op.getOperand(0);
2396   EVT Op0VT = Op0.getValueType();
2397
2398   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2399       || Op0VT == MVT::i64) {
2400     // Convert i32, i64 to f64 via libcall:
2401     RTLIB::Libcall LC =
2402             (Op.getOpcode() == ISD::SINT_TO_FP)
2403              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2404              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2405     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2406     SDValue Dummy;
2407     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2408   }
2409
2410   return Op;
2411 }
2412
2413 //! Lower ISD::SETCC
2414 /*!
2415  This handles MVT::f64 (double floating point) condition lowering
2416  */
2417 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2418                           const TargetLowering &TLI) {
2419   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2420   DebugLoc dl = Op.getDebugLoc();
2421   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2422
2423   SDValue lhs = Op.getOperand(0);
2424   SDValue rhs = Op.getOperand(1);
2425   EVT lhsVT = lhs.getValueType();
2426   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2427
2428   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2429   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2430   EVT IntVT(MVT::i64);
2431
2432   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2433   // selected to a NOP:
2434   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2435   SDValue lhsHi32 =
2436           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2437                       DAG.getNode(ISD::SRL, dl, IntVT,
2438                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2439   SDValue lhsHi32abs =
2440           DAG.getNode(ISD::AND, dl, MVT::i32,
2441                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2442   SDValue lhsLo32 =
2443           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2444
2445   // SETO and SETUO only use the lhs operand:
2446   if (CC->get() == ISD::SETO) {
2447     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2448     // SETUO
2449     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2450     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2451                        DAG.getSetCC(dl, ccResultVT,
2452                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2453                                     ISD::SETUO),
2454                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2455   } else if (CC->get() == ISD::SETUO) {
2456     // Evaluates to true if Op0 is [SQ]NaN
2457     return DAG.getNode(ISD::AND, dl, ccResultVT,
2458                        DAG.getSetCC(dl, ccResultVT,
2459                                     lhsHi32abs,
2460                                     DAG.getConstant(0x7ff00000, MVT::i32),
2461                                     ISD::SETGE),
2462                        DAG.getSetCC(dl, ccResultVT,
2463                                     lhsLo32,
2464                                     DAG.getConstant(0, MVT::i32),
2465                                     ISD::SETGT));
2466   }
2467
2468   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2469   SDValue rhsHi32 =
2470           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2471                       DAG.getNode(ISD::SRL, dl, IntVT,
2472                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2473
2474   // If a value is negative, subtract from the sign magnitude constant:
2475   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2476
2477   // Convert the sign-magnitude representation into 2's complement:
2478   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2479                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2480   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2481   SDValue lhsSelect =
2482           DAG.getNode(ISD::SELECT, dl, IntVT,
2483                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2484
2485   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2486                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2487   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2488   SDValue rhsSelect =
2489           DAG.getNode(ISD::SELECT, dl, IntVT,
2490                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2491
2492   unsigned compareOp;
2493
2494   switch (CC->get()) {
2495   case ISD::SETOEQ:
2496   case ISD::SETUEQ:
2497     compareOp = ISD::SETEQ; break;
2498   case ISD::SETOGT:
2499   case ISD::SETUGT:
2500     compareOp = ISD::SETGT; break;
2501   case ISD::SETOGE:
2502   case ISD::SETUGE:
2503     compareOp = ISD::SETGE; break;
2504   case ISD::SETOLT:
2505   case ISD::SETULT:
2506     compareOp = ISD::SETLT; break;
2507   case ISD::SETOLE:
2508   case ISD::SETULE:
2509     compareOp = ISD::SETLE; break;
2510   case ISD::SETUNE:
2511   case ISD::SETONE:
2512     compareOp = ISD::SETNE; break;
2513   default:
2514     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2515   }
2516
2517   SDValue result =
2518           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2519                        (ISD::CondCode) compareOp);
2520
2521   if ((CC->get() & 0x8) == 0) {
2522     // Ordered comparison:
2523     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2524                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2525                                   ISD::SETO);
2526     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2527                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2528                                   ISD::SETO);
2529     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2530
2531     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2532   }
2533
2534   return result;
2535 }
2536
2537 //! Lower ISD::SELECT_CC
2538 /*!
2539   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2540   SELB instruction.
2541
2542   \note Need to revisit this in the future: if the code path through the true
2543   and false value computations is longer than the latency of a branch (6
2544   cycles), then it would be more advantageous to branch and insert a new basic
2545   block and branch on the condition. However, this code does not make that
2546   assumption, given the simplisitc uses so far.
2547  */
2548
2549 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2550                               const TargetLowering &TLI) {
2551   EVT VT = Op.getValueType();
2552   SDValue lhs = Op.getOperand(0);
2553   SDValue rhs = Op.getOperand(1);
2554   SDValue trueval = Op.getOperand(2);
2555   SDValue falseval = Op.getOperand(3);
2556   SDValue condition = Op.getOperand(4);
2557   DebugLoc dl = Op.getDebugLoc();
2558
2559   // NOTE: SELB's arguments: $rA, $rB, $mask
2560   //
2561   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2562   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2563   // condition was true and 0s where the condition was false. Hence, the
2564   // arguments to SELB get reversed.
2565
2566   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2567   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2568   // with another "cannot select select_cc" assert:
2569
2570   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2571                                 TLI.getSetCCResultType(Op.getValueType()),
2572                                 lhs, rhs, condition);
2573   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2574 }
2575
2576 //! Custom lower ISD::TRUNCATE
2577 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2578 {
2579   // Type to truncate to
2580   EVT VT = Op.getValueType();
2581   MVT simpleVT = VT.getSimpleVT();
2582   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2583                                VT, (128 / VT.getSizeInBits()));
2584   DebugLoc dl = Op.getDebugLoc();
2585
2586   // Type to truncate from
2587   SDValue Op0 = Op.getOperand(0);
2588   EVT Op0VT = Op0.getValueType();
2589
2590   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2591     // Create shuffle mask, least significant doubleword of quadword
2592     unsigned maskHigh = 0x08090a0b;
2593     unsigned maskLow = 0x0c0d0e0f;
2594     // Use a shuffle to perform the truncation
2595     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2596                                    DAG.getConstant(maskHigh, MVT::i32),
2597                                    DAG.getConstant(maskLow, MVT::i32),
2598                                    DAG.getConstant(maskHigh, MVT::i32),
2599                                    DAG.getConstant(maskLow, MVT::i32));
2600
2601     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2602                                        Op0, Op0, shufMask);
2603
2604     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2605   }
2606
2607   return SDValue();             // Leave the truncate unmolested
2608 }
2609
2610 /*!
2611  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2612  * algorithm is to duplicate the sign bit using rotmai to generate at
2613  * least one byte full of sign bits. Then propagate the "sign-byte" into
2614  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2615  *
2616  * @param Op The sext operand
2617  * @param DAG The current DAG
2618  * @return The SDValue with the entire instruction sequence
2619  */
2620 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2621 {
2622   DebugLoc dl = Op.getDebugLoc();
2623
2624   // Type to extend to
2625   MVT OpVT = Op.getValueType().getSimpleVT();
2626   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2627                                OpVT, (128 / OpVT.getSizeInBits()));
2628
2629   // Type to extend from
2630   SDValue Op0 = Op.getOperand(0);
2631   MVT Op0VT = Op0.getValueType().getSimpleVT();
2632
2633   // The type to extend to needs to be a i128 and
2634   // the type to extend from needs to be i64 or i32.
2635   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2636           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2637
2638   // Create shuffle mask
2639   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2640   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2641   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2642   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2643                                  DAG.getConstant(mask1, MVT::i32),
2644                                  DAG.getConstant(mask1, MVT::i32),
2645                                  DAG.getConstant(mask2, MVT::i32),
2646                                  DAG.getConstant(mask3, MVT::i32));
2647
2648   // Word wise arithmetic right shift to generate at least one byte
2649   // that contains sign bits.
2650   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2651   SDValue sraVal = DAG.getNode(ISD::SRA,
2652                  dl,
2653                  mvt,
2654                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2655                  DAG.getConstant(31, MVT::i32));
2656
2657   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2658   // and the input value into the lower 64 bits.
2659   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2660       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2661
2662   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2663 }
2664
2665 //! Custom (target-specific) lowering entry point
2666 /*!
2667   This is where LLVM's DAG selection process calls to do target-specific
2668   lowering of nodes.
2669  */
2670 SDValue
2671 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2672 {
2673   unsigned Opc = (unsigned) Op.getOpcode();
2674   EVT VT = Op.getValueType();
2675
2676   switch (Opc) {
2677   default: {
2678 #ifndef NDEBUG
2679     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2680     errs() << "Op.getOpcode() = " << Opc << "\n";
2681     errs() << "*Op.getNode():\n";
2682     Op.getNode()->dump();
2683 #endif
2684     llvm_unreachable(0);
2685   }
2686   case ISD::LOAD:
2687   case ISD::EXTLOAD:
2688   case ISD::SEXTLOAD:
2689   case ISD::ZEXTLOAD:
2690     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2691   case ISD::STORE:
2692     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2693   case ISD::ConstantPool:
2694     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2695   case ISD::GlobalAddress:
2696     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2697   case ISD::JumpTable:
2698     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2699   case ISD::ConstantFP:
2700     return LowerConstantFP(Op, DAG);
2701
2702   // i8, i64 math ops:
2703   case ISD::ADD:
2704   case ISD::SUB:
2705   case ISD::ROTR:
2706   case ISD::ROTL:
2707   case ISD::SRL:
2708   case ISD::SHL:
2709   case ISD::SRA: {
2710     if (VT == MVT::i8)
2711       return LowerI8Math(Op, DAG, Opc, *this);
2712     break;
2713   }
2714
2715   case ISD::FP_TO_SINT:
2716   case ISD::FP_TO_UINT:
2717     return LowerFP_TO_INT(Op, DAG, *this);
2718
2719   case ISD::SINT_TO_FP:
2720   case ISD::UINT_TO_FP:
2721     return LowerINT_TO_FP(Op, DAG, *this);
2722
2723   // Vector-related lowering.
2724   case ISD::BUILD_VECTOR:
2725     return LowerBUILD_VECTOR(Op, DAG);
2726   case ISD::SCALAR_TO_VECTOR:
2727     return LowerSCALAR_TO_VECTOR(Op, DAG);
2728   case ISD::VECTOR_SHUFFLE:
2729     return LowerVECTOR_SHUFFLE(Op, DAG);
2730   case ISD::EXTRACT_VECTOR_ELT:
2731     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2732   case ISD::INSERT_VECTOR_ELT:
2733     return LowerINSERT_VECTOR_ELT(Op, DAG);
2734
2735   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2736   case ISD::AND:
2737   case ISD::OR:
2738   case ISD::XOR:
2739     return LowerByteImmed(Op, DAG);
2740
2741   // Vector and i8 multiply:
2742   case ISD::MUL:
2743     if (VT == MVT::i8)
2744       return LowerI8Math(Op, DAG, Opc, *this);
2745
2746   case ISD::CTPOP:
2747     return LowerCTPOP(Op, DAG);
2748
2749   case ISD::SELECT_CC:
2750     return LowerSELECT_CC(Op, DAG, *this);
2751
2752   case ISD::SETCC:
2753     return LowerSETCC(Op, DAG, *this);
2754
2755   case ISD::TRUNCATE:
2756     return LowerTRUNCATE(Op, DAG);
2757
2758   case ISD::SIGN_EXTEND:
2759     return LowerSIGN_EXTEND(Op, DAG);
2760   }
2761
2762   return SDValue();
2763 }
2764
2765 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2766                                            SmallVectorImpl<SDValue>&Results,
2767                                            SelectionDAG &DAG)
2768 {
2769 #if 0
2770   unsigned Opc = (unsigned) N->getOpcode();
2771   EVT OpVT = N->getValueType(0);
2772
2773   switch (Opc) {
2774   default: {
2775     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2776     errs() << "Op.getOpcode() = " << Opc << "\n";
2777     errs() << "*Op.getNode():\n";
2778     N->dump();
2779     abort();
2780     /*NOTREACHED*/
2781   }
2782   }
2783 #endif
2784
2785   /* Otherwise, return unchanged */
2786 }
2787
2788 //===----------------------------------------------------------------------===//
2789 // Target Optimization Hooks
2790 //===----------------------------------------------------------------------===//
2791
2792 SDValue
2793 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2794 {
2795 #if 0
2796   TargetMachine &TM = getTargetMachine();
2797 #endif
2798   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2799   SelectionDAG &DAG = DCI.DAG;
2800   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2801   EVT NodeVT = N->getValueType(0);      // The node's value type
2802   EVT Op0VT = Op0.getValueType();       // The first operand's result
2803   SDValue Result;                       // Initially, empty result
2804   DebugLoc dl = N->getDebugLoc();
2805
2806   switch (N->getOpcode()) {
2807   default: break;
2808   case ISD::ADD: {
2809     SDValue Op1 = N->getOperand(1);
2810
2811     if (Op0.getOpcode() == SPUISD::IndirectAddr
2812         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2813       // Normalize the operands to reduce repeated code
2814       SDValue IndirectArg = Op0, AddArg = Op1;
2815
2816       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2817         IndirectArg = Op1;
2818         AddArg = Op0;
2819       }
2820
2821       if (isa<ConstantSDNode>(AddArg)) {
2822         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2823         SDValue IndOp1 = IndirectArg.getOperand(1);
2824
2825         if (CN0->isNullValue()) {
2826           // (add (SPUindirect <arg>, <arg>), 0) ->
2827           // (SPUindirect <arg>, <arg>)
2828
2829 #if !defined(NDEBUG)
2830           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2831             errs() << "\n"
2832                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2833                  << "With:    (SPUindirect <arg>, <arg>)\n";
2834           }
2835 #endif
2836
2837           return IndirectArg;
2838         } else if (isa<ConstantSDNode>(IndOp1)) {
2839           // (add (SPUindirect <arg>, <const>), <const>) ->
2840           // (SPUindirect <arg>, <const + const>)
2841           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2842           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2843           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2844
2845 #if !defined(NDEBUG)
2846           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2847             errs() << "\n"
2848                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2849                  << "), " << CN0->getSExtValue() << ")\n"
2850                  << "With:    (SPUindirect <arg>, "
2851                  << combinedConst << ")\n";
2852           }
2853 #endif
2854
2855           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2856                              IndirectArg, combinedValue);
2857         }
2858       }
2859     }
2860     break;
2861   }
2862   case ISD::SIGN_EXTEND:
2863   case ISD::ZERO_EXTEND:
2864   case ISD::ANY_EXTEND: {
2865     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2866       // (any_extend (SPUextract_elt0 <arg>)) ->
2867       // (SPUextract_elt0 <arg>)
2868       // Types must match, however...
2869 #if !defined(NDEBUG)
2870       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2871         errs() << "\nReplace: ";
2872         N->dump(&DAG);
2873         errs() << "\nWith:    ";
2874         Op0.getNode()->dump(&DAG);
2875         errs() << "\n";
2876       }
2877 #endif
2878
2879       return Op0;
2880     }
2881     break;
2882   }
2883   case SPUISD::IndirectAddr: {
2884     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2885       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2886       if (CN != 0 && CN->getZExtValue() == 0) {
2887         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2888         // (SPUaform <addr>, 0)
2889
2890         DEBUG(errs() << "Replace: ");
2891         DEBUG(N->dump(&DAG));
2892         DEBUG(errs() << "\nWith:    ");
2893         DEBUG(Op0.getNode()->dump(&DAG));
2894         DEBUG(errs() << "\n");
2895
2896         return Op0;
2897       }
2898     } else if (Op0.getOpcode() == ISD::ADD) {
2899       SDValue Op1 = N->getOperand(1);
2900       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2901         // (SPUindirect (add <arg>, <arg>), 0) ->
2902         // (SPUindirect <arg>, <arg>)
2903         if (CN1->isNullValue()) {
2904
2905 #if !defined(NDEBUG)
2906           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2907             errs() << "\n"
2908                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2909                  << "With:    (SPUindirect <arg>, <arg>)\n";
2910           }
2911 #endif
2912
2913           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2914                              Op0.getOperand(0), Op0.getOperand(1));
2915         }
2916       }
2917     }
2918     break;
2919   }
2920   case SPUISD::SHLQUAD_L_BITS:
2921   case SPUISD::SHLQUAD_L_BYTES:
2922   case SPUISD::ROTBYTES_LEFT: {
2923     SDValue Op1 = N->getOperand(1);
2924
2925     // Kill degenerate vector shifts:
2926     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2927       if (CN->isNullValue()) {
2928         Result = Op0;
2929       }
2930     }
2931     break;
2932   }
2933   case SPUISD::PREFSLOT2VEC: {
2934     switch (Op0.getOpcode()) {
2935     default:
2936       break;
2937     case ISD::ANY_EXTEND:
2938     case ISD::ZERO_EXTEND:
2939     case ISD::SIGN_EXTEND: {
2940       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2941       // <arg>
2942       // but only if the SPUprefslot2vec and <arg> types match.
2943       SDValue Op00 = Op0.getOperand(0);
2944       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2945         SDValue Op000 = Op00.getOperand(0);
2946         if (Op000.getValueType() == NodeVT) {
2947           Result = Op000;
2948         }
2949       }
2950       break;
2951     }
2952     case SPUISD::VEC2PREFSLOT: {
2953       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2954       // <arg>
2955       Result = Op0.getOperand(0);
2956       break;
2957     }
2958     }
2959     break;
2960   }
2961   }
2962
2963   // Otherwise, return unchanged.
2964 #ifndef NDEBUG
2965   if (Result.getNode()) {
2966     DEBUG(errs() << "\nReplace.SPU: ");
2967     DEBUG(N->dump(&DAG));
2968     DEBUG(errs() << "\nWith:        ");
2969     DEBUG(Result.getNode()->dump(&DAG));
2970     DEBUG(errs() << "\n");
2971   }
2972 #endif
2973
2974   return Result;
2975 }
2976
2977 //===----------------------------------------------------------------------===//
2978 // Inline Assembly Support
2979 //===----------------------------------------------------------------------===//
2980
2981 /// getConstraintType - Given a constraint letter, return the type of
2982 /// constraint it is for this target.
2983 SPUTargetLowering::ConstraintType
2984 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2985   if (ConstraintLetter.size() == 1) {
2986     switch (ConstraintLetter[0]) {
2987     default: break;
2988     case 'b':
2989     case 'r':
2990     case 'f':
2991     case 'v':
2992     case 'y':
2993       return C_RegisterClass;
2994     }
2995   }
2996   return TargetLowering::getConstraintType(ConstraintLetter);
2997 }
2998
2999 std::pair<unsigned, const TargetRegisterClass*>
3000 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3001                                                 EVT VT) const
3002 {
3003   if (Constraint.size() == 1) {
3004     // GCC RS6000 Constraint Letters
3005     switch (Constraint[0]) {
3006     case 'b':   // R1-R31
3007     case 'r':   // R0-R31
3008       if (VT == MVT::i64)
3009         return std::make_pair(0U, SPU::R64CRegisterClass);
3010       return std::make_pair(0U, SPU::R32CRegisterClass);
3011     case 'f':
3012       if (VT == MVT::f32)
3013         return std::make_pair(0U, SPU::R32FPRegisterClass);
3014       else if (VT == MVT::f64)
3015         return std::make_pair(0U, SPU::R64FPRegisterClass);
3016       break;
3017     case 'v':
3018       return std::make_pair(0U, SPU::GPRCRegisterClass);
3019     }
3020   }
3021
3022   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3023 }
3024
3025 //! Compute used/known bits for a SPU operand
3026 void
3027 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3028                                                   const APInt &Mask,
3029                                                   APInt &KnownZero,
3030                                                   APInt &KnownOne,
3031                                                   const SelectionDAG &DAG,
3032                                                   unsigned Depth ) const {
3033 #if 0
3034   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3035
3036   switch (Op.getOpcode()) {
3037   default:
3038     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3039     break;
3040   case CALL:
3041   case SHUFB:
3042   case SHUFFLE_MASK:
3043   case CNTB:
3044   case SPUISD::PREFSLOT2VEC:
3045   case SPUISD::LDRESULT:
3046   case SPUISD::VEC2PREFSLOT:
3047   case SPUISD::SHLQUAD_L_BITS:
3048   case SPUISD::SHLQUAD_L_BYTES:
3049   case SPUISD::VEC_ROTL:
3050   case SPUISD::VEC_ROTR:
3051   case SPUISD::ROTBYTES_LEFT:
3052   case SPUISD::SELECT_MASK:
3053   case SPUISD::SELB:
3054   }
3055 #endif
3056 }
3057
3058 unsigned
3059 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3060                                                    unsigned Depth) const {
3061   switch (Op.getOpcode()) {
3062   default:
3063     return 1;
3064
3065   case ISD::SETCC: {
3066     EVT VT = Op.getValueType();
3067
3068     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3069       VT = MVT::i32;
3070     }
3071     return VT.getSizeInBits();
3072   }
3073   }
3074 }
3075
3076 // LowerAsmOperandForConstraint
3077 void
3078 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3079                                                 char ConstraintLetter,
3080                                                 bool hasMemory,
3081                                                 std::vector<SDValue> &Ops,
3082                                                 SelectionDAG &DAG) const {
3083   // Default, for the time being, to the base class handler
3084   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3085                                                Ops, DAG);
3086 }
3087
3088 /// isLegalAddressImmediate - Return true if the integer value can be used
3089 /// as the offset of the target addressing mode.
3090 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3091                                                 const Type *Ty) const {
3092   // SPU's addresses are 256K:
3093   return (V > -(1 << 18) && V < (1 << 18) - 1);
3094 }
3095
3096 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3097   return false;
3098 }
3099
3100 bool
3101 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3102   // The SPU target isn't yet aware of offsets.
3103   return false;
3104 }