lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // Used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! EVT mapping to useful data for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       std::string msg;
  75       raw_string_ostream Msg(msg);
  76       Msg << "getValueTypeMapEntry returns NULL for "
  77            << VT.getEVTString();
  78       llvm_report_error(Msg.str());
  79     }
  80 #endif
  81
  82     return retval;
  83   }
  84
  85   //! Expand a library call into an actual call DAG node
  86   /*!
  87    \note
  88    This code is taken from SelectionDAGLegalize, since it is not exposed as
  89    part of the LLVM SelectionDAG API.
  90    */
  91
  92   SDValue
  93   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  94                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  95     // The input chain to this libcall is the entry node of the function.
  96     // Legalizing the call will automatically add the previous call to the
  97     // dependence.
  98     SDValue InChain = DAG.getEntryNode();
  99
 100     TargetLowering::ArgListTy Args;
 101     TargetLowering::ArgListEntry Entry;
 102     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 103       EVT ArgVT = Op.getOperand(i).getValueType();
 104       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 105       Entry.Node = Op.getOperand(i);
 106       Entry.Ty = ArgTy;
 107       Entry.isSExt = isSigned;
 108       Entry.isZExt = !isSigned;
 109       Args.push_back(Entry);
 110     }
 111     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 112                                            TLI.getPointerTy());
 113
 114     // Splice the libcall in wherever FindInputOutputChains tells us to.
 115     const Type *RetTy =
 116                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 117     std::pair<SDValue, SDValue> CallInfo =
 118             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 119                             0, TLI.getLibcallCallingConv(LC), false,
 120                             /*isReturnValueUsed=*/true,
 121                             Callee, Args, DAG, Op.getDebugLoc());
 122
 123     return CallInfo.first;
 124   }
 125 }
 126
 127 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 128   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 129     SPUTM(TM) {
 130   // Fold away setcc operations if possible.
 131   setPow2DivIsCheap();
 132
 133   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 134   setUseUnderscoreSetJmp(true);
 135   setUseUnderscoreLongJmp(true);
 136
 137   // Set RTLIB libcall names as used by SPU:
 138   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 139
 140   // Set up the SPU's register classes:
 141   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 142   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 143   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 144   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 145   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 146   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 147   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 148
 149   // SPU has no sign or zero extended loads for i1, i8, i16:
 150   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 151   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 152   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 153
 154   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 156
 157   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 158   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 159   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 160   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 161
 162   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 163
 164   // SPU constant load actions are custom lowered:
 165   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 166   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 167
 168   // SPU's loads and stores have to be custom lowered:
 169   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 170        ++sctype) {
 171     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 172
 173     setOperationAction(ISD::LOAD,   VT, Custom);
 174     setOperationAction(ISD::STORE,  VT, Custom);
 175     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 176     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 177     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 178
 179     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 180       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 181       setTruncStoreAction(VT, StoreVT, Expand);
 182     }
 183   }
 184
 185   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 186        ++sctype) {
 187     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 188
 189     setOperationAction(ISD::LOAD,   VT, Custom);
 190     setOperationAction(ISD::STORE,  VT, Custom);
 191
 192     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 193       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 194       setTruncStoreAction(VT, StoreVT, Expand);
 195     }
 196   }
 197
 198   // Expand the jumptable branches
 199   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 200   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 201
 202   // Custom lower SELECT_CC for most cases, but expand by default
 203   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 204   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 206   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 207   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 208
 209   // SPU has no intrinsics for these particular operations:
 210   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 211
 212   // SPU has no division/remainder instructions
 213   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 214   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 215   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 216   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 217   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 218   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 219   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 220   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 221   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 222   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 223   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 224   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 225   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 226   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 227   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 228   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 229   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 230   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 231   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 232   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 233   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 234   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 235   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 236   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 237   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 238   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 239   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 240   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 241   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 242   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 243
 244   // We don't support sin/cos/sqrt/fmod
 245   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 246   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 247   setOperationAction(ISD::FREM , MVT::f64, Expand);
 248   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 249   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 250   setOperationAction(ISD::FREM , MVT::f32, Expand);
 251
 252   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 253   // for f32!)
 254   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 255   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 256
 257   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 258   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 259
 260   // SPU can do rotate right and left, so legalize it... but customize for i8
 261   // because instructions don't exist.
 262
 263   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 264   //        .td files.
 265   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 266   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 267   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 268
 269   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 270   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 271   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 272
 273   // SPU has no native version of shift left/right for i8
 274   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 275   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 276   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 277
 278   // Make these operations legal and handle them during instruction selection:
 279   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 280   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 281   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 282
 283   // Custom lower i8, i32 and i64 multiplications
 284   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 285   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 286   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 287
 288   // Expand double-width multiplication
 289   // FIXME: It would probably be reasonable to support some of these operations
 290   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 291   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 292   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 293   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 294   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 295   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 296   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 297   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 298   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 299   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 300   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 301   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 302   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 303   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 304   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 305   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 306
 307   // Need to custom handle (some) common i8, i64 math ops
 308   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 309   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 310   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 311   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 312
 313   // SPU does not have BSWAP. It does have i32 support CTLZ.
 314   // CTPOP has to be custom lowered.
 315   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 316   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 317
 318   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 319   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 321   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 322   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 323
 324   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 325   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 327   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 328   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 329
 330   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 331   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 332   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 333   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 334   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 335
 336   // SPU has a version of select that implements (a&~c)|(b&c), just like
 337   // select ought to work:
 338   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 339   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 340   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 341   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 342
 343   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 344   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 345   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 346   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 347   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 348
 349   // Custom lower i128 -> i64 truncates
 350   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 351
 352   // Custom lower i32/i64 -> i128 sign extend
 353   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 354
 355   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 356   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 357   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 358   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 359   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 360   // to expand to a libcall, hence the custom lowering:
 361   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 362   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 363   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 364   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 365   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 366   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 367
 368   // FDIV on SPU requires custom lowering
 369   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 370
 371   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 372   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 373   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 374   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 375   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 378   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 379   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 380
 381   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 382   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 383   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 384   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 385
 386   // We cannot sextinreg(i1).  Expand to shifts.
 387   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 388
 389   // We want to legalize GlobalAddress and ConstantPool nodes into the
 390   // appropriate instructions to materialize the address.
 391   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 392        ++sctype) {
 393     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 394
 395     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 396     setOperationAction(ISD::ConstantPool,   VT, Custom);
 397     setOperationAction(ISD::JumpTable,      VT, Custom);
 398   }
 399
 400   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 401   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 402
 403   // Use the default implementation.
 404   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 405   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 406   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 407   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 408   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 409   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 410   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 411
 412   // Cell SPU has instructions for converting between i64 and fp.
 413   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 414   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 415
 416   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 417   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 418
 419   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 420   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 421
 422   // First set operation action for all vector types to expand. Then we
 423   // will selectively turn on ones that can be effectively codegen'd.
 424   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 425   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 428   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 429   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 430
 431   // "Odd size" vector classes that we're willing to support:
 432   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 433
 434   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 435        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 436     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 437
 438     // add/sub are legal for all supported vector VT's.
 439     setOperationAction(ISD::ADD,     VT, Legal);
 440     setOperationAction(ISD::SUB,     VT, Legal);
 441     // mul has to be custom lowered.
 442     setOperationAction(ISD::MUL,     VT, Legal);
 443
 444     setOperationAction(ISD::AND,     VT, Legal);
 445     setOperationAction(ISD::OR,      VT, Legal);
 446     setOperationAction(ISD::XOR,     VT, Legal);
 447     setOperationAction(ISD::LOAD,    VT, Legal);
 448     setOperationAction(ISD::SELECT,  VT, Legal);
 449     setOperationAction(ISD::STORE,   VT, Legal);
 450
 451     // These operations need to be expanded:
 452     setOperationAction(ISD::SDIV,    VT, Expand);
 453     setOperationAction(ISD::SREM,    VT, Expand);
 454     setOperationAction(ISD::UDIV,    VT, Expand);
 455     setOperationAction(ISD::UREM,    VT, Expand);
 456
 457     // Custom lower build_vector, constant pool spills, insert and
 458     // extract vector elements:
 459     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 460     setOperationAction(ISD::ConstantPool, VT, Custom);
 461     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 462     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 463     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 464     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 465   }
 466
 467   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 468   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 469   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 470   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 471
 472   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 473
 474   setShiftAmountType(MVT::i32);
 475   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 476
 477   setStackPointerRegisterToSaveRestore(SPU::R1);
 478
 479   // We have target-specific dag combine patterns for the following nodes:
 480   setTargetDAGCombine(ISD::ADD);
 481   setTargetDAGCombine(ISD::ZERO_EXTEND);
 482   setTargetDAGCombine(ISD::SIGN_EXTEND);
 483   setTargetDAGCombine(ISD::ANY_EXTEND);
 484
 485   computeRegisterProperties();
 486
 487   // Set pre-RA register scheduler default to BURR, which produces slightly
 488   // better code than the default (could also be TDRR, but TargetLowering.h
 489   // needs a mod to support that model):
 490   setSchedulingPreference(SchedulingForRegPressure);
 491 }
 492
 493 const char *
 494 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 495 {
 496   if (node_names.empty()) {
 497     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 498     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 499     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 500     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 501     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 502     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 503     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 504     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 505     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 506     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 507     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 508     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 509     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 510     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 511     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 512     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 513     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 514     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 515     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 516             "SPUISD::ROTBYTES_LEFT_BITS";
 517     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 518     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 519     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 520     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 521     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 522   }
 523
 524   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 525
 526   return ((i != node_names.end()) ? i->second : 0);
 527 }
 528
 529 /// getFunctionAlignment - Return the Log2 alignment of this function.
 530 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 531   return 3;
 532 }
 533
 534 //===----------------------------------------------------------------------===//
 535 // Return the Cell SPU's SETCC result type
 536 //===----------------------------------------------------------------------===//
 537
 538 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 539   // i16 and i32 are valid SETCC result types
 540   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 541     VT.getSimpleVT().SimpleTy :
 542     MVT::i32);
 543 }
 544
 545 //===----------------------------------------------------------------------===//
 546 // Calling convention code:
 547 //===----------------------------------------------------------------------===//
 548
 549 #include "SPUGenCallingConv.inc"
 550
 551 //===----------------------------------------------------------------------===//
 552 //  LowerOperation implementation
 553 //===----------------------------------------------------------------------===//
 554
 555 /// Custom lower loads for CellSPU
 556 /*!
 557  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 558  within a 16-byte block, we have to rotate to extract the requested element.
 559
 560  For extending loads, we also want to ensure that the following sequence is
 561  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 562
 563 \verbatim
 564 %1  v16i8,ch = load
 565 %2  v16i8,ch = rotate %1
 566 %3  v4f8, ch = bitconvert %2
 567 %4  f32      = vec2perfslot %3
 568 %5  f64      = fp_extend %4
 569 \endverbatim
 570 */
 571 static SDValue
 572 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 573   LoadSDNode *LN = cast<LoadSDNode>(Op);
 574   SDValue the_chain = LN->getChain();
 575   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 576   EVT InVT = LN->getMemoryVT();
 577   EVT OutVT = Op.getValueType();
 578   ISD::LoadExtType ExtType = LN->getExtensionType();
 579   unsigned alignment = LN->getAlignment();
 580   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 581   DebugLoc dl = Op.getDebugLoc();
 582
 583   switch (LN->getAddressingMode()) {
 584   case ISD::UNINDEXED: {
 585     SDValue result;
 586     SDValue basePtr = LN->getBasePtr();
 587     SDValue rotate;
 588
 589     if (alignment == 16) {
 590       ConstantSDNode *CN;
 591
 592       // Special cases for a known aligned load to simplify the base pointer
 593       // and the rotation amount:
 594       if (basePtr.getOpcode() == ISD::ADD
 595           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 596         // Known offset into basePtr
 597         int64_t offset = CN->getSExtValue();
 598         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 599
 600         if (rotamt < 0)
 601           rotamt += 16;
 602
 603         rotate = DAG.getConstant(rotamt, MVT::i16);
 604
 605         // Simplify the base pointer for this case:
 606         basePtr = basePtr.getOperand(0);
 607         if ((offset & ~0xf) > 0) {
 608           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 609                                 basePtr,
 610                                 DAG.getConstant((offset & ~0xf), PtrVT));
 611         }
 612       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 613                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 614                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 615                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 616         // Plain aligned a-form address: rotate into preferred slot
 617         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 618         int64_t rotamt = -vtm->prefslot_byte;
 619         if (rotamt < 0)
 620           rotamt += 16;
 621         rotate = DAG.getConstant(rotamt, MVT::i16);
 622       } else {
 623         // Offset the rotate amount by the basePtr and the preferred slot
 624         // byte offset
 625         int64_t rotamt = -vtm->prefslot_byte;
 626         if (rotamt < 0)
 627           rotamt += 16;
 628         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 629                              basePtr,
 630                              DAG.getConstant(rotamt, PtrVT));
 631       }
 632     } else {
 633       // Unaligned load: must be more pessimistic about addressing modes:
 634       if (basePtr.getOpcode() == ISD::ADD) {
 635         MachineFunction &MF = DAG.getMachineFunction();
 636         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 637         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 638         SDValue Flag;
 639
 640         SDValue Op0 = basePtr.getOperand(0);
 641         SDValue Op1 = basePtr.getOperand(1);
 642
 643         if (isa<ConstantSDNode>(Op1)) {
 644           // Convert the (add <ptr>, <const>) to an indirect address contained
 645           // in a register. Note that this is done because we need to avoid
 646           // creating a 0(reg) d-form address due to the SPU's block loads.
 647           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 648           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 649           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 650         } else {
 651           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 652           // will likely be lowered as a reg(reg) x-form address.
 653           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 654         }
 655       } else {
 656         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 657                               basePtr,
 658                               DAG.getConstant(0, PtrVT));
 659       }
 660
 661       // Offset the rotate amount by the basePtr and the preferred slot
 662       // byte offset
 663       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 664                            basePtr,
 665                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 666     }
 667
 668     // Re-emit as a v16i8 vector load
 669     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 670                          LN->getSrcValue(), LN->getSrcValueOffset(),
 671                          LN->isVolatile(), LN->isNonTemporal(), 16);
 672
 673     // Update the chain
 674     the_chain = result.getValue(1);
 675
 676     // Rotate into the preferred slot:
 677     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 678                          result.getValue(0), rotate);
 679
 680     // Convert the loaded v16i8 vector to the appropriate vector type
 681     // specified by the operand:
 682     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 683                                  InVT, (128 / InVT.getSizeInBits()));
 684     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 685                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 686
 687     // Handle extending loads by extending the scalar result:
 688     if (ExtType == ISD::SEXTLOAD) {
 689       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 690     } else if (ExtType == ISD::ZEXTLOAD) {
 691       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 692     } else if (ExtType == ISD::EXTLOAD) {
 693       unsigned NewOpc = ISD::ANY_EXTEND;
 694
 695       if (OutVT.isFloatingPoint())
 696         NewOpc = ISD::FP_EXTEND;
 697
 698       result = DAG.getNode(NewOpc, dl, OutVT, result);
 699     }
 700
 701     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 702     SDValue retops[2] = {
 703       result,
 704       the_chain
 705     };
 706
 707     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 708                          retops, sizeof(retops) / sizeof(retops[0]));
 709     return result;
 710   }
 711   case ISD::PRE_INC:
 712   case ISD::PRE_DEC:
 713   case ISD::POST_INC:
 714   case ISD::POST_DEC:
 715   case ISD::LAST_INDEXED_MODE:
 716     {
 717       std::string msg;
 718       raw_string_ostream Msg(msg);
 719       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 720             "UNINDEXED\n";
 721       Msg << (unsigned) LN->getAddressingMode();
 722       llvm_report_error(Msg.str());
 723       /*NOTREACHED*/
 724     }
 725   }
 726
 727   return SDValue();
 728 }
 729
 730 /// Custom lower stores for CellSPU
 731 /*!
 732  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 733  within a 16-byte block, we have to generate a shuffle to insert the
 734  requested element into its place, then store the resulting block.
 735  */
 736 static SDValue
 737 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 738   StoreSDNode *SN = cast<StoreSDNode>(Op);
 739   SDValue Value = SN->getValue();
 740   EVT VT = Value.getValueType();
 741   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 742   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 743   DebugLoc dl = Op.getDebugLoc();
 744   unsigned alignment = SN->getAlignment();
 745
 746   switch (SN->getAddressingMode()) {
 747   case ISD::UNINDEXED: {
 748     // The vector type we really want to load from the 16-byte chunk.
 749     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 750                                  VT, (128 / VT.getSizeInBits()));
 751
 752     SDValue alignLoadVec;
 753     SDValue basePtr = SN->getBasePtr();
 754     SDValue the_chain = SN->getChain();
 755     SDValue insertEltOffs;
 756
 757     if (alignment == 16) {
 758       ConstantSDNode *CN;
 759
 760       // Special cases for a known aligned load to simplify the base pointer
 761       // and insertion byte:
 762       if (basePtr.getOpcode() == ISD::ADD
 763           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 764         // Known offset into basePtr
 765         int64_t offset = CN->getSExtValue();
 766
 767         // Simplify the base pointer for this case:
 768         basePtr = basePtr.getOperand(0);
 769         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 770                                     basePtr,
 771                                     DAG.getConstant((offset & 0xf), PtrVT));
 772
 773         if ((offset & ~0xf) > 0) {
 774           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 775                                 basePtr,
 776                                 DAG.getConstant((offset & ~0xf), PtrVT));
 777         }
 778       } else {
 779         // Otherwise, assume it's at byte 0 of basePtr
 780         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 781                                     basePtr,
 782                                     DAG.getConstant(0, PtrVT));
 783       }
 784     } else {
 785       // Unaligned load: must be more pessimistic about addressing modes:
 786       if (basePtr.getOpcode() == ISD::ADD) {
 787         MachineFunction &MF = DAG.getMachineFunction();
 788         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 789         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 790         SDValue Flag;
 791
 792         SDValue Op0 = basePtr.getOperand(0);
 793         SDValue Op1 = basePtr.getOperand(1);
 794
 795         if (isa<ConstantSDNode>(Op1)) {
 796           // Convert the (add <ptr>, <const>) to an indirect address contained
 797           // in a register. Note that this is done because we need to avoid
 798           // creating a 0(reg) d-form address due to the SPU's block loads.
 799           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 800           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 801           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 802         } else {
 803           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 804           // will likely be lowered as a reg(reg) x-form address.
 805           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 806         }
 807       } else {
 808         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 809                               basePtr,
 810                               DAG.getConstant(0, PtrVT));
 811       }
 812
 813       // Insertion point is solely determined by basePtr's contents
 814       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 815                                   basePtr,
 816                                   DAG.getConstant(0, PtrVT));
 817     }
 818
 819     // Re-emit as a v16i8 vector load
 820     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 821                                SN->getSrcValue(), SN->getSrcValueOffset(),
 822                                SN->isVolatile(), SN->isNonTemporal(), 16);
 823
 824     // Update the chain
 825     the_chain = alignLoadVec.getValue(1);
 826
 827     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 828     SDValue theValue = SN->getValue();
 829     SDValue result;
 830
 831     if (StVT != VT
 832         && (theValue.getOpcode() == ISD::AssertZext
 833             || theValue.getOpcode() == ISD::AssertSext)) {
 834       // Drill down and get the value for zero- and sign-extended
 835       // quantities
 836       theValue = theValue.getOperand(0);
 837     }
 838
 839     // If the base pointer is already a D-form address, then just create
 840     // a new D-form address with a slot offset and the orignal base pointer.
 841     // Otherwise generate a D-form address with the slot offset relative
 842     // to the stack pointer, which is always aligned.
 843 #if !defined(NDEBUG)
 844       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 845         errs() << "CellSPU LowerSTORE: basePtr = ";
 846         basePtr.getNode()->dump(&DAG);
 847         errs() << "\n";
 848       }
 849 #endif
 850
 851     SDValue insertEltOp =
 852             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 853     SDValue vectorizeOp =
 854             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 855
 856     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 857                          vectorizeOp, alignLoadVec,
 858                          DAG.getNode(ISD::BIT_CONVERT, dl,
 859                                      MVT::v4i32, insertEltOp));
 860
 861     result = DAG.getStore(the_chain, dl, result, basePtr,
 862                           LN->getSrcValue(), LN->getSrcValueOffset(),
 863                           LN->isVolatile(), LN->isNonTemporal(),
 864                           LN->getAlignment());
 865
 866 #if 0 && !defined(NDEBUG)
 867     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 868       const SDValue &currentRoot = DAG.getRoot();
 869
 870       DAG.setRoot(result);
 871       errs() << "------- CellSPU:LowerStore result:\n";
 872       DAG.dump();
 873       errs() << "-------\n";
 874       DAG.setRoot(currentRoot);
 875     }
 876 #endif
 877
 878     return result;
 879     /*UNREACHED*/
 880   }
 881   case ISD::PRE_INC:
 882   case ISD::PRE_DEC:
 883   case ISD::POST_INC:
 884   case ISD::POST_DEC:
 885   case ISD::LAST_INDEXED_MODE:
 886     {
 887       std::string msg;
 888       raw_string_ostream Msg(msg);
 889       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 890             "UNINDEXED\n";
 891       Msg << (unsigned) SN->getAddressingMode();
 892       llvm_report_error(Msg.str());
 893       /*NOTREACHED*/
 894     }
 895   }
 896
 897   return SDValue();
 898 }
 899
 900 //! Generate the address of a constant pool entry.
 901 static SDValue
 902 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 903   EVT PtrVT = Op.getValueType();
 904   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 905   Constant *C = CP->getConstVal();
 906   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 907   SDValue Zero = DAG.getConstant(0, PtrVT);
 908   const TargetMachine &TM = DAG.getTarget();
 909   // FIXME there is no actual debug info here
 910   DebugLoc dl = Op.getDebugLoc();
 911
 912   if (TM.getRelocationModel() == Reloc::Static) {
 913     if (!ST->usingLargeMem()) {
 914       // Just return the SDValue with the constant pool address in it.
 915       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 916     } else {
 917       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 918       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 919       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 920     }
 921   }
 922
 923   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 924                    " not supported.");
 925   return SDValue();
 926 }
 927
 928 //! Alternate entry point for generating the address of a constant pool entry
 929 SDValue
 930 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 931   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 932 }
 933
 934 static SDValue
 935 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 936   EVT PtrVT = Op.getValueType();
 937   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 938   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 939   SDValue Zero = DAG.getConstant(0, PtrVT);
 940   const TargetMachine &TM = DAG.getTarget();
 941   // FIXME there is no actual debug info here
 942   DebugLoc dl = Op.getDebugLoc();
 943
 944   if (TM.getRelocationModel() == Reloc::Static) {
 945     if (!ST->usingLargeMem()) {
 946       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 947     } else {
 948       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 949       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 950       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 951     }
 952   }
 953
 954   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 955                    " not supported.");
 956   return SDValue();
 957 }
 958
 959 static SDValue
 960 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 961   EVT PtrVT = Op.getValueType();
 962   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 963   GlobalValue *GV = GSDN->getGlobal();
 964   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 965   const TargetMachine &TM = DAG.getTarget();
 966   SDValue Zero = DAG.getConstant(0, PtrVT);
 967   // FIXME there is no actual debug info here
 968   DebugLoc dl = Op.getDebugLoc();
 969
 970   if (TM.getRelocationModel() == Reloc::Static) {
 971     if (!ST->usingLargeMem()) {
 972       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 973     } else {
 974       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 975       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 976       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 977     }
 978   } else {
 979     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 980                       "not supported.");
 981     /*NOTREACHED*/
 982   }
 983
 984   return SDValue();
 985 }
 986
 987 //! Custom lower double precision floating point constants
 988 static SDValue
 989 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 990   EVT VT = Op.getValueType();
 991   // FIXME there is no actual debug info here
 992   DebugLoc dl = Op.getDebugLoc();
 993
 994   if (VT == MVT::f64) {
 995     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 996
 997     assert((FP != 0) &&
 998            "LowerConstantFP: Node is not ConstantFPSDNode");
 999
1000     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1001     SDValue T = DAG.getConstant(dbits, MVT::i64);
1002     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1003     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1004                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1005   }
1006
1007   return SDValue();
1008 }
1009
1010 SDValue
1011 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1012                                         CallingConv::ID CallConv, bool isVarArg,
1013                                         const SmallVectorImpl<ISD::InputArg>
1014                                           &Ins,
1015                                         DebugLoc dl, SelectionDAG &DAG,
1016                                         SmallVectorImpl<SDValue> &InVals) {
1017
1018   MachineFunction &MF = DAG.getMachineFunction();
1019   MachineFrameInfo *MFI = MF.getFrameInfo();
1020   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1021
1022   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1023   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1024
1025   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1026   unsigned ArgRegIdx = 0;
1027   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1028
1029   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1030
1031   // Add DAG nodes to load the arguments or copy them out of registers.
1032   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1033     EVT ObjectVT = Ins[ArgNo].VT;
1034     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1035     SDValue ArgVal;
1036
1037     if (ArgRegIdx < NumArgRegs) {
1038       const TargetRegisterClass *ArgRegClass;
1039
1040       switch (ObjectVT.getSimpleVT().SimpleTy) {
1041       default: {
1042         std::string msg;
1043         raw_string_ostream Msg(msg);
1044         Msg << "LowerFormalArguments Unhandled argument type: "
1045              << ObjectVT.getEVTString();
1046         llvm_report_error(Msg.str());
1047       }
1048       case MVT::i8:
1049         ArgRegClass = &SPU::R8CRegClass;
1050         break;
1051       case MVT::i16:
1052         ArgRegClass = &SPU::R16CRegClass;
1053         break;
1054       case MVT::i32:
1055         ArgRegClass = &SPU::R32CRegClass;
1056         break;
1057       case MVT::i64:
1058         ArgRegClass = &SPU::R64CRegClass;
1059         break;
1060       case MVT::i128:
1061         ArgRegClass = &SPU::GPRCRegClass;
1062         break;
1063       case MVT::f32:
1064         ArgRegClass = &SPU::R32FPRegClass;
1065         break;
1066       case MVT::f64:
1067         ArgRegClass = &SPU::R64FPRegClass;
1068         break;
1069       case MVT::v2f64:
1070       case MVT::v4f32:
1071       case MVT::v2i64:
1072       case MVT::v4i32:
1073       case MVT::v8i16:
1074       case MVT::v16i8:
1075         ArgRegClass = &SPU::VECREGRegClass;
1076         break;
1077       }
1078
1079       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1080       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1081       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1082       ++ArgRegIdx;
1083     } else {
1084       // We need to load the argument to a virtual register if we determined
1085       // above that we ran out of physical registers of the appropriate type
1086       // or we're forced to do vararg
1087       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1088       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1089       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1090       ArgOffset += StackSlotSize;
1091     }
1092
1093     InVals.push_back(ArgVal);
1094     // Update the chain
1095     Chain = ArgVal.getOperand(0);
1096   }
1097
1098   // vararg handling:
1099   if (isVarArg) {
1100     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1101     // We will spill (79-3)+1 registers to the stack
1102     SmallVector<SDValue, 79-3+1> MemOps;
1103
1104     // Create the frame slot
1105
1106     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1107       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1108                                                  true, false);
1109       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1110       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1111       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1112                                    false, false, 0);
1113       Chain = Store.getOperand(0);
1114       MemOps.push_back(Store);
1115
1116       // Increment address by stack slot size for the next stored argument
1117       ArgOffset += StackSlotSize;
1118     }
1119     if (!MemOps.empty())
1120       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1121                           &MemOps[0], MemOps.size());
1122   }
1123
1124   return Chain;
1125 }
1126
1127 /// isLSAAddress - Return the immediate to use if the specified
1128 /// value is representable as a LSA address.
1129 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1130   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1131   if (!C) return 0;
1132
1133   int Addr = C->getZExtValue();
1134   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1135       (Addr << 14 >> 14) != Addr)
1136     return 0;  // Top 14 bits have to be sext of immediate.
1137
1138   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1139 }
1140
1141 SDValue
1142 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1143                              CallingConv::ID CallConv, bool isVarArg,
1144                              bool &isTailCall,
1145                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1146                              const SmallVectorImpl<ISD::InputArg> &Ins,
1147                              DebugLoc dl, SelectionDAG &DAG,
1148                              SmallVectorImpl<SDValue> &InVals) {
1149   // CellSPU target does not yet support tail call optimization.
1150   isTailCall = false;
1151
1152   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1153   unsigned NumOps     = Outs.size();
1154   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1155   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1156   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1157
1158   // Handy pointer type
1159   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1160
1161   // Set up a copy of the stack pointer for use loading and storing any
1162   // arguments that may not fit in the registers available for argument
1163   // passing.
1164   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1165
1166   // Figure out which arguments are going to go in registers, and which in
1167   // memory.
1168   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1169   unsigned ArgRegIdx = 0;
1170
1171   // Keep track of registers passing arguments
1172   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1173   // And the arguments passed on the stack
1174   SmallVector<SDValue, 8> MemOpChains;
1175
1176   for (unsigned i = 0; i != NumOps; ++i) {
1177     SDValue Arg = Outs[i].Val;
1178
1179     // PtrOff will be used to store the current argument to the stack if a
1180     // register cannot be found for it.
1181     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1182     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1183
1184     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1185     default: llvm_unreachable("Unexpected ValueType for argument!");
1186     case MVT::i8:
1187     case MVT::i16:
1188     case MVT::i32:
1189     case MVT::i64:
1190     case MVT::i128:
1191       if (ArgRegIdx != NumArgRegs) {
1192         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1193       } else {
1194         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1195                                            false, false, 0));
1196         ArgOffset += StackSlotSize;
1197       }
1198       break;
1199     case MVT::f32:
1200     case MVT::f64:
1201       if (ArgRegIdx != NumArgRegs) {
1202         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1203       } else {
1204         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1205                                            false, false, 0));
1206         ArgOffset += StackSlotSize;
1207       }
1208       break;
1209     case MVT::v2i64:
1210     case MVT::v2f64:
1211     case MVT::v4f32:
1212     case MVT::v4i32:
1213     case MVT::v8i16:
1214     case MVT::v16i8:
1215       if (ArgRegIdx != NumArgRegs) {
1216         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1217       } else {
1218         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1219                                            false, false, 0));
1220         ArgOffset += StackSlotSize;
1221       }
1222       break;
1223     }
1224   }
1225
1226   // Accumulate how many bytes are to be pushed on the stack, including the
1227   // linkage area, and parameter passing area.  According to the SPU ABI,
1228   // we minimally need space for [LR] and [SP].
1229   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1230
1231   // Insert a call sequence start
1232   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1233                                                             true));
1234
1235   if (!MemOpChains.empty()) {
1236     // Adjust the stack pointer for the stack arguments.
1237     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1238                         &MemOpChains[0], MemOpChains.size());
1239   }
1240
1241   // Build a sequence of copy-to-reg nodes chained together with token chain
1242   // and flag operands which copy the outgoing args into the appropriate regs.
1243   SDValue InFlag;
1244   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1245     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1246                              RegsToPass[i].second, InFlag);
1247     InFlag = Chain.getValue(1);
1248   }
1249
1250   SmallVector<SDValue, 8> Ops;
1251   unsigned CallOpc = SPUISD::CALL;
1252
1253   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1254   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1255   // node so that legalize doesn't hack it.
1256   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1257     GlobalValue *GV = G->getGlobal();
1258     EVT CalleeVT = Callee.getValueType();
1259     SDValue Zero = DAG.getConstant(0, PtrVT);
1260     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1261
1262     if (!ST->usingLargeMem()) {
1263       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1264       // style calls, otherwise, external symbols are BRASL calls. This assumes
1265       // that declared/defined symbols are in the same compilation unit and can
1266       // be reached through PC-relative jumps.
1267       //
1268       // NOTE:
1269       // This may be an unsafe assumption for JIT and really large compilation
1270       // units.
1271       if (GV->isDeclaration()) {
1272         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1273       } else {
1274         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1275       }
1276     } else {
1277       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1278       // address pairs:
1279       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1280     }
1281   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1282     EVT CalleeVT = Callee.getValueType();
1283     SDValue Zero = DAG.getConstant(0, PtrVT);
1284     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1285         Callee.getValueType());
1286
1287     if (!ST->usingLargeMem()) {
1288       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1289     } else {
1290       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1291     }
1292   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1293     // If this is an absolute destination address that appears to be a legal
1294     // local store address, use the munged value.
1295     Callee = SDValue(Dest, 0);
1296   }
1297
1298   Ops.push_back(Chain);
1299   Ops.push_back(Callee);
1300
1301   // Add argument registers to the end of the list so that they are known live
1302   // into the call.
1303   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1304     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1305                                   RegsToPass[i].second.getValueType()));
1306
1307   if (InFlag.getNode())
1308     Ops.push_back(InFlag);
1309   // Returns a chain and a flag for retval copy to use.
1310   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1311                       &Ops[0], Ops.size());
1312   InFlag = Chain.getValue(1);
1313
1314   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1315                              DAG.getIntPtrConstant(0, true), InFlag);
1316   if (!Ins.empty())
1317     InFlag = Chain.getValue(1);
1318
1319   // If the function returns void, just return the chain.
1320   if (Ins.empty())
1321     return Chain;
1322
1323   // If the call has results, copy the values out of the ret val registers.
1324   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1325   default: llvm_unreachable("Unexpected ret value!");
1326   case MVT::Other: break;
1327   case MVT::i32:
1328     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1329       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1330                                  MVT::i32, InFlag).getValue(1);
1331       InVals.push_back(Chain.getValue(0));
1332       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1333                                  Chain.getValue(2)).getValue(1);
1334       InVals.push_back(Chain.getValue(0));
1335     } else {
1336       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1337                                  InFlag).getValue(1);
1338       InVals.push_back(Chain.getValue(0));
1339     }
1340     break;
1341   case MVT::i64:
1342     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1343                                InFlag).getValue(1);
1344     InVals.push_back(Chain.getValue(0));
1345     break;
1346   case MVT::i128:
1347     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1348                                InFlag).getValue(1);
1349     InVals.push_back(Chain.getValue(0));
1350     break;
1351   case MVT::f32:
1352   case MVT::f64:
1353     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1354                                InFlag).getValue(1);
1355     InVals.push_back(Chain.getValue(0));
1356     break;
1357   case MVT::v2f64:
1358   case MVT::v2i64:
1359   case MVT::v4f32:
1360   case MVT::v4i32:
1361   case MVT::v8i16:
1362   case MVT::v16i8:
1363     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1364                                    InFlag).getValue(1);
1365     InVals.push_back(Chain.getValue(0));
1366     break;
1367   }
1368
1369   return Chain;
1370 }
1371
1372 SDValue
1373 SPUTargetLowering::LowerReturn(SDValue Chain,
1374                                CallingConv::ID CallConv, bool isVarArg,
1375                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1376                                DebugLoc dl, SelectionDAG &DAG) {
1377
1378   SmallVector<CCValAssign, 16> RVLocs;
1379   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1380                  RVLocs, *DAG.getContext());
1381   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1382
1383   // If this is the first return lowered for this function, add the regs to the
1384   // liveout set for the function.
1385   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1386     for (unsigned i = 0; i != RVLocs.size(); ++i)
1387       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1388   }
1389
1390   SDValue Flag;
1391
1392   // Copy the result values into the output registers.
1393   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1394     CCValAssign &VA = RVLocs[i];
1395     assert(VA.isRegLoc() && "Can only return in registers!");
1396     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1397                              Outs[i].Val, Flag);
1398     Flag = Chain.getValue(1);
1399   }
1400
1401   if (Flag.getNode())
1402     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1403   else
1404     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1405 }
1406
1407
1408 //===----------------------------------------------------------------------===//
1409 // Vector related lowering:
1410 //===----------------------------------------------------------------------===//
1411
1412 static ConstantSDNode *
1413 getVecImm(SDNode *N) {
1414   SDValue OpVal(0, 0);
1415
1416   // Check to see if this buildvec has a single non-undef value in its elements.
1417   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1418     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1419     if (OpVal.getNode() == 0)
1420       OpVal = N->getOperand(i);
1421     else if (OpVal != N->getOperand(i))
1422       return 0;
1423   }
1424
1425   if (OpVal.getNode() != 0) {
1426     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1427       return CN;
1428     }
1429   }
1430
1431   return 0;
1432 }
1433
1434 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1435 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1436 /// constant
1437 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1438                               EVT ValueType) {
1439   if (ConstantSDNode *CN = getVecImm(N)) {
1440     uint64_t Value = CN->getZExtValue();
1441     if (ValueType == MVT::i64) {
1442       uint64_t UValue = CN->getZExtValue();
1443       uint32_t upper = uint32_t(UValue >> 32);
1444       uint32_t lower = uint32_t(UValue);
1445       if (upper != lower)
1446         return SDValue();
1447       Value = Value >> 32;
1448     }
1449     if (Value <= 0x3ffff)
1450       return DAG.getTargetConstant(Value, ValueType);
1451   }
1452
1453   return SDValue();
1454 }
1455
1456 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1457 /// and the value fits into a signed 16-bit constant, and if so, return the
1458 /// constant
1459 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1460                               EVT ValueType) {
1461   if (ConstantSDNode *CN = getVecImm(N)) {
1462     int64_t Value = CN->getSExtValue();
1463     if (ValueType == MVT::i64) {
1464       uint64_t UValue = CN->getZExtValue();
1465       uint32_t upper = uint32_t(UValue >> 32);
1466       uint32_t lower = uint32_t(UValue);
1467       if (upper != lower)
1468         return SDValue();
1469       Value = Value >> 32;
1470     }
1471     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1472       return DAG.getTargetConstant(Value, ValueType);
1473     }
1474   }
1475
1476   return SDValue();
1477 }
1478
1479 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1480 /// and the value fits into a signed 10-bit constant, and if so, return the
1481 /// constant
1482 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1483                               EVT ValueType) {
1484   if (ConstantSDNode *CN = getVecImm(N)) {
1485     int64_t Value = CN->getSExtValue();
1486     if (ValueType == MVT::i64) {
1487       uint64_t UValue = CN->getZExtValue();
1488       uint32_t upper = uint32_t(UValue >> 32);
1489       uint32_t lower = uint32_t(UValue);
1490       if (upper != lower)
1491         return SDValue();
1492       Value = Value >> 32;
1493     }
1494     if (isS10Constant(Value))
1495       return DAG.getTargetConstant(Value, ValueType);
1496   }
1497
1498   return SDValue();
1499 }
1500
1501 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1502 /// and the value fits into a signed 8-bit constant, and if so, return the
1503 /// constant.
1504 ///
1505 /// @note: The incoming vector is v16i8 because that's the only way we can load
1506 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1507 /// same value.
1508 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1509                              EVT ValueType) {
1510   if (ConstantSDNode *CN = getVecImm(N)) {
1511     int Value = (int) CN->getZExtValue();
1512     if (ValueType == MVT::i16
1513         && Value <= 0xffff                 /* truncated from uint64_t */
1514         && ((short) Value >> 8) == ((short) Value & 0xff))
1515       return DAG.getTargetConstant(Value & 0xff, ValueType);
1516     else if (ValueType == MVT::i8
1517              && (Value & 0xff) == Value)
1518       return DAG.getTargetConstant(Value, ValueType);
1519   }
1520
1521   return SDValue();
1522 }
1523
1524 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1525 /// and the value fits into a signed 16-bit constant, and if so, return the
1526 /// constant
1527 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1528                                EVT ValueType) {
1529   if (ConstantSDNode *CN = getVecImm(N)) {
1530     uint64_t Value = CN->getZExtValue();
1531     if ((ValueType == MVT::i32
1532           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1533         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1534       return DAG.getTargetConstant(Value >> 16, ValueType);
1535   }
1536
1537   return SDValue();
1538 }
1539
1540 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1541 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1542   if (ConstantSDNode *CN = getVecImm(N)) {
1543     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1544   }
1545
1546   return SDValue();
1547 }
1548
1549 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1550 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1551   if (ConstantSDNode *CN = getVecImm(N)) {
1552     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1553   }
1554
1555   return SDValue();
1556 }
1557
1558 //! Lower a BUILD_VECTOR instruction creatively:
1559 static SDValue
1560 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561   EVT VT = Op.getValueType();
1562   EVT EltVT = VT.getVectorElementType();
1563   DebugLoc dl = Op.getDebugLoc();
1564   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1565   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1566   unsigned minSplatBits = EltVT.getSizeInBits();
1567
1568   if (minSplatBits < 16)
1569     minSplatBits = 16;
1570
1571   APInt APSplatBits, APSplatUndef;
1572   unsigned SplatBitSize;
1573   bool HasAnyUndefs;
1574
1575   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1576                             HasAnyUndefs, minSplatBits)
1577       || minSplatBits < SplatBitSize)
1578     return SDValue();   // Wasn't a constant vector or splat exceeded min
1579
1580   uint64_t SplatBits = APSplatBits.getZExtValue();
1581
1582   switch (VT.getSimpleVT().SimpleTy) {
1583   default: {
1584     std::string msg;
1585     raw_string_ostream Msg(msg);
1586     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1587          << VT.getEVTString();
1588     llvm_report_error(Msg.str());
1589     /*NOTREACHED*/
1590   }
1591   case MVT::v4f32: {
1592     uint32_t Value32 = uint32_t(SplatBits);
1593     assert(SplatBitSize == 32
1594            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596     SDValue T = DAG.getConstant(Value32, MVT::i32);
1597     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1598                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1599     break;
1600   }
1601   case MVT::v2f64: {
1602     uint64_t f64val = uint64_t(SplatBits);
1603     assert(SplatBitSize == 64
1604            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606     SDValue T = DAG.getConstant(f64val, MVT::i64);
1607     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1608                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1609     break;
1610   }
1611   case MVT::v16i8: {
1612    // 8-bit constants have to be expanded to 16-bits
1613    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1614    SmallVector<SDValue, 8> Ops;
1615
1616    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1617    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1618                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1619   }
1620   case MVT::v8i16: {
1621     unsigned short Value16 = SplatBits;
1622     SDValue T = DAG.getConstant(Value16, EltVT);
1623     SmallVector<SDValue, 8> Ops;
1624
1625     Ops.assign(8, T);
1626     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1627   }
1628   case MVT::v4i32: {
1629     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1630     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1631   }
1632   case MVT::v2i32: {
1633     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1634     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1635   }
1636   case MVT::v2i64: {
1637     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1638   }
1639   }
1640
1641   return SDValue();
1642 }
1643
1644 /*!
1645  */
1646 SDValue
1647 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1648                      DebugLoc dl) {
1649   uint32_t upper = uint32_t(SplatVal >> 32);
1650   uint32_t lower = uint32_t(SplatVal);
1651
1652   if (upper == lower) {
1653     // Magic constant that can be matched by IL, ILA, et. al.
1654     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1655     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1656                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1657                                    Val, Val, Val, Val));
1658   } else {
1659     bool upper_special, lower_special;
1660
1661     // NOTE: This code creates common-case shuffle masks that can be easily
1662     // detected as common expressions. It is not attempting to create highly
1663     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1664
1665     // Detect if the upper or lower half is a special shuffle mask pattern:
1666     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1667     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1668
1669     // Both upper and lower are special, lower to a constant pool load:
1670     if (lower_special && upper_special) {
1671       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1672       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1673                          SplatValCN, SplatValCN);
1674     }
1675
1676     SDValue LO32;
1677     SDValue HI32;
1678     SmallVector<SDValue, 16> ShufBytes;
1679     SDValue Result;
1680
1681     // Create lower vector if not a special pattern
1682     if (!lower_special) {
1683       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1684       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686                                      LO32C, LO32C, LO32C, LO32C));
1687     }
1688
1689     // Create upper vector if not a special pattern
1690     if (!upper_special) {
1691       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1692       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1693                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1694                                      HI32C, HI32C, HI32C, HI32C));
1695     }
1696
1697     // If either upper or lower are special, then the two input operands are
1698     // the same (basically, one of them is a "don't care")
1699     if (lower_special)
1700       LO32 = HI32;
1701     if (upper_special)
1702       HI32 = LO32;
1703
1704     for (int i = 0; i < 4; ++i) {
1705       uint64_t val = 0;
1706       for (int j = 0; j < 4; ++j) {
1707         SDValue V;
1708         bool process_upper, process_lower;
1709         val <<= 8;
1710         process_upper = (upper_special && (i & 1) == 0);
1711         process_lower = (lower_special && (i & 1) == 1);
1712
1713         if (process_upper || process_lower) {
1714           if ((process_upper && upper == 0)
1715                   || (process_lower && lower == 0))
1716             val |= 0x80;
1717           else if ((process_upper && upper == 0xffffffff)
1718                   || (process_lower && lower == 0xffffffff))
1719             val |= 0xc0;
1720           else if ((process_upper && upper == 0x80000000)
1721                   || (process_lower && lower == 0x80000000))
1722             val |= (j == 0 ? 0xe0 : 0x80);
1723         } else
1724           val |= i * 4 + j + ((i & 1) * 16);
1725       }
1726
1727       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1728     }
1729
1730     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1731                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732                                    &ShufBytes[0], ShufBytes.size()));
1733   }
1734 }
1735
1736 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1737 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1738 /// permutation vector, V3, is monotonically increasing with one "exception"
1739 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1740 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1741 /// In either case, the net result is going to eventually invoke SHUFB to
1742 /// permute/shuffle the bytes from V1 and V2.
1743 /// \note
1744 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1745 /// control word for byte/halfword/word insertion. This takes care of a single
1746 /// element move from V2 into V1.
1747 /// \note
1748 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1749 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1750   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1751   SDValue V1 = Op.getOperand(0);
1752   SDValue V2 = Op.getOperand(1);
1753   DebugLoc dl = Op.getDebugLoc();
1754
1755   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1756
1757   // If we have a single element being moved from V1 to V2, this can be handled
1758   // using the C*[DX] compute mask instructions, but the vector elements have
1759   // to be monotonically increasing with one exception element.
1760   EVT VecVT = V1.getValueType();
1761   EVT EltVT = VecVT.getVectorElementType();
1762   unsigned EltsFromV2 = 0;
1763   unsigned V2Elt = 0;
1764   unsigned V2EltIdx0 = 0;
1765   unsigned CurrElt = 0;
1766   unsigned MaxElts = VecVT.getVectorNumElements();
1767   unsigned PrevElt = 0;
1768   unsigned V0Elt = 0;
1769   bool monotonic = true;
1770   bool rotate = true;
1771
1772   if (EltVT == MVT::i8) {
1773     V2EltIdx0 = 16;
1774   } else if (EltVT == MVT::i16) {
1775     V2EltIdx0 = 8;
1776   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1777     V2EltIdx0 = 4;
1778   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1779     V2EltIdx0 = 2;
1780   } else
1781     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1782
1783   for (unsigned i = 0; i != MaxElts; ++i) {
1784     if (SVN->getMaskElt(i) < 0)
1785       continue;
1786
1787     unsigned SrcElt = SVN->getMaskElt(i);
1788
1789     if (monotonic) {
1790       if (SrcElt >= V2EltIdx0) {
1791         if (1 >= (++EltsFromV2)) {
1792           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1793         }
1794       } else if (CurrElt != SrcElt) {
1795         monotonic = false;
1796       }
1797
1798       ++CurrElt;
1799     }
1800
1801     if (rotate) {
1802       if (PrevElt > 0 && SrcElt < MaxElts) {
1803         if ((PrevElt == SrcElt - 1)
1804             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805           PrevElt = SrcElt;
1806           if (SrcElt == 0)
1807             V0Elt = i;
1808         } else {
1809           rotate = false;
1810         }
1811       } else if (PrevElt == 0) {
1812         // First time through, need to keep track of previous element
1813         PrevElt = SrcElt;
1814       } else {
1815         // This isn't a rotation, takes elements from vector 2
1816         rotate = false;
1817       }
1818     }
1819   }
1820
1821   if (EltsFromV2 == 1 && monotonic) {
1822     // Compute mask and shuffle
1823     MachineFunction &MF = DAG.getMachineFunction();
1824     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1825     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1826     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827     // Initialize temporary register to 0
1828     SDValue InitTempReg =
1829       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1830     // Copy register's contents as index in SHUFFLE_MASK:
1831     SDValue ShufMaskOp =
1832       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1833                   DAG.getTargetConstant(V2Elt, MVT::i32),
1834                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1835     // Use shuffle mask in SHUFB synthetic instruction:
1836     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1837                        ShufMaskOp);
1838   } else if (rotate) {
1839     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1840
1841     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1842                        V1, DAG.getConstant(rotamt, MVT::i16));
1843   } else {
1844    // Convert the SHUFFLE_VECTOR mask's input element units to the
1845    // actual bytes.
1846     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1847
1848     SmallVector<SDValue, 16> ResultMask;
1849     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1850       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1851
1852       for (unsigned j = 0; j < BytesPerElement; ++j)
1853         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1854     }
1855
1856     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1857                                     &ResultMask[0], ResultMask.size());
1858     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1859   }
1860 }
1861
1862 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1863   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1864   DebugLoc dl = Op.getDebugLoc();
1865
1866   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867     // For a constant, build the appropriate constant vector, which will
1868     // eventually simplify to a vector register load.
1869
1870     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871     SmallVector<SDValue, 16> ConstVecValues;
1872     EVT VT;
1873     size_t n_copies;
1874
1875     // Create a constant vector:
1876     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1877     default: llvm_unreachable("Unexpected constant value type in "
1878                               "LowerSCALAR_TO_VECTOR");
1879     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1885     }
1886
1887     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888     for (size_t j = 0; j < n_copies; ++j)
1889       ConstVecValues.push_back(CValue);
1890
1891     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1892                        &ConstVecValues[0], ConstVecValues.size());
1893   } else {
1894     // Otherwise, copy the value from one register to another:
1895     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1896     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1897     case MVT::i8:
1898     case MVT::i16:
1899     case MVT::i32:
1900     case MVT::i64:
1901     case MVT::f32:
1902     case MVT::f64:
1903       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1904     }
1905   }
1906
1907   return SDValue();
1908 }
1909
1910 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1911   EVT VT = Op.getValueType();
1912   SDValue N = Op.getOperand(0);
1913   SDValue Elt = Op.getOperand(1);
1914   DebugLoc dl = Op.getDebugLoc();
1915   SDValue retval;
1916
1917   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1918     // Constant argument:
1919     int EltNo = (int) C->getZExtValue();
1920
1921     // sanity checks:
1922     if (VT == MVT::i8 && EltNo >= 16)
1923       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1924     else if (VT == MVT::i16 && EltNo >= 8)
1925       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1926     else if (VT == MVT::i32 && EltNo >= 4)
1927       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1928     else if (VT == MVT::i64 && EltNo >= 2)
1929       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1930
1931     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1932       // i32 and i64: Element 0 is the preferred slot
1933       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1934     }
1935
1936     // Need to generate shuffle mask and extract:
1937     int prefslot_begin = -1, prefslot_end = -1;
1938     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1939
1940     switch (VT.getSimpleVT().SimpleTy) {
1941     default:
1942       assert(false && "Invalid value type!");
1943     case MVT::i8: {
1944       prefslot_begin = prefslot_end = 3;
1945       break;
1946     }
1947     case MVT::i16: {
1948       prefslot_begin = 2; prefslot_end = 3;
1949       break;
1950     }
1951     case MVT::i32:
1952     case MVT::f32: {
1953       prefslot_begin = 0; prefslot_end = 3;
1954       break;
1955     }
1956     case MVT::i64:
1957     case MVT::f64: {
1958       prefslot_begin = 0; prefslot_end = 7;
1959       break;
1960     }
1961     }
1962
1963     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1964            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1965
1966     unsigned int ShufBytes[16] = {
1967       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1968     };
1969     for (int i = 0; i < 16; ++i) {
1970       // zero fill uppper part of preferred slot, don't care about the
1971       // other slots:
1972       unsigned int mask_val;
1973       if (i <= prefslot_end) {
1974         mask_val =
1975           ((i < prefslot_begin)
1976            ? 0x80
1977            : elt_byte + (i - prefslot_begin));
1978
1979         ShufBytes[i] = mask_val;
1980       } else
1981         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1982     }
1983
1984     SDValue ShufMask[4];
1985     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1986       unsigned bidx = i * 4;
1987       unsigned int bits = ((ShufBytes[bidx] << 24) |
1988                            (ShufBytes[bidx+1] << 16) |
1989                            (ShufBytes[bidx+2] << 8) |
1990                            ShufBytes[bidx+3]);
1991       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1992     }
1993
1994     SDValue ShufMaskVec =
1995       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1996                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1997
1998     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1999                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2000                                      N, N, ShufMaskVec));
2001   } else {
2002     // Variable index: Rotate the requested element into slot 0, then replicate
2003     // slot 0 across the vector
2004     EVT VecVT = N.getValueType();
2005     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2006       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2007                         "vector type!");
2008     }
2009
2010     // Make life easier by making sure the index is zero-extended to i32
2011     if (Elt.getValueType() != MVT::i32)
2012       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2013
2014     // Scale the index to a bit/byte shift quantity
2015     APInt scaleFactor =
2016             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2017     unsigned scaleShift = scaleFactor.logBase2();
2018     SDValue vecShift;
2019
2020     if (scaleShift > 0) {
2021       // Scale the shift factor:
2022       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2023                         DAG.getConstant(scaleShift, MVT::i32));
2024     }
2025
2026     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2027
2028     // Replicate the bytes starting at byte 0 across the entire vector (for
2029     // consistency with the notion of a unified register set)
2030     SDValue replicate;
2031
2032     switch (VT.getSimpleVT().SimpleTy) {
2033     default:
2034       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2035                         "type");
2036       /*NOTREACHED*/
2037     case MVT::i8: {
2038       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2039       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2040                               factor, factor, factor, factor);
2041       break;
2042     }
2043     case MVT::i16: {
2044       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2045       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2046                               factor, factor, factor, factor);
2047       break;
2048     }
2049     case MVT::i32:
2050     case MVT::f32: {
2051       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2052       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053                               factor, factor, factor, factor);
2054       break;
2055     }
2056     case MVT::i64:
2057     case MVT::f64: {
2058       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2059       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2060       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2061                               loFactor, hiFactor, loFactor, hiFactor);
2062       break;
2063     }
2064     }
2065
2066     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2067                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2068                                      vecShift, vecShift, replicate));
2069   }
2070
2071   return retval;
2072 }
2073
2074 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2075   SDValue VecOp = Op.getOperand(0);
2076   SDValue ValOp = Op.getOperand(1);
2077   SDValue IdxOp = Op.getOperand(2);
2078   DebugLoc dl = Op.getDebugLoc();
2079   EVT VT = Op.getValueType();
2080
2081   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2082   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2083
2084   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2085   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2086   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2087                                 DAG.getRegister(SPU::R1, PtrVT),
2088                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2089   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2090
2091   SDValue result =
2092     DAG.getNode(SPUISD::SHUFB, dl, VT,
2093                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2094                 VecOp,
2095                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2096
2097   return result;
2098 }
2099
2100 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2101                            const TargetLowering &TLI)
2102 {
2103   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2104   DebugLoc dl = Op.getDebugLoc();
2105   EVT ShiftVT = TLI.getShiftAmountTy();
2106
2107   assert(Op.getValueType() == MVT::i8);
2108   switch (Opc) {
2109   default:
2110     llvm_unreachable("Unhandled i8 math operator");
2111     /*NOTREACHED*/
2112     break;
2113   case ISD::ADD: {
2114     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2115     // the result:
2116     SDValue N1 = Op.getOperand(1);
2117     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2118     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2119     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2120                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2121
2122   }
2123
2124   case ISD::SUB: {
2125     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2126     // the result:
2127     SDValue N1 = Op.getOperand(1);
2128     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2129     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2130     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2131                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2132   }
2133   case ISD::ROTR:
2134   case ISD::ROTL: {
2135     SDValue N1 = Op.getOperand(1);
2136     EVT N1VT = N1.getValueType();
2137
2138     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2139     if (!N1VT.bitsEq(ShiftVT)) {
2140       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2141                        ? ISD::ZERO_EXTEND
2142                        : ISD::TRUNCATE;
2143       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2144     }
2145
2146     // Replicate lower 8-bits into upper 8:
2147     SDValue ExpandArg =
2148       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2149                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2150                               N0, DAG.getConstant(8, MVT::i32)));
2151
2152     // Truncate back down to i8
2153     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2154                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2155   }
2156   case ISD::SRL:
2157   case ISD::SHL: {
2158     SDValue N1 = Op.getOperand(1);
2159     EVT N1VT = N1.getValueType();
2160
2161     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2162     if (!N1VT.bitsEq(ShiftVT)) {
2163       unsigned N1Opc = ISD::ZERO_EXTEND;
2164
2165       if (N1.getValueType().bitsGT(ShiftVT))
2166         N1Opc = ISD::TRUNCATE;
2167
2168       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2169     }
2170
2171     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2173   }
2174   case ISD::SRA: {
2175     SDValue N1 = Op.getOperand(1);
2176     EVT N1VT = N1.getValueType();
2177
2178     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2179     if (!N1VT.bitsEq(ShiftVT)) {
2180       unsigned N1Opc = ISD::SIGN_EXTEND;
2181
2182       if (N1VT.bitsGT(ShiftVT))
2183         N1Opc = ISD::TRUNCATE;
2184       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2185     }
2186
2187     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2189   }
2190   case ISD::MUL: {
2191     SDValue N1 = Op.getOperand(1);
2192
2193     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2194     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2195     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2196                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2197     break;
2198   }
2199   }
2200
2201   return SDValue();
2202 }
2203
2204 //! Lower byte immediate operations for v16i8 vectors:
2205 static SDValue
2206 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2207   SDValue ConstVec;
2208   SDValue Arg;
2209   EVT VT = Op.getValueType();
2210   DebugLoc dl = Op.getDebugLoc();
2211
2212   ConstVec = Op.getOperand(0);
2213   Arg = Op.getOperand(1);
2214   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2215     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2216       ConstVec = ConstVec.getOperand(0);
2217     } else {
2218       ConstVec = Op.getOperand(1);
2219       Arg = Op.getOperand(0);
2220       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2221         ConstVec = ConstVec.getOperand(0);
2222       }
2223     }
2224   }
2225
2226   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2227     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2228     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2229
2230     APInt APSplatBits, APSplatUndef;
2231     unsigned SplatBitSize;
2232     bool HasAnyUndefs;
2233     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2234
2235     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2236                               HasAnyUndefs, minSplatBits)
2237         && minSplatBits <= SplatBitSize) {
2238       uint64_t SplatBits = APSplatBits.getZExtValue();
2239       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2240
2241       SmallVector<SDValue, 16> tcVec;
2242       tcVec.assign(16, tc);
2243       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2244                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2245     }
2246   }
2247
2248   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2249   // lowered.  Return the operation, rather than a null SDValue.
2250   return Op;
2251 }
2252
2253 //! Custom lowering for CTPOP (count population)
2254 /*!
2255   Custom lowering code that counts the number ones in the input
2256   operand. SPU has such an instruction, but it counts the number of
2257   ones per byte, which then have to be accumulated.
2258 */
2259 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2260   EVT VT = Op.getValueType();
2261   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2262                                VT, (128 / VT.getSizeInBits()));
2263   DebugLoc dl = Op.getDebugLoc();
2264
2265   switch (VT.getSimpleVT().SimpleTy) {
2266   default:
2267     assert(false && "Invalid value type!");
2268   case MVT::i8: {
2269     SDValue N = Op.getOperand(0);
2270     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2271
2272     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2273     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2274
2275     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2276   }
2277
2278   case MVT::i16: {
2279     MachineFunction &MF = DAG.getMachineFunction();
2280     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2281
2282     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2283
2284     SDValue N = Op.getOperand(0);
2285     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2286     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2287     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2288
2289     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2290     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2291
2292     // CNTB_result becomes the chain to which all of the virtual registers
2293     // CNTB_reg, SUM1_reg become associated:
2294     SDValue CNTB_result =
2295       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2296
2297     SDValue CNTB_rescopy =
2298       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2299
2300     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2301
2302     return DAG.getNode(ISD::AND, dl, MVT::i16,
2303                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2304                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2305                                                Tmp1, Shift1),
2306                                    Tmp1),
2307                        Mask0);
2308   }
2309
2310   case MVT::i32: {
2311     MachineFunction &MF = DAG.getMachineFunction();
2312     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2313
2314     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2315     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2316
2317     SDValue N = Op.getOperand(0);
2318     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2319     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2320     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2321     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2322
2323     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2324     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2325
2326     // CNTB_result becomes the chain to which all of the virtual registers
2327     // CNTB_reg, SUM1_reg become associated:
2328     SDValue CNTB_result =
2329       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2330
2331     SDValue CNTB_rescopy =
2332       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2333
2334     SDValue Comp1 =
2335       DAG.getNode(ISD::SRL, dl, MVT::i32,
2336                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2337                   Shift1);
2338
2339     SDValue Sum1 =
2340       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2341                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2342
2343     SDValue Sum1_rescopy =
2344       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2345
2346     SDValue Comp2 =
2347       DAG.getNode(ISD::SRL, dl, MVT::i32,
2348                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2349                   Shift2);
2350     SDValue Sum2 =
2351       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2352                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2353
2354     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2355   }
2356
2357   case MVT::i64:
2358     break;
2359   }
2360
2361   return SDValue();
2362 }
2363
2364 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2365 /*!
2366  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2367  All conversions to i64 are expanded to a libcall.
2368  */
2369 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2370                               SPUTargetLowering &TLI) {
2371   EVT OpVT = Op.getValueType();
2372   SDValue Op0 = Op.getOperand(0);
2373   EVT Op0VT = Op0.getValueType();
2374
2375   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2376       || OpVT == MVT::i64) {
2377     // Convert f32 / f64 to i32 / i64 via libcall.
2378     RTLIB::Libcall LC =
2379             (Op.getOpcode() == ISD::FP_TO_SINT)
2380              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2381              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2382     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2383     SDValue Dummy;
2384     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2385   }
2386
2387   return Op;
2388 }
2389
2390 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2391 /*!
2392  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2393  All conversions from i64 are expanded to a libcall.
2394  */
2395 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2396                               SPUTargetLowering &TLI) {
2397   EVT OpVT = Op.getValueType();
2398   SDValue Op0 = Op.getOperand(0);
2399   EVT Op0VT = Op0.getValueType();
2400
2401   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2402       || Op0VT == MVT::i64) {
2403     // Convert i32, i64 to f64 via libcall:
2404     RTLIB::Libcall LC =
2405             (Op.getOpcode() == ISD::SINT_TO_FP)
2406              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2407              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2408     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2409     SDValue Dummy;
2410     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2411   }
2412
2413   return Op;
2414 }
2415
2416 //! Lower ISD::SETCC
2417 /*!
2418  This handles MVT::f64 (double floating point) condition lowering
2419  */
2420 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2421                           const TargetLowering &TLI) {
2422   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2423   DebugLoc dl = Op.getDebugLoc();
2424   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2425
2426   SDValue lhs = Op.getOperand(0);
2427   SDValue rhs = Op.getOperand(1);
2428   EVT lhsVT = lhs.getValueType();
2429   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2430
2431   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2432   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2433   EVT IntVT(MVT::i64);
2434
2435   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2436   // selected to a NOP:
2437   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2438   SDValue lhsHi32 =
2439           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2440                       DAG.getNode(ISD::SRL, dl, IntVT,
2441                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2442   SDValue lhsHi32abs =
2443           DAG.getNode(ISD::AND, dl, MVT::i32,
2444                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2445   SDValue lhsLo32 =
2446           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2447
2448   // SETO and SETUO only use the lhs operand:
2449   if (CC->get() == ISD::SETO) {
2450     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2451     // SETUO
2452     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2453     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2454                        DAG.getSetCC(dl, ccResultVT,
2455                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2456                                     ISD::SETUO),
2457                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2458   } else if (CC->get() == ISD::SETUO) {
2459     // Evaluates to true if Op0 is [SQ]NaN
2460     return DAG.getNode(ISD::AND, dl, ccResultVT,
2461                        DAG.getSetCC(dl, ccResultVT,
2462                                     lhsHi32abs,
2463                                     DAG.getConstant(0x7ff00000, MVT::i32),
2464                                     ISD::SETGE),
2465                        DAG.getSetCC(dl, ccResultVT,
2466                                     lhsLo32,
2467                                     DAG.getConstant(0, MVT::i32),
2468                                     ISD::SETGT));
2469   }
2470
2471   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2472   SDValue rhsHi32 =
2473           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2474                       DAG.getNode(ISD::SRL, dl, IntVT,
2475                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2476
2477   // If a value is negative, subtract from the sign magnitude constant:
2478   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2479
2480   // Convert the sign-magnitude representation into 2's complement:
2481   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2482                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2483   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2484   SDValue lhsSelect =
2485           DAG.getNode(ISD::SELECT, dl, IntVT,
2486                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2487
2488   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2489                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2490   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2491   SDValue rhsSelect =
2492           DAG.getNode(ISD::SELECT, dl, IntVT,
2493                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2494
2495   unsigned compareOp;
2496
2497   switch (CC->get()) {
2498   case ISD::SETOEQ:
2499   case ISD::SETUEQ:
2500     compareOp = ISD::SETEQ; break;
2501   case ISD::SETOGT:
2502   case ISD::SETUGT:
2503     compareOp = ISD::SETGT; break;
2504   case ISD::SETOGE:
2505   case ISD::SETUGE:
2506     compareOp = ISD::SETGE; break;
2507   case ISD::SETOLT:
2508   case ISD::SETULT:
2509     compareOp = ISD::SETLT; break;
2510   case ISD::SETOLE:
2511   case ISD::SETULE:
2512     compareOp = ISD::SETLE; break;
2513   case ISD::SETUNE:
2514   case ISD::SETONE:
2515     compareOp = ISD::SETNE; break;
2516   default:
2517     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2518   }
2519
2520   SDValue result =
2521           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2522                        (ISD::CondCode) compareOp);
2523
2524   if ((CC->get() & 0x8) == 0) {
2525     // Ordered comparison:
2526     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2527                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2528                                   ISD::SETO);
2529     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2530                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2531                                   ISD::SETO);
2532     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2533
2534     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2535   }
2536
2537   return result;
2538 }
2539
2540 //! Lower ISD::SELECT_CC
2541 /*!
2542   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2543   SELB instruction.
2544
2545   \note Need to revisit this in the future: if the code path through the true
2546   and false value computations is longer than the latency of a branch (6
2547   cycles), then it would be more advantageous to branch and insert a new basic
2548   block and branch on the condition. However, this code does not make that
2549   assumption, given the simplisitc uses so far.
2550  */
2551
2552 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2553                               const TargetLowering &TLI) {
2554   EVT VT = Op.getValueType();
2555   SDValue lhs = Op.getOperand(0);
2556   SDValue rhs = Op.getOperand(1);
2557   SDValue trueval = Op.getOperand(2);
2558   SDValue falseval = Op.getOperand(3);
2559   SDValue condition = Op.getOperand(4);
2560   DebugLoc dl = Op.getDebugLoc();
2561
2562   // NOTE: SELB's arguments: $rA, $rB, $mask
2563   //
2564   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2565   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2566   // condition was true and 0s where the condition was false. Hence, the
2567   // arguments to SELB get reversed.
2568
2569   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2570   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2571   // with another "cannot select select_cc" assert:
2572
2573   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2574                                 TLI.getSetCCResultType(Op.getValueType()),
2575                                 lhs, rhs, condition);
2576   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2577 }
2578
2579 //! Custom lower ISD::TRUNCATE
2580 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2581 {
2582   // Type to truncate to
2583   EVT VT = Op.getValueType();
2584   MVT simpleVT = VT.getSimpleVT();
2585   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2586                                VT, (128 / VT.getSizeInBits()));
2587   DebugLoc dl = Op.getDebugLoc();
2588
2589   // Type to truncate from
2590   SDValue Op0 = Op.getOperand(0);
2591   EVT Op0VT = Op0.getValueType();
2592
2593   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2594     // Create shuffle mask, least significant doubleword of quadword
2595     unsigned maskHigh = 0x08090a0b;
2596     unsigned maskLow = 0x0c0d0e0f;
2597     // Use a shuffle to perform the truncation
2598     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2599                                    DAG.getConstant(maskHigh, MVT::i32),
2600                                    DAG.getConstant(maskLow, MVT::i32),
2601                                    DAG.getConstant(maskHigh, MVT::i32),
2602                                    DAG.getConstant(maskLow, MVT::i32));
2603
2604     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2605                                        Op0, Op0, shufMask);
2606
2607     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2608   }
2609
2610   return SDValue();             // Leave the truncate unmolested
2611 }
2612
2613 /*!
2614  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2615  * algorithm is to duplicate the sign bit using rotmai to generate at
2616  * least one byte full of sign bits. Then propagate the "sign-byte" into
2617  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2618  *
2619  * @param Op The sext operand
2620  * @param DAG The current DAG
2621  * @return The SDValue with the entire instruction sequence
2622  */
2623 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2624 {
2625   DebugLoc dl = Op.getDebugLoc();
2626
2627   // Type to extend to
2628   MVT OpVT = Op.getValueType().getSimpleVT();
2629
2630   // Type to extend from
2631   SDValue Op0 = Op.getOperand(0);
2632   MVT Op0VT = Op0.getValueType().getSimpleVT();
2633
2634   // The type to extend to needs to be a i128 and
2635   // the type to extend from needs to be i64 or i32.
2636   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2637           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2638
2639   // Create shuffle mask
2640   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2641   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2642   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2643   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2644                                  DAG.getConstant(mask1, MVT::i32),
2645                                  DAG.getConstant(mask1, MVT::i32),
2646                                  DAG.getConstant(mask2, MVT::i32),
2647                                  DAG.getConstant(mask3, MVT::i32));
2648
2649   // Word wise arithmetic right shift to generate at least one byte
2650   // that contains sign bits.
2651   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2652   SDValue sraVal = DAG.getNode(ISD::SRA,
2653                  dl,
2654                  mvt,
2655                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2656                  DAG.getConstant(31, MVT::i32));
2657
2658   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2659   // and the input value into the lower 64 bits.
2660   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2661       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2662
2663   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2664 }
2665
2666 //! Custom (target-specific) lowering entry point
2667 /*!
2668   This is where LLVM's DAG selection process calls to do target-specific
2669   lowering of nodes.
2670  */
2671 SDValue
2672 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2673 {
2674   unsigned Opc = (unsigned) Op.getOpcode();
2675   EVT VT = Op.getValueType();
2676
2677   switch (Opc) {
2678   default: {
2679 #ifndef NDEBUG
2680     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2681     errs() << "Op.getOpcode() = " << Opc << "\n";
2682     errs() << "*Op.getNode():\n";
2683     Op.getNode()->dump();
2684 #endif
2685     llvm_unreachable(0);
2686   }
2687   case ISD::LOAD:
2688   case ISD::EXTLOAD:
2689   case ISD::SEXTLOAD:
2690   case ISD::ZEXTLOAD:
2691     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2692   case ISD::STORE:
2693     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2694   case ISD::ConstantPool:
2695     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2696   case ISD::GlobalAddress:
2697     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2698   case ISD::JumpTable:
2699     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2700   case ISD::ConstantFP:
2701     return LowerConstantFP(Op, DAG);
2702
2703   // i8, i64 math ops:
2704   case ISD::ADD:
2705   case ISD::SUB:
2706   case ISD::ROTR:
2707   case ISD::ROTL:
2708   case ISD::SRL:
2709   case ISD::SHL:
2710   case ISD::SRA: {
2711     if (VT == MVT::i8)
2712       return LowerI8Math(Op, DAG, Opc, *this);
2713     break;
2714   }
2715
2716   case ISD::FP_TO_SINT:
2717   case ISD::FP_TO_UINT:
2718     return LowerFP_TO_INT(Op, DAG, *this);
2719
2720   case ISD::SINT_TO_FP:
2721   case ISD::UINT_TO_FP:
2722     return LowerINT_TO_FP(Op, DAG, *this);
2723
2724   // Vector-related lowering.
2725   case ISD::BUILD_VECTOR:
2726     return LowerBUILD_VECTOR(Op, DAG);
2727   case ISD::SCALAR_TO_VECTOR:
2728     return LowerSCALAR_TO_VECTOR(Op, DAG);
2729   case ISD::VECTOR_SHUFFLE:
2730     return LowerVECTOR_SHUFFLE(Op, DAG);
2731   case ISD::EXTRACT_VECTOR_ELT:
2732     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2733   case ISD::INSERT_VECTOR_ELT:
2734     return LowerINSERT_VECTOR_ELT(Op, DAG);
2735
2736   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2737   case ISD::AND:
2738   case ISD::OR:
2739   case ISD::XOR:
2740     return LowerByteImmed(Op, DAG);
2741
2742   // Vector and i8 multiply:
2743   case ISD::MUL:
2744     if (VT == MVT::i8)
2745       return LowerI8Math(Op, DAG, Opc, *this);
2746
2747   case ISD::CTPOP:
2748     return LowerCTPOP(Op, DAG);
2749
2750   case ISD::SELECT_CC:
2751     return LowerSELECT_CC(Op, DAG, *this);
2752
2753   case ISD::SETCC:
2754     return LowerSETCC(Op, DAG, *this);
2755
2756   case ISD::TRUNCATE:
2757     return LowerTRUNCATE(Op, DAG);
2758
2759   case ISD::SIGN_EXTEND:
2760     return LowerSIGN_EXTEND(Op, DAG);
2761   }
2762
2763   return SDValue();
2764 }
2765
2766 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2767                                            SmallVectorImpl<SDValue>&Results,
2768                                            SelectionDAG &DAG)
2769 {
2770 #if 0
2771   unsigned Opc = (unsigned) N->getOpcode();
2772   EVT OpVT = N->getValueType(0);
2773
2774   switch (Opc) {
2775   default: {
2776     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2777     errs() << "Op.getOpcode() = " << Opc << "\n";
2778     errs() << "*Op.getNode():\n";
2779     N->dump();
2780     abort();
2781     /*NOTREACHED*/
2782   }
2783   }
2784 #endif
2785
2786   /* Otherwise, return unchanged */
2787 }
2788
2789 //===----------------------------------------------------------------------===//
2790 // Target Optimization Hooks
2791 //===----------------------------------------------------------------------===//
2792
2793 SDValue
2794 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2795 {
2796 #if 0
2797   TargetMachine &TM = getTargetMachine();
2798 #endif
2799   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2800   SelectionDAG &DAG = DCI.DAG;
2801   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2802   EVT NodeVT = N->getValueType(0);      // The node's value type
2803   EVT Op0VT = Op0.getValueType();       // The first operand's result
2804   SDValue Result;                       // Initially, empty result
2805   DebugLoc dl = N->getDebugLoc();
2806
2807   switch (N->getOpcode()) {
2808   default: break;
2809   case ISD::ADD: {
2810     SDValue Op1 = N->getOperand(1);
2811
2812     if (Op0.getOpcode() == SPUISD::IndirectAddr
2813         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2814       // Normalize the operands to reduce repeated code
2815       SDValue IndirectArg = Op0, AddArg = Op1;
2816
2817       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2818         IndirectArg = Op1;
2819         AddArg = Op0;
2820       }
2821
2822       if (isa<ConstantSDNode>(AddArg)) {
2823         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2824         SDValue IndOp1 = IndirectArg.getOperand(1);
2825
2826         if (CN0->isNullValue()) {
2827           // (add (SPUindirect <arg>, <arg>), 0) ->
2828           // (SPUindirect <arg>, <arg>)
2829
2830 #if !defined(NDEBUG)
2831           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2832             errs() << "\n"
2833                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2834                  << "With:    (SPUindirect <arg>, <arg>)\n";
2835           }
2836 #endif
2837
2838           return IndirectArg;
2839         } else if (isa<ConstantSDNode>(IndOp1)) {
2840           // (add (SPUindirect <arg>, <const>), <const>) ->
2841           // (SPUindirect <arg>, <const + const>)
2842           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2843           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2844           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2845
2846 #if !defined(NDEBUG)
2847           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2848             errs() << "\n"
2849                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2850                  << "), " << CN0->getSExtValue() << ")\n"
2851                  << "With:    (SPUindirect <arg>, "
2852                  << combinedConst << ")\n";
2853           }
2854 #endif
2855
2856           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2857                              IndirectArg, combinedValue);
2858         }
2859       }
2860     }
2861     break;
2862   }
2863   case ISD::SIGN_EXTEND:
2864   case ISD::ZERO_EXTEND:
2865   case ISD::ANY_EXTEND: {
2866     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2867       // (any_extend (SPUextract_elt0 <arg>)) ->
2868       // (SPUextract_elt0 <arg>)
2869       // Types must match, however...
2870 #if !defined(NDEBUG)
2871       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2872         errs() << "\nReplace: ";
2873         N->dump(&DAG);
2874         errs() << "\nWith:    ";
2875         Op0.getNode()->dump(&DAG);
2876         errs() << "\n";
2877       }
2878 #endif
2879
2880       return Op0;
2881     }
2882     break;
2883   }
2884   case SPUISD::IndirectAddr: {
2885     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2886       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2887       if (CN != 0 && CN->getZExtValue() == 0) {
2888         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2889         // (SPUaform <addr>, 0)
2890
2891         DEBUG(errs() << "Replace: ");
2892         DEBUG(N->dump(&DAG));
2893         DEBUG(errs() << "\nWith:    ");
2894         DEBUG(Op0.getNode()->dump(&DAG));
2895         DEBUG(errs() << "\n");
2896
2897         return Op0;
2898       }
2899     } else if (Op0.getOpcode() == ISD::ADD) {
2900       SDValue Op1 = N->getOperand(1);
2901       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2902         // (SPUindirect (add <arg>, <arg>), 0) ->
2903         // (SPUindirect <arg>, <arg>)
2904         if (CN1->isNullValue()) {
2905
2906 #if !defined(NDEBUG)
2907           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2908             errs() << "\n"
2909                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2910                  << "With:    (SPUindirect <arg>, <arg>)\n";
2911           }
2912 #endif
2913
2914           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2915                              Op0.getOperand(0), Op0.getOperand(1));
2916         }
2917       }
2918     }
2919     break;
2920   }
2921   case SPUISD::SHLQUAD_L_BITS:
2922   case SPUISD::SHLQUAD_L_BYTES:
2923   case SPUISD::ROTBYTES_LEFT: {
2924     SDValue Op1 = N->getOperand(1);
2925
2926     // Kill degenerate vector shifts:
2927     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2928       if (CN->isNullValue()) {
2929         Result = Op0;
2930       }
2931     }
2932     break;
2933   }
2934   case SPUISD::PREFSLOT2VEC: {
2935     switch (Op0.getOpcode()) {
2936     default:
2937       break;
2938     case ISD::ANY_EXTEND:
2939     case ISD::ZERO_EXTEND:
2940     case ISD::SIGN_EXTEND: {
2941       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2942       // <arg>
2943       // but only if the SPUprefslot2vec and <arg> types match.
2944       SDValue Op00 = Op0.getOperand(0);
2945       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2946         SDValue Op000 = Op00.getOperand(0);
2947         if (Op000.getValueType() == NodeVT) {
2948           Result = Op000;
2949         }
2950       }
2951       break;
2952     }
2953     case SPUISD::VEC2PREFSLOT: {
2954       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2955       // <arg>
2956       Result = Op0.getOperand(0);
2957       break;
2958     }
2959     }
2960     break;
2961   }
2962   }
2963
2964   // Otherwise, return unchanged.
2965 #ifndef NDEBUG
2966   if (Result.getNode()) {
2967     DEBUG(errs() << "\nReplace.SPU: ");
2968     DEBUG(N->dump(&DAG));
2969     DEBUG(errs() << "\nWith:        ");
2970     DEBUG(Result.getNode()->dump(&DAG));
2971     DEBUG(errs() << "\n");
2972   }
2973 #endif
2974
2975   return Result;
2976 }
2977
2978 //===----------------------------------------------------------------------===//
2979 // Inline Assembly Support
2980 //===----------------------------------------------------------------------===//
2981
2982 /// getConstraintType - Given a constraint letter, return the type of
2983 /// constraint it is for this target.
2984 SPUTargetLowering::ConstraintType
2985 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2986   if (ConstraintLetter.size() == 1) {
2987     switch (ConstraintLetter[0]) {
2988     default: break;
2989     case 'b':
2990     case 'r':
2991     case 'f':
2992     case 'v':
2993     case 'y':
2994       return C_RegisterClass;
2995     }
2996   }
2997   return TargetLowering::getConstraintType(ConstraintLetter);
2998 }
2999
3000 std::pair<unsigned, const TargetRegisterClass*>
3001 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3002                                                 EVT VT) const
3003 {
3004   if (Constraint.size() == 1) {
3005     // GCC RS6000 Constraint Letters
3006     switch (Constraint[0]) {
3007     case 'b':   // R1-R31
3008     case 'r':   // R0-R31
3009       if (VT == MVT::i64)
3010         return std::make_pair(0U, SPU::R64CRegisterClass);
3011       return std::make_pair(0U, SPU::R32CRegisterClass);
3012     case 'f':
3013       if (VT == MVT::f32)
3014         return std::make_pair(0U, SPU::R32FPRegisterClass);
3015       else if (VT == MVT::f64)
3016         return std::make_pair(0U, SPU::R64FPRegisterClass);
3017       break;
3018     case 'v':
3019       return std::make_pair(0U, SPU::GPRCRegisterClass);
3020     }
3021   }
3022
3023   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3024 }
3025
3026 //! Compute used/known bits for a SPU operand
3027 void
3028 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3029                                                   const APInt &Mask,
3030                                                   APInt &KnownZero,
3031                                                   APInt &KnownOne,
3032                                                   const SelectionDAG &DAG,
3033                                                   unsigned Depth ) const {
3034 #if 0
3035   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3036
3037   switch (Op.getOpcode()) {
3038   default:
3039     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3040     break;
3041   case CALL:
3042   case SHUFB:
3043   case SHUFFLE_MASK:
3044   case CNTB:
3045   case SPUISD::PREFSLOT2VEC:
3046   case SPUISD::LDRESULT:
3047   case SPUISD::VEC2PREFSLOT:
3048   case SPUISD::SHLQUAD_L_BITS:
3049   case SPUISD::SHLQUAD_L_BYTES:
3050   case SPUISD::VEC_ROTL:
3051   case SPUISD::VEC_ROTR:
3052   case SPUISD::ROTBYTES_LEFT:
3053   case SPUISD::SELECT_MASK:
3054   case SPUISD::SELB:
3055   }
3056 #endif
3057 }
3058
3059 unsigned
3060 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3061                                                    unsigned Depth) const {
3062   switch (Op.getOpcode()) {
3063   default:
3064     return 1;
3065
3066   case ISD::SETCC: {
3067     EVT VT = Op.getValueType();
3068
3069     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3070       VT = MVT::i32;
3071     }
3072     return VT.getSizeInBits();
3073   }
3074   }
3075 }
3076
3077 // LowerAsmOperandForConstraint
3078 void
3079 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3080                                                 char ConstraintLetter,
3081                                                 bool hasMemory,
3082                                                 std::vector<SDValue> &Ops,
3083                                                 SelectionDAG &DAG) const {
3084   // Default, for the time being, to the base class handler
3085   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3086                                                Ops, DAG);
3087 }
3088
3089 /// isLegalAddressImmediate - Return true if the integer value can be used
3090 /// as the offset of the target addressing mode.
3091 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3092                                                 const Type *Ty) const {
3093   // SPU's addresses are 256K:
3094   return (V > -(1 << 18) && V < (1 << 18) - 1);
3095 }
3096
3097 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3098   return false;
3099 }
3100
3101 bool
3102 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3103   // The SPU target isn't yet aware of offsets.
3104   return false;
3105 }