lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/Target/TargetLoweringObjectFile.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // Used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! EVT mapping to useful data for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       std::string msg;
  75       raw_string_ostream Msg(msg);
  76       Msg << "getValueTypeMapEntry returns NULL for "
  77            << VT.getEVTString();
  78       llvm_report_error(Msg.str());
  79     }
  80 #endif
  81
  82     return retval;
  83   }
  84
  85   //! Expand a library call into an actual call DAG node
  86   /*!
  87    \note
  88    This code is taken from SelectionDAGLegalize, since it is not exposed as
  89    part of the LLVM SelectionDAG API.
  90    */
  91
  92   SDValue
  93   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  94                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  95     // The input chain to this libcall is the entry node of the function.
  96     // Legalizing the call will automatically add the previous call to the
  97     // dependence.
  98     SDValue InChain = DAG.getEntryNode();
  99
 100     TargetLowering::ArgListTy Args;
 101     TargetLowering::ArgListEntry Entry;
 102     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 103       EVT ArgVT = Op.getOperand(i).getValueType();
 104       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 105       Entry.Node = Op.getOperand(i);
 106       Entry.Ty = ArgTy;
 107       Entry.isSExt = isSigned;
 108       Entry.isZExt = !isSigned;
 109       Args.push_back(Entry);
 110     }
 111     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 112                                            TLI.getPointerTy());
 113
 114     // Splice the libcall in wherever FindInputOutputChains tells us to.
 115     const Type *RetTy =
 116                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 117     std::pair<SDValue, SDValue> CallInfo =
 118             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 119                             0, TLI.getLibcallCallingConv(LC), false,
 120                             /*isReturnValueUsed=*/true,
 121                             Callee, Args, DAG, Op.getDebugLoc(),
 122                             DAG.GetOrdering(InChain.getNode()));
 123
 124     return CallInfo.first;
 125   }
 126 }
 127
 128 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 129   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 130     SPUTM(TM) {
 131   // Fold away setcc operations if possible.
 132   setPow2DivIsCheap();
 133
 134   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 135   setUseUnderscoreSetJmp(true);
 136   setUseUnderscoreLongJmp(true);
 137
 138   // Set RTLIB libcall names as used by SPU:
 139   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 140
 141   // Set up the SPU's register classes:
 142   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 143   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 144   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 145   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 146   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 147   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 148   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 149
 150   // SPU has no sign or zero extended loads for i1, i8, i16:
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 152   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 154
 155   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 156   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 157
 158   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 159   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 160   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 161   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 162
 163   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 164
 165   // SPU constant load actions are custom lowered:
 166   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 167   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 168
 169   // SPU's loads and stores have to be custom lowered:
 170   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 171        ++sctype) {
 172     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 173
 174     setOperationAction(ISD::LOAD,   VT, Custom);
 175     setOperationAction(ISD::STORE,  VT, Custom);
 176     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 177     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 178     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 179
 180     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 181       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 182       setTruncStoreAction(VT, StoreVT, Expand);
 183     }
 184   }
 185
 186   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 187        ++sctype) {
 188     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 189
 190     setOperationAction(ISD::LOAD,   VT, Custom);
 191     setOperationAction(ISD::STORE,  VT, Custom);
 192
 193     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 194       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 195       setTruncStoreAction(VT, StoreVT, Expand);
 196     }
 197   }
 198
 199   // Expand the jumptable branches
 200   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 201   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 202
 203   // Custom lower SELECT_CC for most cases, but expand by default
 204   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 206   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 207   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 208   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 209
 210   // SPU has no intrinsics for these particular operations:
 211   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 212
 213   // SPU has no division/remainder instructions
 214   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 215   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 216   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 217   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 218   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 219   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 220   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 221   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 222   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 223   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 224   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 225   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 226   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 227   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 228   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 229   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 230   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 231   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 232   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 233   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 234   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 235   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 236   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 237   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 238   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 239   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 240   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 241   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 242   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 243   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 244
 245   // We don't support sin/cos/sqrt/fmod
 246   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 248   setOperationAction(ISD::FREM , MVT::f64, Expand);
 249   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 250   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 251   setOperationAction(ISD::FREM , MVT::f32, Expand);
 252
 253   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 254   // for f32!)
 255   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 256   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 257
 258   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 259   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 260
 261   // SPU can do rotate right and left, so legalize it... but customize for i8
 262   // because instructions don't exist.
 263
 264   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 265   //        .td files.
 266   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 267   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 268   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 269
 270   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 271   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 272   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 273
 274   // SPU has no native version of shift left/right for i8
 275   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 276   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 277   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 278
 279   // Make these operations legal and handle them during instruction selection:
 280   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 281   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 282   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 283
 284   // Custom lower i8, i32 and i64 multiplications
 285   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 286   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 287   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 288
 289   // Expand double-width multiplication
 290   // FIXME: It would probably be reasonable to support some of these operations
 291   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 292   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 293   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 294   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 295   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 296   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 297   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 298   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 299   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 300   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 301   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 302   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 303   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 304   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 305   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 306   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 307
 308   // Need to custom handle (some) common i8, i64 math ops
 309   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 310   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 311   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 312   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 313
 314   // SPU does not have BSWAP. It does have i32 support CTLZ.
 315   // CTPOP has to be custom lowered.
 316   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 317   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 318
 319   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 321   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 322   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 323   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 324
 325   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 327   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 328   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 329   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 330
 331   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 332   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 333   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 334   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 335   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 336
 337   // SPU has a version of select that implements (a&~c)|(b&c), just like
 338   // select ought to work:
 339   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 340   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 341   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 342   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 343
 344   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 345   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 346   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 347   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 348   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 349
 350   // Custom lower i128 -> i64 truncates
 351   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 352
 353   // Custom lower i32/i64 -> i128 sign extend
 354   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 355
 356   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 357   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 358   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 359   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 360   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 361   // to expand to a libcall, hence the custom lowering:
 362   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 363   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 364   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 365   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 366   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 367   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 368
 369   // FDIV on SPU requires custom lowering
 370   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 371
 372   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 373   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 378   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 379   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 380   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 381
 382   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 383   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 384   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 385   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 386
 387   // We cannot sextinreg(i1).  Expand to shifts.
 388   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 389
 390   // We want to legalize GlobalAddress and ConstantPool nodes into the
 391   // appropriate instructions to materialize the address.
 392   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 393        ++sctype) {
 394     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 395
 396     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 397     setOperationAction(ISD::ConstantPool,   VT, Custom);
 398     setOperationAction(ISD::JumpTable,      VT, Custom);
 399   }
 400
 401   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 402   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 403
 404   // Use the default implementation.
 405   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 406   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 407   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 408   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 409   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 410   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 411   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 412
 413   // Cell SPU has instructions for converting between i64 and fp.
 414   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 415   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 416
 417   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 418   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 419
 420   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 421   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 422
 423   // First set operation action for all vector types to expand. Then we
 424   // will selectively turn on ones that can be effectively codegen'd.
 425   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 428   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 429   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 430   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 431
 432   // "Odd size" vector classes that we're willing to support:
 433   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 434
 435   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 436        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 437     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 438
 439     // add/sub are legal for all supported vector VT's.
 440     setOperationAction(ISD::ADD,     VT, Legal);
 441     setOperationAction(ISD::SUB,     VT, Legal);
 442     // mul has to be custom lowered.
 443     setOperationAction(ISD::MUL,     VT, Legal);
 444
 445     setOperationAction(ISD::AND,     VT, Legal);
 446     setOperationAction(ISD::OR,      VT, Legal);
 447     setOperationAction(ISD::XOR,     VT, Legal);
 448     setOperationAction(ISD::LOAD,    VT, Legal);
 449     setOperationAction(ISD::SELECT,  VT, Legal);
 450     setOperationAction(ISD::STORE,   VT, Legal);
 451
 452     // These operations need to be expanded:
 453     setOperationAction(ISD::SDIV,    VT, Expand);
 454     setOperationAction(ISD::SREM,    VT, Expand);
 455     setOperationAction(ISD::UDIV,    VT, Expand);
 456     setOperationAction(ISD::UREM,    VT, Expand);
 457
 458     // Custom lower build_vector, constant pool spills, insert and
 459     // extract vector elements:
 460     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 461     setOperationAction(ISD::ConstantPool, VT, Custom);
 462     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 463     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 464     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 465     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 466   }
 467
 468   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 469   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 470   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 471   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 472
 473   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 474
 475   setShiftAmountType(MVT::i32);
 476   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 477
 478   setStackPointerRegisterToSaveRestore(SPU::R1);
 479
 480   // We have target-specific dag combine patterns for the following nodes:
 481   setTargetDAGCombine(ISD::ADD);
 482   setTargetDAGCombine(ISD::ZERO_EXTEND);
 483   setTargetDAGCombine(ISD::SIGN_EXTEND);
 484   setTargetDAGCombine(ISD::ANY_EXTEND);
 485
 486   computeRegisterProperties();
 487
 488   // Set pre-RA register scheduler default to BURR, which produces slightly
 489   // better code than the default (could also be TDRR, but TargetLowering.h
 490   // needs a mod to support that model):
 491   setSchedulingPreference(SchedulingForRegPressure);
 492 }
 493
 494 const char *
 495 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 496 {
 497   if (node_names.empty()) {
 498     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 499     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 500     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 501     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 502     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 503     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 504     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 505     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 506     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 507     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 508     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 509     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 510     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 511     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 512     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 513     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 514     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 515     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 516     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 517             "SPUISD::ROTBYTES_LEFT_BITS";
 518     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 519     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 520     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 521     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 522     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 523   }
 524
 525   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 526
 527   return ((i != node_names.end()) ? i->second : 0);
 528 }
 529
 530 /// getFunctionAlignment - Return the Log2 alignment of this function.
 531 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 532   return 3;
 533 }
 534
 535 //===----------------------------------------------------------------------===//
 536 // Return the Cell SPU's SETCC result type
 537 //===----------------------------------------------------------------------===//
 538
 539 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 540   // i16 and i32 are valid SETCC result types
 541   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 542     VT.getSimpleVT().SimpleTy :
 543     MVT::i32);
 544 }
 545
 546 //===----------------------------------------------------------------------===//
 547 // Calling convention code:
 548 //===----------------------------------------------------------------------===//
 549
 550 #include "SPUGenCallingConv.inc"
 551
 552 //===----------------------------------------------------------------------===//
 553 //  LowerOperation implementation
 554 //===----------------------------------------------------------------------===//
 555
 556 /// Custom lower loads for CellSPU
 557 /*!
 558  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 559  within a 16-byte block, we have to rotate to extract the requested element.
 560
 561  For extending loads, we also want to ensure that the following sequence is
 562  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 563
 564 \verbatim
 565 %1  v16i8,ch = load
 566 %2  v16i8,ch = rotate %1
 567 %3  v4f8, ch = bitconvert %2
 568 %4  f32      = vec2perfslot %3
 569 %5  f64      = fp_extend %4
 570 \endverbatim
 571 */
 572 static SDValue
 573 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 574   LoadSDNode *LN = cast<LoadSDNode>(Op);
 575   SDValue the_chain = LN->getChain();
 576   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 577   EVT InVT = LN->getMemoryVT();
 578   EVT OutVT = Op.getValueType();
 579   ISD::LoadExtType ExtType = LN->getExtensionType();
 580   unsigned alignment = LN->getAlignment();
 581   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 582   DebugLoc dl = Op.getDebugLoc();
 583
 584   switch (LN->getAddressingMode()) {
 585   case ISD::UNINDEXED: {
 586     SDValue result;
 587     SDValue basePtr = LN->getBasePtr();
 588     SDValue rotate;
 589
 590     if (alignment == 16) {
 591       ConstantSDNode *CN;
 592
 593       // Special cases for a known aligned load to simplify the base pointer
 594       // and the rotation amount:
 595       if (basePtr.getOpcode() == ISD::ADD
 596           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 597         // Known offset into basePtr
 598         int64_t offset = CN->getSExtValue();
 599         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 600
 601         if (rotamt < 0)
 602           rotamt += 16;
 603
 604         rotate = DAG.getConstant(rotamt, MVT::i16);
 605
 606         // Simplify the base pointer for this case:
 607         basePtr = basePtr.getOperand(0);
 608         if ((offset & ~0xf) > 0) {
 609           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 610                                 basePtr,
 611                                 DAG.getConstant((offset & ~0xf), PtrVT));
 612         }
 613       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 614                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 615                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 616                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 617         // Plain aligned a-form address: rotate into preferred slot
 618         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 619         int64_t rotamt = -vtm->prefslot_byte;
 620         if (rotamt < 0)
 621           rotamt += 16;
 622         rotate = DAG.getConstant(rotamt, MVT::i16);
 623       } else {
 624         // Offset the rotate amount by the basePtr and the preferred slot
 625         // byte offset
 626         int64_t rotamt = -vtm->prefslot_byte;
 627         if (rotamt < 0)
 628           rotamt += 16;
 629         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 630                              basePtr,
 631                              DAG.getConstant(rotamt, PtrVT));
 632       }
 633     } else {
 634       // Unaligned load: must be more pessimistic about addressing modes:
 635       if (basePtr.getOpcode() == ISD::ADD) {
 636         MachineFunction &MF = DAG.getMachineFunction();
 637         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 638         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 639         SDValue Flag;
 640
 641         SDValue Op0 = basePtr.getOperand(0);
 642         SDValue Op1 = basePtr.getOperand(1);
 643
 644         if (isa<ConstantSDNode>(Op1)) {
 645           // Convert the (add <ptr>, <const>) to an indirect address contained
 646           // in a register. Note that this is done because we need to avoid
 647           // creating a 0(reg) d-form address due to the SPU's block loads.
 648           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 649           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 650           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 651         } else {
 652           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 653           // will likely be lowered as a reg(reg) x-form address.
 654           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 655         }
 656       } else {
 657         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 658                               basePtr,
 659                               DAG.getConstant(0, PtrVT));
 660       }
 661
 662       // Offset the rotate amount by the basePtr and the preferred slot
 663       // byte offset
 664       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 665                            basePtr,
 666                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 667     }
 668
 669     // Re-emit as a v16i8 vector load
 670     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 671                          LN->getSrcValue(), LN->getSrcValueOffset(),
 672                          LN->isVolatile(), 16);
 673
 674     // Update the chain
 675     the_chain = result.getValue(1);
 676
 677     // Rotate into the preferred slot:
 678     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 679                          result.getValue(0), rotate);
 680
 681     // Convert the loaded v16i8 vector to the appropriate vector type
 682     // specified by the operand:
 683     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 684                                  InVT, (128 / InVT.getSizeInBits()));
 685     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 686                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 687
 688     // Handle extending loads by extending the scalar result:
 689     if (ExtType == ISD::SEXTLOAD) {
 690       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 691     } else if (ExtType == ISD::ZEXTLOAD) {
 692       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 693     } else if (ExtType == ISD::EXTLOAD) {
 694       unsigned NewOpc = ISD::ANY_EXTEND;
 695
 696       if (OutVT.isFloatingPoint())
 697         NewOpc = ISD::FP_EXTEND;
 698
 699       result = DAG.getNode(NewOpc, dl, OutVT, result);
 700     }
 701
 702     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 703     SDValue retops[2] = {
 704       result,
 705       the_chain
 706     };
 707
 708     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 709                          retops, sizeof(retops) / sizeof(retops[0]));
 710     return result;
 711   }
 712   case ISD::PRE_INC:
 713   case ISD::PRE_DEC:
 714   case ISD::POST_INC:
 715   case ISD::POST_DEC:
 716   case ISD::LAST_INDEXED_MODE:
 717     {
 718       std::string msg;
 719       raw_string_ostream Msg(msg);
 720       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 721             "UNINDEXED\n";
 722       Msg << (unsigned) LN->getAddressingMode();
 723       llvm_report_error(Msg.str());
 724       /*NOTREACHED*/
 725     }
 726   }
 727
 728   return SDValue();
 729 }
 730
 731 /// Custom lower stores for CellSPU
 732 /*!
 733  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 734  within a 16-byte block, we have to generate a shuffle to insert the
 735  requested element into its place, then store the resulting block.
 736  */
 737 static SDValue
 738 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 739   StoreSDNode *SN = cast<StoreSDNode>(Op);
 740   SDValue Value = SN->getValue();
 741   EVT VT = Value.getValueType();
 742   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 743   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 744   DebugLoc dl = Op.getDebugLoc();
 745   unsigned alignment = SN->getAlignment();
 746
 747   switch (SN->getAddressingMode()) {
 748   case ISD::UNINDEXED: {
 749     // The vector type we really want to load from the 16-byte chunk.
 750     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 751                                  VT, (128 / VT.getSizeInBits()));
 752
 753     SDValue alignLoadVec;
 754     SDValue basePtr = SN->getBasePtr();
 755     SDValue the_chain = SN->getChain();
 756     SDValue insertEltOffs;
 757
 758     if (alignment == 16) {
 759       ConstantSDNode *CN;
 760
 761       // Special cases for a known aligned load to simplify the base pointer
 762       // and insertion byte:
 763       if (basePtr.getOpcode() == ISD::ADD
 764           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 765         // Known offset into basePtr
 766         int64_t offset = CN->getSExtValue();
 767
 768         // Simplify the base pointer for this case:
 769         basePtr = basePtr.getOperand(0);
 770         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 771                                     basePtr,
 772                                     DAG.getConstant((offset & 0xf), PtrVT));
 773
 774         if ((offset & ~0xf) > 0) {
 775           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 776                                 basePtr,
 777                                 DAG.getConstant((offset & ~0xf), PtrVT));
 778         }
 779       } else {
 780         // Otherwise, assume it's at byte 0 of basePtr
 781         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 782                                     basePtr,
 783                                     DAG.getConstant(0, PtrVT));
 784       }
 785     } else {
 786       // Unaligned load: must be more pessimistic about addressing modes:
 787       if (basePtr.getOpcode() == ISD::ADD) {
 788         MachineFunction &MF = DAG.getMachineFunction();
 789         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 790         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 791         SDValue Flag;
 792
 793         SDValue Op0 = basePtr.getOperand(0);
 794         SDValue Op1 = basePtr.getOperand(1);
 795
 796         if (isa<ConstantSDNode>(Op1)) {
 797           // Convert the (add <ptr>, <const>) to an indirect address contained
 798           // in a register. Note that this is done because we need to avoid
 799           // creating a 0(reg) d-form address due to the SPU's block loads.
 800           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 801           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 802           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 803         } else {
 804           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 805           // will likely be lowered as a reg(reg) x-form address.
 806           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 807         }
 808       } else {
 809         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 810                               basePtr,
 811                               DAG.getConstant(0, PtrVT));
 812       }
 813
 814       // Insertion point is solely determined by basePtr's contents
 815       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 816                                   basePtr,
 817                                   DAG.getConstant(0, PtrVT));
 818     }
 819
 820     // Re-emit as a v16i8 vector load
 821     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 822                                SN->getSrcValue(), SN->getSrcValueOffset(),
 823                                SN->isVolatile(), 16);
 824
 825     // Update the chain
 826     the_chain = alignLoadVec.getValue(1);
 827
 828     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 829     SDValue theValue = SN->getValue();
 830     SDValue result;
 831
 832     if (StVT != VT
 833         && (theValue.getOpcode() == ISD::AssertZext
 834             || theValue.getOpcode() == ISD::AssertSext)) {
 835       // Drill down and get the value for zero- and sign-extended
 836       // quantities
 837       theValue = theValue.getOperand(0);
 838     }
 839
 840     // If the base pointer is already a D-form address, then just create
 841     // a new D-form address with a slot offset and the orignal base pointer.
 842     // Otherwise generate a D-form address with the slot offset relative
 843     // to the stack pointer, which is always aligned.
 844 #if !defined(NDEBUG)
 845       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 846         errs() << "CellSPU LowerSTORE: basePtr = ";
 847         basePtr.getNode()->dump(&DAG);
 848         errs() << "\n";
 849       }
 850 #endif
 851
 852     SDValue insertEltOp =
 853             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 854     SDValue vectorizeOp =
 855             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 856
 857     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 858                          vectorizeOp, alignLoadVec,
 859                          DAG.getNode(ISD::BIT_CONVERT, dl,
 860                                      MVT::v4i32, insertEltOp));
 861
 862     result = DAG.getStore(the_chain, dl, result, basePtr,
 863                           LN->getSrcValue(), LN->getSrcValueOffset(),
 864                           LN->isVolatile(), LN->getAlignment());
 865
 866 #if 0 && !defined(NDEBUG)
 867     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 868       const SDValue &currentRoot = DAG.getRoot();
 869
 870       DAG.setRoot(result);
 871       errs() << "------- CellSPU:LowerStore result:\n";
 872       DAG.dump();
 873       errs() << "-------\n";
 874       DAG.setRoot(currentRoot);
 875     }
 876 #endif
 877
 878     return result;
 879     /*UNREACHED*/
 880   }
 881   case ISD::PRE_INC:
 882   case ISD::PRE_DEC:
 883   case ISD::POST_INC:
 884   case ISD::POST_DEC:
 885   case ISD::LAST_INDEXED_MODE:
 886     {
 887       std::string msg;
 888       raw_string_ostream Msg(msg);
 889       Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
 890             "UNINDEXED\n";
 891       Msg << (unsigned) SN->getAddressingMode();
 892       llvm_report_error(Msg.str());
 893       /*NOTREACHED*/
 894     }
 895   }
 896
 897   return SDValue();
 898 }
 899
 900 //! Generate the address of a constant pool entry.
 901 static SDValue
 902 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 903   EVT PtrVT = Op.getValueType();
 904   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 905   Constant *C = CP->getConstVal();
 906   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 907   SDValue Zero = DAG.getConstant(0, PtrVT);
 908   const TargetMachine &TM = DAG.getTarget();
 909   // FIXME there is no actual debug info here
 910   DebugLoc dl = Op.getDebugLoc();
 911
 912   if (TM.getRelocationModel() == Reloc::Static) {
 913     if (!ST->usingLargeMem()) {
 914       // Just return the SDValue with the constant pool address in it.
 915       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 916     } else {
 917       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 918       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 919       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 920     }
 921   }
 922
 923   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 924                    " not supported.");
 925   return SDValue();
 926 }
 927
 928 //! Alternate entry point for generating the address of a constant pool entry
 929 SDValue
 930 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 931   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 932 }
 933
 934 static SDValue
 935 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 936   EVT PtrVT = Op.getValueType();
 937   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 938   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 939   SDValue Zero = DAG.getConstant(0, PtrVT);
 940   const TargetMachine &TM = DAG.getTarget();
 941   // FIXME there is no actual debug info here
 942   DebugLoc dl = Op.getDebugLoc();
 943
 944   if (TM.getRelocationModel() == Reloc::Static) {
 945     if (!ST->usingLargeMem()) {
 946       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 947     } else {
 948       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 949       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 950       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 951     }
 952   }
 953
 954   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 955                    " not supported.");
 956   return SDValue();
 957 }
 958
 959 static SDValue
 960 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 961   EVT PtrVT = Op.getValueType();
 962   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 963   GlobalValue *GV = GSDN->getGlobal();
 964   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 965   const TargetMachine &TM = DAG.getTarget();
 966   SDValue Zero = DAG.getConstant(0, PtrVT);
 967   // FIXME there is no actual debug info here
 968   DebugLoc dl = Op.getDebugLoc();
 969
 970   if (TM.getRelocationModel() == Reloc::Static) {
 971     if (!ST->usingLargeMem()) {
 972       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 973     } else {
 974       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 975       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 976       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 977     }
 978   } else {
 979     llvm_report_error("LowerGlobalAddress: Relocation model other than static"
 980                       "not supported.");
 981     /*NOTREACHED*/
 982   }
 983
 984   return SDValue();
 985 }
 986
 987 //! Custom lower double precision floating point constants
 988 static SDValue
 989 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 990   EVT VT = Op.getValueType();
 991   // FIXME there is no actual debug info here
 992   DebugLoc dl = Op.getDebugLoc();
 993
 994   if (VT == MVT::f64) {
 995     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 996
 997     assert((FP != 0) &&
 998            "LowerConstantFP: Node is not ConstantFPSDNode");
 999
1000     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1001     SDValue T = DAG.getConstant(dbits, MVT::i64);
1002     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1003     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1004                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1005   }
1006
1007   return SDValue();
1008 }
1009
1010 SDValue
1011 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1012                                         CallingConv::ID CallConv, bool isVarArg,
1013                                         const SmallVectorImpl<ISD::InputArg>
1014                                           &Ins,
1015                                         DebugLoc dl, SelectionDAG &DAG,
1016                                         SmallVectorImpl<SDValue> &InVals) {
1017
1018   MachineFunction &MF = DAG.getMachineFunction();
1019   MachineFrameInfo *MFI = MF.getFrameInfo();
1020   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1021
1022   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1023   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1024
1025   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1026   unsigned ArgRegIdx = 0;
1027   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1028
1029   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1030
1031   // Add DAG nodes to load the arguments or copy them out of registers.
1032   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1033     EVT ObjectVT = Ins[ArgNo].VT;
1034     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1035     SDValue ArgVal;
1036
1037     if (ArgRegIdx < NumArgRegs) {
1038       const TargetRegisterClass *ArgRegClass;
1039
1040       switch (ObjectVT.getSimpleVT().SimpleTy) {
1041       default: {
1042         std::string msg;
1043         raw_string_ostream Msg(msg);
1044         Msg << "LowerFormalArguments Unhandled argument type: "
1045              << ObjectVT.getEVTString();
1046         llvm_report_error(Msg.str());
1047       }
1048       case MVT::i8:
1049         ArgRegClass = &SPU::R8CRegClass;
1050         break;
1051       case MVT::i16:
1052         ArgRegClass = &SPU::R16CRegClass;
1053         break;
1054       case MVT::i32:
1055         ArgRegClass = &SPU::R32CRegClass;
1056         break;
1057       case MVT::i64:
1058         ArgRegClass = &SPU::R64CRegClass;
1059         break;
1060       case MVT::i128:
1061         ArgRegClass = &SPU::GPRCRegClass;
1062         break;
1063       case MVT::f32:
1064         ArgRegClass = &SPU::R32FPRegClass;
1065         break;
1066       case MVT::f64:
1067         ArgRegClass = &SPU::R64FPRegClass;
1068         break;
1069       case MVT::v2f64:
1070       case MVT::v4f32:
1071       case MVT::v2i64:
1072       case MVT::v4i32:
1073       case MVT::v8i16:
1074       case MVT::v16i8:
1075         ArgRegClass = &SPU::VECREGRegClass;
1076         break;
1077       }
1078
1079       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1080       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1081       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1082       ++ArgRegIdx;
1083     } else {
1084       // We need to load the argument to a virtual register if we determined
1085       // above that we ran out of physical registers of the appropriate type
1086       // or we're forced to do vararg
1087       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1088       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1089       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
1090       ArgOffset += StackSlotSize;
1091     }
1092
1093     InVals.push_back(ArgVal);
1094     // Update the chain
1095     Chain = ArgVal.getOperand(0);
1096   }
1097
1098   // vararg handling:
1099   if (isVarArg) {
1100     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1101     // We will spill (79-3)+1 registers to the stack
1102     SmallVector<SDValue, 79-3+1> MemOps;
1103
1104     // Create the frame slot
1105
1106     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1107       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1108                                                  true, false);
1109       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1110       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1111       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
1112       Chain = Store.getOperand(0);
1113       MemOps.push_back(Store);
1114
1115       // Increment address by stack slot size for the next stored argument
1116       ArgOffset += StackSlotSize;
1117     }
1118     if (!MemOps.empty())
1119       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1120                           &MemOps[0], MemOps.size());
1121   }
1122
1123   return Chain;
1124 }
1125
1126 /// isLSAAddress - Return the immediate to use if the specified
1127 /// value is representable as a LSA address.
1128 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1129   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1130   if (!C) return 0;
1131
1132   int Addr = C->getZExtValue();
1133   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1134       (Addr << 14 >> 14) != Addr)
1135     return 0;  // Top 14 bits have to be sext of immediate.
1136
1137   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1138 }
1139
1140 SDValue
1141 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1142                              CallingConv::ID CallConv, bool isVarArg,
1143                              bool isTailCall,
1144                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1145                              const SmallVectorImpl<ISD::InputArg> &Ins,
1146                              DebugLoc dl, SelectionDAG &DAG,
1147                              SmallVectorImpl<SDValue> &InVals) {
1148
1149   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1150   unsigned NumOps     = Outs.size();
1151   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1152   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1153   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1154
1155   // Handy pointer type
1156   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1157
1158   // Set up a copy of the stack pointer for use loading and storing any
1159   // arguments that may not fit in the registers available for argument
1160   // passing.
1161   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1162
1163   // Figure out which arguments are going to go in registers, and which in
1164   // memory.
1165   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1166   unsigned ArgRegIdx = 0;
1167
1168   // Keep track of registers passing arguments
1169   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1170   // And the arguments passed on the stack
1171   SmallVector<SDValue, 8> MemOpChains;
1172
1173   for (unsigned i = 0; i != NumOps; ++i) {
1174     SDValue Arg = Outs[i].Val;
1175
1176     // PtrOff will be used to store the current argument to the stack if a
1177     // register cannot be found for it.
1178     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1179     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1180
1181     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1182     default: llvm_unreachable("Unexpected ValueType for argument!");
1183     case MVT::i8:
1184     case MVT::i16:
1185     case MVT::i32:
1186     case MVT::i64:
1187     case MVT::i128:
1188       if (ArgRegIdx != NumArgRegs) {
1189         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1190       } else {
1191         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1192         ArgOffset += StackSlotSize;
1193       }
1194       break;
1195     case MVT::f32:
1196     case MVT::f64:
1197       if (ArgRegIdx != NumArgRegs) {
1198         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1199       } else {
1200         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1201         ArgOffset += StackSlotSize;
1202       }
1203       break;
1204     case MVT::v2i64:
1205     case MVT::v2f64:
1206     case MVT::v4f32:
1207     case MVT::v4i32:
1208     case MVT::v8i16:
1209     case MVT::v16i8:
1210       if (ArgRegIdx != NumArgRegs) {
1211         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1212       } else {
1213         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1214         ArgOffset += StackSlotSize;
1215       }
1216       break;
1217     }
1218   }
1219
1220   // Accumulate how many bytes are to be pushed on the stack, including the
1221   // linkage area, and parameter passing area.  According to the SPU ABI,
1222   // we minimally need space for [LR] and [SP].
1223   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1224
1225   // Insert a call sequence start
1226   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1227                                                             true));
1228
1229   if (!MemOpChains.empty()) {
1230     // Adjust the stack pointer for the stack arguments.
1231     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1232                         &MemOpChains[0], MemOpChains.size());
1233   }
1234
1235   // Build a sequence of copy-to-reg nodes chained together with token chain
1236   // and flag operands which copy the outgoing args into the appropriate regs.
1237   SDValue InFlag;
1238   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1239     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1240                              RegsToPass[i].second, InFlag);
1241     InFlag = Chain.getValue(1);
1242   }
1243
1244   SmallVector<SDValue, 8> Ops;
1245   unsigned CallOpc = SPUISD::CALL;
1246
1247   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1248   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1249   // node so that legalize doesn't hack it.
1250   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1251     GlobalValue *GV = G->getGlobal();
1252     EVT CalleeVT = Callee.getValueType();
1253     SDValue Zero = DAG.getConstant(0, PtrVT);
1254     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1255
1256     if (!ST->usingLargeMem()) {
1257       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1258       // style calls, otherwise, external symbols are BRASL calls. This assumes
1259       // that declared/defined symbols are in the same compilation unit and can
1260       // be reached through PC-relative jumps.
1261       //
1262       // NOTE:
1263       // This may be an unsafe assumption for JIT and really large compilation
1264       // units.
1265       if (GV->isDeclaration()) {
1266         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1267       } else {
1268         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1269       }
1270     } else {
1271       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1272       // address pairs:
1273       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1274     }
1275   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1276     EVT CalleeVT = Callee.getValueType();
1277     SDValue Zero = DAG.getConstant(0, PtrVT);
1278     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1279         Callee.getValueType());
1280
1281     if (!ST->usingLargeMem()) {
1282       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1283     } else {
1284       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1285     }
1286   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1287     // If this is an absolute destination address that appears to be a legal
1288     // local store address, use the munged value.
1289     Callee = SDValue(Dest, 0);
1290   }
1291
1292   Ops.push_back(Chain);
1293   Ops.push_back(Callee);
1294
1295   // Add argument registers to the end of the list so that they are known live
1296   // into the call.
1297   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1298     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1299                                   RegsToPass[i].second.getValueType()));
1300
1301   if (InFlag.getNode())
1302     Ops.push_back(InFlag);
1303   // Returns a chain and a flag for retval copy to use.
1304   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1305                       &Ops[0], Ops.size());
1306   InFlag = Chain.getValue(1);
1307
1308   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1309                              DAG.getIntPtrConstant(0, true), InFlag);
1310   if (!Ins.empty())
1311     InFlag = Chain.getValue(1);
1312
1313   // If the function returns void, just return the chain.
1314   if (Ins.empty())
1315     return Chain;
1316
1317   // If the call has results, copy the values out of the ret val registers.
1318   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1319   default: llvm_unreachable("Unexpected ret value!");
1320   case MVT::Other: break;
1321   case MVT::i32:
1322     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1323       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1324                                  MVT::i32, InFlag).getValue(1);
1325       InVals.push_back(Chain.getValue(0));
1326       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1327                                  Chain.getValue(2)).getValue(1);
1328       InVals.push_back(Chain.getValue(0));
1329     } else {
1330       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1331                                  InFlag).getValue(1);
1332       InVals.push_back(Chain.getValue(0));
1333     }
1334     break;
1335   case MVT::i64:
1336     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1337                                InFlag).getValue(1);
1338     InVals.push_back(Chain.getValue(0));
1339     break;
1340   case MVT::i128:
1341     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1342                                InFlag).getValue(1);
1343     InVals.push_back(Chain.getValue(0));
1344     break;
1345   case MVT::f32:
1346   case MVT::f64:
1347     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1348                                InFlag).getValue(1);
1349     InVals.push_back(Chain.getValue(0));
1350     break;
1351   case MVT::v2f64:
1352   case MVT::v2i64:
1353   case MVT::v4f32:
1354   case MVT::v4i32:
1355   case MVT::v8i16:
1356   case MVT::v16i8:
1357     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1358                                    InFlag).getValue(1);
1359     InVals.push_back(Chain.getValue(0));
1360     break;
1361   }
1362
1363   return Chain;
1364 }
1365
1366 SDValue
1367 SPUTargetLowering::LowerReturn(SDValue Chain,
1368                                CallingConv::ID CallConv, bool isVarArg,
1369                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1370                                DebugLoc dl, SelectionDAG &DAG) {
1371
1372   SmallVector<CCValAssign, 16> RVLocs;
1373   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1374                  RVLocs, *DAG.getContext());
1375   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1376
1377   // If this is the first return lowered for this function, add the regs to the
1378   // liveout set for the function.
1379   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1380     for (unsigned i = 0; i != RVLocs.size(); ++i)
1381       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1382   }
1383
1384   SDValue Flag;
1385
1386   // Copy the result values into the output registers.
1387   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1388     CCValAssign &VA = RVLocs[i];
1389     assert(VA.isRegLoc() && "Can only return in registers!");
1390     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1391                              Outs[i].Val, Flag);
1392     Flag = Chain.getValue(1);
1393   }
1394
1395   if (Flag.getNode())
1396     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1397   else
1398     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1399 }
1400
1401
1402 //===----------------------------------------------------------------------===//
1403 // Vector related lowering:
1404 //===----------------------------------------------------------------------===//
1405
1406 static ConstantSDNode *
1407 getVecImm(SDNode *N) {
1408   SDValue OpVal(0, 0);
1409
1410   // Check to see if this buildvec has a single non-undef value in its elements.
1411   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1412     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1413     if (OpVal.getNode() == 0)
1414       OpVal = N->getOperand(i);
1415     else if (OpVal != N->getOperand(i))
1416       return 0;
1417   }
1418
1419   if (OpVal.getNode() != 0) {
1420     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1421       return CN;
1422     }
1423   }
1424
1425   return 0;
1426 }
1427
1428 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1430 /// constant
1431 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1432                               EVT ValueType) {
1433   if (ConstantSDNode *CN = getVecImm(N)) {
1434     uint64_t Value = CN->getZExtValue();
1435     if (ValueType == MVT::i64) {
1436       uint64_t UValue = CN->getZExtValue();
1437       uint32_t upper = uint32_t(UValue >> 32);
1438       uint32_t lower = uint32_t(UValue);
1439       if (upper != lower)
1440         return SDValue();
1441       Value = Value >> 32;
1442     }
1443     if (Value <= 0x3ffff)
1444       return DAG.getTargetConstant(Value, ValueType);
1445   }
1446
1447   return SDValue();
1448 }
1449
1450 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1451 /// and the value fits into a signed 16-bit constant, and if so, return the
1452 /// constant
1453 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1454                               EVT ValueType) {
1455   if (ConstantSDNode *CN = getVecImm(N)) {
1456     int64_t Value = CN->getSExtValue();
1457     if (ValueType == MVT::i64) {
1458       uint64_t UValue = CN->getZExtValue();
1459       uint32_t upper = uint32_t(UValue >> 32);
1460       uint32_t lower = uint32_t(UValue);
1461       if (upper != lower)
1462         return SDValue();
1463       Value = Value >> 32;
1464     }
1465     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1466       return DAG.getTargetConstant(Value, ValueType);
1467     }
1468   }
1469
1470   return SDValue();
1471 }
1472
1473 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1474 /// and the value fits into a signed 10-bit constant, and if so, return the
1475 /// constant
1476 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1477                               EVT ValueType) {
1478   if (ConstantSDNode *CN = getVecImm(N)) {
1479     int64_t Value = CN->getSExtValue();
1480     if (ValueType == MVT::i64) {
1481       uint64_t UValue = CN->getZExtValue();
1482       uint32_t upper = uint32_t(UValue >> 32);
1483       uint32_t lower = uint32_t(UValue);
1484       if (upper != lower)
1485         return SDValue();
1486       Value = Value >> 32;
1487     }
1488     if (isS10Constant(Value))
1489       return DAG.getTargetConstant(Value, ValueType);
1490   }
1491
1492   return SDValue();
1493 }
1494
1495 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1496 /// and the value fits into a signed 8-bit constant, and if so, return the
1497 /// constant.
1498 ///
1499 /// @note: The incoming vector is v16i8 because that's the only way we can load
1500 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1501 /// same value.
1502 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1503                              EVT ValueType) {
1504   if (ConstantSDNode *CN = getVecImm(N)) {
1505     int Value = (int) CN->getZExtValue();
1506     if (ValueType == MVT::i16
1507         && Value <= 0xffff                 /* truncated from uint64_t */
1508         && ((short) Value >> 8) == ((short) Value & 0xff))
1509       return DAG.getTargetConstant(Value & 0xff, ValueType);
1510     else if (ValueType == MVT::i8
1511              && (Value & 0xff) == Value)
1512       return DAG.getTargetConstant(Value, ValueType);
1513   }
1514
1515   return SDValue();
1516 }
1517
1518 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1519 /// and the value fits into a signed 16-bit constant, and if so, return the
1520 /// constant
1521 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1522                                EVT ValueType) {
1523   if (ConstantSDNode *CN = getVecImm(N)) {
1524     uint64_t Value = CN->getZExtValue();
1525     if ((ValueType == MVT::i32
1526           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1527         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1528       return DAG.getTargetConstant(Value >> 16, ValueType);
1529   }
1530
1531   return SDValue();
1532 }
1533
1534 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1535 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1536   if (ConstantSDNode *CN = getVecImm(N)) {
1537     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1538   }
1539
1540   return SDValue();
1541 }
1542
1543 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1544 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1545   if (ConstantSDNode *CN = getVecImm(N)) {
1546     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1547   }
1548
1549   return SDValue();
1550 }
1551
1552 //! Lower a BUILD_VECTOR instruction creatively:
1553 static SDValue
1554 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1555   EVT VT = Op.getValueType();
1556   EVT EltVT = VT.getVectorElementType();
1557   DebugLoc dl = Op.getDebugLoc();
1558   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1559   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1560   unsigned minSplatBits = EltVT.getSizeInBits();
1561
1562   if (minSplatBits < 16)
1563     minSplatBits = 16;
1564
1565   APInt APSplatBits, APSplatUndef;
1566   unsigned SplatBitSize;
1567   bool HasAnyUndefs;
1568
1569   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1570                             HasAnyUndefs, minSplatBits)
1571       || minSplatBits < SplatBitSize)
1572     return SDValue();   // Wasn't a constant vector or splat exceeded min
1573
1574   uint64_t SplatBits = APSplatBits.getZExtValue();
1575
1576   switch (VT.getSimpleVT().SimpleTy) {
1577   default: {
1578     std::string msg;
1579     raw_string_ostream Msg(msg);
1580     Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1581          << VT.getEVTString();
1582     llvm_report_error(Msg.str());
1583     /*NOTREACHED*/
1584   }
1585   case MVT::v4f32: {
1586     uint32_t Value32 = uint32_t(SplatBits);
1587     assert(SplatBitSize == 32
1588            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1589     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1590     SDValue T = DAG.getConstant(Value32, MVT::i32);
1591     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1592                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1593     break;
1594   }
1595   case MVT::v2f64: {
1596     uint64_t f64val = uint64_t(SplatBits);
1597     assert(SplatBitSize == 64
1598            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1599     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1600     SDValue T = DAG.getConstant(f64val, MVT::i64);
1601     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1602                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1603     break;
1604   }
1605   case MVT::v16i8: {
1606    // 8-bit constants have to be expanded to 16-bits
1607    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1608    SmallVector<SDValue, 8> Ops;
1609
1610    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1611    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1612                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1613   }
1614   case MVT::v8i16: {
1615     unsigned short Value16 = SplatBits;
1616     SDValue T = DAG.getConstant(Value16, EltVT);
1617     SmallVector<SDValue, 8> Ops;
1618
1619     Ops.assign(8, T);
1620     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1621   }
1622   case MVT::v4i32: {
1623     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1624     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1625   }
1626   case MVT::v2i32: {
1627     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1628     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1629   }
1630   case MVT::v2i64: {
1631     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1632   }
1633   }
1634
1635   return SDValue();
1636 }
1637
1638 /*!
1639  */
1640 SDValue
1641 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1642                      DebugLoc dl) {
1643   uint32_t upper = uint32_t(SplatVal >> 32);
1644   uint32_t lower = uint32_t(SplatVal);
1645
1646   if (upper == lower) {
1647     // Magic constant that can be matched by IL, ILA, et. al.
1648     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1649     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1650                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1651                                    Val, Val, Val, Val));
1652   } else {
1653     bool upper_special, lower_special;
1654
1655     // NOTE: This code creates common-case shuffle masks that can be easily
1656     // detected as common expressions. It is not attempting to create highly
1657     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1658
1659     // Detect if the upper or lower half is a special shuffle mask pattern:
1660     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1661     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1662
1663     // Both upper and lower are special, lower to a constant pool load:
1664     if (lower_special && upper_special) {
1665       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1666       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1667                          SplatValCN, SplatValCN);
1668     }
1669
1670     SDValue LO32;
1671     SDValue HI32;
1672     SmallVector<SDValue, 16> ShufBytes;
1673     SDValue Result;
1674
1675     // Create lower vector if not a special pattern
1676     if (!lower_special) {
1677       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1678       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1679                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1680                                      LO32C, LO32C, LO32C, LO32C));
1681     }
1682
1683     // Create upper vector if not a special pattern
1684     if (!upper_special) {
1685       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1686       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1687                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1688                                      HI32C, HI32C, HI32C, HI32C));
1689     }
1690
1691     // If either upper or lower are special, then the two input operands are
1692     // the same (basically, one of them is a "don't care")
1693     if (lower_special)
1694       LO32 = HI32;
1695     if (upper_special)
1696       HI32 = LO32;
1697
1698     for (int i = 0; i < 4; ++i) {
1699       uint64_t val = 0;
1700       for (int j = 0; j < 4; ++j) {
1701         SDValue V;
1702         bool process_upper, process_lower;
1703         val <<= 8;
1704         process_upper = (upper_special && (i & 1) == 0);
1705         process_lower = (lower_special && (i & 1) == 1);
1706
1707         if (process_upper || process_lower) {
1708           if ((process_upper && upper == 0)
1709                   || (process_lower && lower == 0))
1710             val |= 0x80;
1711           else if ((process_upper && upper == 0xffffffff)
1712                   || (process_lower && lower == 0xffffffff))
1713             val |= 0xc0;
1714           else if ((process_upper && upper == 0x80000000)
1715                   || (process_lower && lower == 0x80000000))
1716             val |= (j == 0 ? 0xe0 : 0x80);
1717         } else
1718           val |= i * 4 + j + ((i & 1) * 16);
1719       }
1720
1721       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1722     }
1723
1724     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1725                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1726                                    &ShufBytes[0], ShufBytes.size()));
1727   }
1728 }
1729
1730 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1731 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1732 /// permutation vector, V3, is monotonically increasing with one "exception"
1733 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1734 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1735 /// In either case, the net result is going to eventually invoke SHUFB to
1736 /// permute/shuffle the bytes from V1 and V2.
1737 /// \note
1738 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1739 /// control word for byte/halfword/word insertion. This takes care of a single
1740 /// element move from V2 into V1.
1741 /// \note
1742 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1743 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1744   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1745   SDValue V1 = Op.getOperand(0);
1746   SDValue V2 = Op.getOperand(1);
1747   DebugLoc dl = Op.getDebugLoc();
1748
1749   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1750
1751   // If we have a single element being moved from V1 to V2, this can be handled
1752   // using the C*[DX] compute mask instructions, but the vector elements have
1753   // to be monotonically increasing with one exception element.
1754   EVT VecVT = V1.getValueType();
1755   EVT EltVT = VecVT.getVectorElementType();
1756   unsigned EltsFromV2 = 0;
1757   unsigned V2Elt = 0;
1758   unsigned V2EltIdx0 = 0;
1759   unsigned CurrElt = 0;
1760   unsigned MaxElts = VecVT.getVectorNumElements();
1761   unsigned PrevElt = 0;
1762   unsigned V0Elt = 0;
1763   bool monotonic = true;
1764   bool rotate = true;
1765
1766   if (EltVT == MVT::i8) {
1767     V2EltIdx0 = 16;
1768   } else if (EltVT == MVT::i16) {
1769     V2EltIdx0 = 8;
1770   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1771     V2EltIdx0 = 4;
1772   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1773     V2EltIdx0 = 2;
1774   } else
1775     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1776
1777   for (unsigned i = 0; i != MaxElts; ++i) {
1778     if (SVN->getMaskElt(i) < 0)
1779       continue;
1780
1781     unsigned SrcElt = SVN->getMaskElt(i);
1782
1783     if (monotonic) {
1784       if (SrcElt >= V2EltIdx0) {
1785         if (1 >= (++EltsFromV2)) {
1786           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1787         }
1788       } else if (CurrElt != SrcElt) {
1789         monotonic = false;
1790       }
1791
1792       ++CurrElt;
1793     }
1794
1795     if (rotate) {
1796       if (PrevElt > 0 && SrcElt < MaxElts) {
1797         if ((PrevElt == SrcElt - 1)
1798             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1799           PrevElt = SrcElt;
1800           if (SrcElt == 0)
1801             V0Elt = i;
1802         } else {
1803           rotate = false;
1804         }
1805       } else if (PrevElt == 0) {
1806         // First time through, need to keep track of previous element
1807         PrevElt = SrcElt;
1808       } else {
1809         // This isn't a rotation, takes elements from vector 2
1810         rotate = false;
1811       }
1812     }
1813   }
1814
1815   if (EltsFromV2 == 1 && monotonic) {
1816     // Compute mask and shuffle
1817     MachineFunction &MF = DAG.getMachineFunction();
1818     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1819     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1820     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1821     // Initialize temporary register to 0
1822     SDValue InitTempReg =
1823       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1824     // Copy register's contents as index in SHUFFLE_MASK:
1825     SDValue ShufMaskOp =
1826       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1827                   DAG.getTargetConstant(V2Elt, MVT::i32),
1828                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1829     // Use shuffle mask in SHUFB synthetic instruction:
1830     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1831                        ShufMaskOp);
1832   } else if (rotate) {
1833     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1834
1835     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1836                        V1, DAG.getConstant(rotamt, MVT::i16));
1837   } else {
1838    // Convert the SHUFFLE_VECTOR mask's input element units to the
1839    // actual bytes.
1840     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1841
1842     SmallVector<SDValue, 16> ResultMask;
1843     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1844       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1845
1846       for (unsigned j = 0; j < BytesPerElement; ++j)
1847         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1848     }
1849
1850     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1851                                     &ResultMask[0], ResultMask.size());
1852     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1853   }
1854 }
1855
1856 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1857   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1858   DebugLoc dl = Op.getDebugLoc();
1859
1860   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1861     // For a constant, build the appropriate constant vector, which will
1862     // eventually simplify to a vector register load.
1863
1864     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1865     SmallVector<SDValue, 16> ConstVecValues;
1866     EVT VT;
1867     size_t n_copies;
1868
1869     // Create a constant vector:
1870     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1871     default: llvm_unreachable("Unexpected constant value type in "
1872                               "LowerSCALAR_TO_VECTOR");
1873     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1874     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1875     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1876     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1877     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1878     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1879     }
1880
1881     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1882     for (size_t j = 0; j < n_copies; ++j)
1883       ConstVecValues.push_back(CValue);
1884
1885     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1886                        &ConstVecValues[0], ConstVecValues.size());
1887   } else {
1888     // Otherwise, copy the value from one register to another:
1889     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1890     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1891     case MVT::i8:
1892     case MVT::i16:
1893     case MVT::i32:
1894     case MVT::i64:
1895     case MVT::f32:
1896     case MVT::f64:
1897       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1898     }
1899   }
1900
1901   return SDValue();
1902 }
1903
1904 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1905   EVT VT = Op.getValueType();
1906   SDValue N = Op.getOperand(0);
1907   SDValue Elt = Op.getOperand(1);
1908   DebugLoc dl = Op.getDebugLoc();
1909   SDValue retval;
1910
1911   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1912     // Constant argument:
1913     int EltNo = (int) C->getZExtValue();
1914
1915     // sanity checks:
1916     if (VT == MVT::i8 && EltNo >= 16)
1917       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1918     else if (VT == MVT::i16 && EltNo >= 8)
1919       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1920     else if (VT == MVT::i32 && EltNo >= 4)
1921       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1922     else if (VT == MVT::i64 && EltNo >= 2)
1923       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1924
1925     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1926       // i32 and i64: Element 0 is the preferred slot
1927       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1928     }
1929
1930     // Need to generate shuffle mask and extract:
1931     int prefslot_begin = -1, prefslot_end = -1;
1932     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1933
1934     switch (VT.getSimpleVT().SimpleTy) {
1935     default:
1936       assert(false && "Invalid value type!");
1937     case MVT::i8: {
1938       prefslot_begin = prefslot_end = 3;
1939       break;
1940     }
1941     case MVT::i16: {
1942       prefslot_begin = 2; prefslot_end = 3;
1943       break;
1944     }
1945     case MVT::i32:
1946     case MVT::f32: {
1947       prefslot_begin = 0; prefslot_end = 3;
1948       break;
1949     }
1950     case MVT::i64:
1951     case MVT::f64: {
1952       prefslot_begin = 0; prefslot_end = 7;
1953       break;
1954     }
1955     }
1956
1957     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1958            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1959
1960     unsigned int ShufBytes[16] = {
1961       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1962     };
1963     for (int i = 0; i < 16; ++i) {
1964       // zero fill uppper part of preferred slot, don't care about the
1965       // other slots:
1966       unsigned int mask_val;
1967       if (i <= prefslot_end) {
1968         mask_val =
1969           ((i < prefslot_begin)
1970            ? 0x80
1971            : elt_byte + (i - prefslot_begin));
1972
1973         ShufBytes[i] = mask_val;
1974       } else
1975         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1976     }
1977
1978     SDValue ShufMask[4];
1979     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1980       unsigned bidx = i * 4;
1981       unsigned int bits = ((ShufBytes[bidx] << 24) |
1982                            (ShufBytes[bidx+1] << 16) |
1983                            (ShufBytes[bidx+2] << 8) |
1984                            ShufBytes[bidx+3]);
1985       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1986     }
1987
1988     SDValue ShufMaskVec =
1989       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1990                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1991
1992     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1993                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1994                                      N, N, ShufMaskVec));
1995   } else {
1996     // Variable index: Rotate the requested element into slot 0, then replicate
1997     // slot 0 across the vector
1998     EVT VecVT = N.getValueType();
1999     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2000       llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2001                         "vector type!");
2002     }
2003
2004     // Make life easier by making sure the index is zero-extended to i32
2005     if (Elt.getValueType() != MVT::i32)
2006       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2007
2008     // Scale the index to a bit/byte shift quantity
2009     APInt scaleFactor =
2010             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2011     unsigned scaleShift = scaleFactor.logBase2();
2012     SDValue vecShift;
2013
2014     if (scaleShift > 0) {
2015       // Scale the shift factor:
2016       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2017                         DAG.getConstant(scaleShift, MVT::i32));
2018     }
2019
2020     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2021
2022     // Replicate the bytes starting at byte 0 across the entire vector (for
2023     // consistency with the notion of a unified register set)
2024     SDValue replicate;
2025
2026     switch (VT.getSimpleVT().SimpleTy) {
2027     default:
2028       llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2029                         "type");
2030       /*NOTREACHED*/
2031     case MVT::i8: {
2032       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2033       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2034                               factor, factor, factor, factor);
2035       break;
2036     }
2037     case MVT::i16: {
2038       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2039       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2040                               factor, factor, factor, factor);
2041       break;
2042     }
2043     case MVT::i32:
2044     case MVT::f32: {
2045       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2046       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2047                               factor, factor, factor, factor);
2048       break;
2049     }
2050     case MVT::i64:
2051     case MVT::f64: {
2052       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2053       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2054       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2055                               loFactor, hiFactor, loFactor, hiFactor);
2056       break;
2057     }
2058     }
2059
2060     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2061                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2062                                      vecShift, vecShift, replicate));
2063   }
2064
2065   return retval;
2066 }
2067
2068 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2069   SDValue VecOp = Op.getOperand(0);
2070   SDValue ValOp = Op.getOperand(1);
2071   SDValue IdxOp = Op.getOperand(2);
2072   DebugLoc dl = Op.getDebugLoc();
2073   EVT VT = Op.getValueType();
2074
2075   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2076   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2077
2078   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2079   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2080   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2081                                 DAG.getRegister(SPU::R1, PtrVT),
2082                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2083   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2084
2085   SDValue result =
2086     DAG.getNode(SPUISD::SHUFB, dl, VT,
2087                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2088                 VecOp,
2089                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2090
2091   return result;
2092 }
2093
2094 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2095                            const TargetLowering &TLI)
2096 {
2097   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2098   DebugLoc dl = Op.getDebugLoc();
2099   EVT ShiftVT = TLI.getShiftAmountTy();
2100
2101   assert(Op.getValueType() == MVT::i8);
2102   switch (Opc) {
2103   default:
2104     llvm_unreachable("Unhandled i8 math operator");
2105     /*NOTREACHED*/
2106     break;
2107   case ISD::ADD: {
2108     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2109     // the result:
2110     SDValue N1 = Op.getOperand(1);
2111     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2112     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2113     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2114                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2115
2116   }
2117
2118   case ISD::SUB: {
2119     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2120     // the result:
2121     SDValue N1 = Op.getOperand(1);
2122     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2123     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2124     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2125                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2126   }
2127   case ISD::ROTR:
2128   case ISD::ROTL: {
2129     SDValue N1 = Op.getOperand(1);
2130     EVT N1VT = N1.getValueType();
2131
2132     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2133     if (!N1VT.bitsEq(ShiftVT)) {
2134       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2135                        ? ISD::ZERO_EXTEND
2136                        : ISD::TRUNCATE;
2137       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2138     }
2139
2140     // Replicate lower 8-bits into upper 8:
2141     SDValue ExpandArg =
2142       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2143                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2144                               N0, DAG.getConstant(8, MVT::i32)));
2145
2146     // Truncate back down to i8
2147     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2148                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2149   }
2150   case ISD::SRL:
2151   case ISD::SHL: {
2152     SDValue N1 = Op.getOperand(1);
2153     EVT N1VT = N1.getValueType();
2154
2155     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2156     if (!N1VT.bitsEq(ShiftVT)) {
2157       unsigned N1Opc = ISD::ZERO_EXTEND;
2158
2159       if (N1.getValueType().bitsGT(ShiftVT))
2160         N1Opc = ISD::TRUNCATE;
2161
2162       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2163     }
2164
2165     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2166                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2167   }
2168   case ISD::SRA: {
2169     SDValue N1 = Op.getOperand(1);
2170     EVT N1VT = N1.getValueType();
2171
2172     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2173     if (!N1VT.bitsEq(ShiftVT)) {
2174       unsigned N1Opc = ISD::SIGN_EXTEND;
2175
2176       if (N1VT.bitsGT(ShiftVT))
2177         N1Opc = ISD::TRUNCATE;
2178       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2179     }
2180
2181     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2182                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2183   }
2184   case ISD::MUL: {
2185     SDValue N1 = Op.getOperand(1);
2186
2187     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2188     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2189     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2190                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2191     break;
2192   }
2193   }
2194
2195   return SDValue();
2196 }
2197
2198 //! Lower byte immediate operations for v16i8 vectors:
2199 static SDValue
2200 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2201   SDValue ConstVec;
2202   SDValue Arg;
2203   EVT VT = Op.getValueType();
2204   DebugLoc dl = Op.getDebugLoc();
2205
2206   ConstVec = Op.getOperand(0);
2207   Arg = Op.getOperand(1);
2208   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2209     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2210       ConstVec = ConstVec.getOperand(0);
2211     } else {
2212       ConstVec = Op.getOperand(1);
2213       Arg = Op.getOperand(0);
2214       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2215         ConstVec = ConstVec.getOperand(0);
2216       }
2217     }
2218   }
2219
2220   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2221     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2222     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2223
2224     APInt APSplatBits, APSplatUndef;
2225     unsigned SplatBitSize;
2226     bool HasAnyUndefs;
2227     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2228
2229     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2230                               HasAnyUndefs, minSplatBits)
2231         && minSplatBits <= SplatBitSize) {
2232       uint64_t SplatBits = APSplatBits.getZExtValue();
2233       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2234
2235       SmallVector<SDValue, 16> tcVec;
2236       tcVec.assign(16, tc);
2237       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2238                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2239     }
2240   }
2241
2242   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2243   // lowered.  Return the operation, rather than a null SDValue.
2244   return Op;
2245 }
2246
2247 //! Custom lowering for CTPOP (count population)
2248 /*!
2249   Custom lowering code that counts the number ones in the input
2250   operand. SPU has such an instruction, but it counts the number of
2251   ones per byte, which then have to be accumulated.
2252 */
2253 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2254   EVT VT = Op.getValueType();
2255   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2256                                VT, (128 / VT.getSizeInBits()));
2257   DebugLoc dl = Op.getDebugLoc();
2258
2259   switch (VT.getSimpleVT().SimpleTy) {
2260   default:
2261     assert(false && "Invalid value type!");
2262   case MVT::i8: {
2263     SDValue N = Op.getOperand(0);
2264     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2265
2266     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2267     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2268
2269     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2270   }
2271
2272   case MVT::i16: {
2273     MachineFunction &MF = DAG.getMachineFunction();
2274     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2275
2276     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2277
2278     SDValue N = Op.getOperand(0);
2279     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2280     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2281     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2282
2283     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2284     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2285
2286     // CNTB_result becomes the chain to which all of the virtual registers
2287     // CNTB_reg, SUM1_reg become associated:
2288     SDValue CNTB_result =
2289       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2290
2291     SDValue CNTB_rescopy =
2292       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2293
2294     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2295
2296     return DAG.getNode(ISD::AND, dl, MVT::i16,
2297                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2298                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2299                                                Tmp1, Shift1),
2300                                    Tmp1),
2301                        Mask0);
2302   }
2303
2304   case MVT::i32: {
2305     MachineFunction &MF = DAG.getMachineFunction();
2306     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2307
2308     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2309     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2310
2311     SDValue N = Op.getOperand(0);
2312     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2313     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2314     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2315     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2316
2317     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2318     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2319
2320     // CNTB_result becomes the chain to which all of the virtual registers
2321     // CNTB_reg, SUM1_reg become associated:
2322     SDValue CNTB_result =
2323       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2324
2325     SDValue CNTB_rescopy =
2326       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2327
2328     SDValue Comp1 =
2329       DAG.getNode(ISD::SRL, dl, MVT::i32,
2330                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2331                   Shift1);
2332
2333     SDValue Sum1 =
2334       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2335                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2336
2337     SDValue Sum1_rescopy =
2338       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2339
2340     SDValue Comp2 =
2341       DAG.getNode(ISD::SRL, dl, MVT::i32,
2342                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2343                   Shift2);
2344     SDValue Sum2 =
2345       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2346                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2347
2348     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2349   }
2350
2351   case MVT::i64:
2352     break;
2353   }
2354
2355   return SDValue();
2356 }
2357
2358 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2359 /*!
2360  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2361  All conversions to i64 are expanded to a libcall.
2362  */
2363 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2364                               SPUTargetLowering &TLI) {
2365   EVT OpVT = Op.getValueType();
2366   SDValue Op0 = Op.getOperand(0);
2367   EVT Op0VT = Op0.getValueType();
2368
2369   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2370       || OpVT == MVT::i64) {
2371     // Convert f32 / f64 to i32 / i64 via libcall.
2372     RTLIB::Libcall LC =
2373             (Op.getOpcode() == ISD::FP_TO_SINT)
2374              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2375              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2376     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2377     SDValue Dummy;
2378     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2379   }
2380
2381   return Op;
2382 }
2383
2384 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2385 /*!
2386  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2387  All conversions from i64 are expanded to a libcall.
2388  */
2389 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2390                               SPUTargetLowering &TLI) {
2391   EVT OpVT = Op.getValueType();
2392   SDValue Op0 = Op.getOperand(0);
2393   EVT Op0VT = Op0.getValueType();
2394
2395   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2396       || Op0VT == MVT::i64) {
2397     // Convert i32, i64 to f64 via libcall:
2398     RTLIB::Libcall LC =
2399             (Op.getOpcode() == ISD::SINT_TO_FP)
2400              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2401              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2402     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2403     SDValue Dummy;
2404     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2405   }
2406
2407   return Op;
2408 }
2409
2410 //! Lower ISD::SETCC
2411 /*!
2412  This handles MVT::f64 (double floating point) condition lowering
2413  */
2414 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2415                           const TargetLowering &TLI) {
2416   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2417   DebugLoc dl = Op.getDebugLoc();
2418   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2419
2420   SDValue lhs = Op.getOperand(0);
2421   SDValue rhs = Op.getOperand(1);
2422   EVT lhsVT = lhs.getValueType();
2423   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2424
2425   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2426   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2427   EVT IntVT(MVT::i64);
2428
2429   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2430   // selected to a NOP:
2431   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2432   SDValue lhsHi32 =
2433           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2434                       DAG.getNode(ISD::SRL, dl, IntVT,
2435                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2436   SDValue lhsHi32abs =
2437           DAG.getNode(ISD::AND, dl, MVT::i32,
2438                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2439   SDValue lhsLo32 =
2440           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2441
2442   // SETO and SETUO only use the lhs operand:
2443   if (CC->get() == ISD::SETO) {
2444     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2445     // SETUO
2446     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2447     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2448                        DAG.getSetCC(dl, ccResultVT,
2449                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2450                                     ISD::SETUO),
2451                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2452   } else if (CC->get() == ISD::SETUO) {
2453     // Evaluates to true if Op0 is [SQ]NaN
2454     return DAG.getNode(ISD::AND, dl, ccResultVT,
2455                        DAG.getSetCC(dl, ccResultVT,
2456                                     lhsHi32abs,
2457                                     DAG.getConstant(0x7ff00000, MVT::i32),
2458                                     ISD::SETGE),
2459                        DAG.getSetCC(dl, ccResultVT,
2460                                     lhsLo32,
2461                                     DAG.getConstant(0, MVT::i32),
2462                                     ISD::SETGT));
2463   }
2464
2465   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2466   SDValue rhsHi32 =
2467           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2468                       DAG.getNode(ISD::SRL, dl, IntVT,
2469                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2470
2471   // If a value is negative, subtract from the sign magnitude constant:
2472   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2473
2474   // Convert the sign-magnitude representation into 2's complement:
2475   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2476                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2477   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2478   SDValue lhsSelect =
2479           DAG.getNode(ISD::SELECT, dl, IntVT,
2480                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2481
2482   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2483                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2484   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2485   SDValue rhsSelect =
2486           DAG.getNode(ISD::SELECT, dl, IntVT,
2487                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2488
2489   unsigned compareOp;
2490
2491   switch (CC->get()) {
2492   case ISD::SETOEQ:
2493   case ISD::SETUEQ:
2494     compareOp = ISD::SETEQ; break;
2495   case ISD::SETOGT:
2496   case ISD::SETUGT:
2497     compareOp = ISD::SETGT; break;
2498   case ISD::SETOGE:
2499   case ISD::SETUGE:
2500     compareOp = ISD::SETGE; break;
2501   case ISD::SETOLT:
2502   case ISD::SETULT:
2503     compareOp = ISD::SETLT; break;
2504   case ISD::SETOLE:
2505   case ISD::SETULE:
2506     compareOp = ISD::SETLE; break;
2507   case ISD::SETUNE:
2508   case ISD::SETONE:
2509     compareOp = ISD::SETNE; break;
2510   default:
2511     llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2512   }
2513
2514   SDValue result =
2515           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2516                        (ISD::CondCode) compareOp);
2517
2518   if ((CC->get() & 0x8) == 0) {
2519     // Ordered comparison:
2520     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2521                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2522                                   ISD::SETO);
2523     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2524                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2525                                   ISD::SETO);
2526     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2527
2528     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2529   }
2530
2531   return result;
2532 }
2533
2534 //! Lower ISD::SELECT_CC
2535 /*!
2536   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2537   SELB instruction.
2538
2539   \note Need to revisit this in the future: if the code path through the true
2540   and false value computations is longer than the latency of a branch (6
2541   cycles), then it would be more advantageous to branch and insert a new basic
2542   block and branch on the condition. However, this code does not make that
2543   assumption, given the simplisitc uses so far.
2544  */
2545
2546 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2547                               const TargetLowering &TLI) {
2548   EVT VT = Op.getValueType();
2549   SDValue lhs = Op.getOperand(0);
2550   SDValue rhs = Op.getOperand(1);
2551   SDValue trueval = Op.getOperand(2);
2552   SDValue falseval = Op.getOperand(3);
2553   SDValue condition = Op.getOperand(4);
2554   DebugLoc dl = Op.getDebugLoc();
2555
2556   // NOTE: SELB's arguments: $rA, $rB, $mask
2557   //
2558   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2559   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2560   // condition was true and 0s where the condition was false. Hence, the
2561   // arguments to SELB get reversed.
2562
2563   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2564   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2565   // with another "cannot select select_cc" assert:
2566
2567   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2568                                 TLI.getSetCCResultType(Op.getValueType()),
2569                                 lhs, rhs, condition);
2570   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2571 }
2572
2573 //! Custom lower ISD::TRUNCATE
2574 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2575 {
2576   // Type to truncate to
2577   EVT VT = Op.getValueType();
2578   MVT simpleVT = VT.getSimpleVT();
2579   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2580                                VT, (128 / VT.getSizeInBits()));
2581   DebugLoc dl = Op.getDebugLoc();
2582
2583   // Type to truncate from
2584   SDValue Op0 = Op.getOperand(0);
2585   EVT Op0VT = Op0.getValueType();
2586
2587   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2588     // Create shuffle mask, least significant doubleword of quadword
2589     unsigned maskHigh = 0x08090a0b;
2590     unsigned maskLow = 0x0c0d0e0f;
2591     // Use a shuffle to perform the truncation
2592     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2593                                    DAG.getConstant(maskHigh, MVT::i32),
2594                                    DAG.getConstant(maskLow, MVT::i32),
2595                                    DAG.getConstant(maskHigh, MVT::i32),
2596                                    DAG.getConstant(maskLow, MVT::i32));
2597
2598     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2599                                        Op0, Op0, shufMask);
2600
2601     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2602   }
2603
2604   return SDValue();             // Leave the truncate unmolested
2605 }
2606
2607 /*!
2608  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2609  * algorithm is to duplicate the sign bit using rotmai to generate at
2610  * least one byte full of sign bits. Then propagate the "sign-byte" into
2611  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2612  *
2613  * @param Op The sext operand
2614  * @param DAG The current DAG
2615  * @return The SDValue with the entire instruction sequence
2616  */
2617 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2618 {
2619   DebugLoc dl = Op.getDebugLoc();
2620
2621   // Type to extend to
2622   MVT OpVT = Op.getValueType().getSimpleVT();
2623
2624   // Type to extend from
2625   SDValue Op0 = Op.getOperand(0);
2626   MVT Op0VT = Op0.getValueType().getSimpleVT();
2627
2628   // The type to extend to needs to be a i128 and
2629   // the type to extend from needs to be i64 or i32.
2630   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2631           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2632
2633   // Create shuffle mask
2634   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2635   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2636   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2637   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2638                                  DAG.getConstant(mask1, MVT::i32),
2639                                  DAG.getConstant(mask1, MVT::i32),
2640                                  DAG.getConstant(mask2, MVT::i32),
2641                                  DAG.getConstant(mask3, MVT::i32));
2642
2643   // Word wise arithmetic right shift to generate at least one byte
2644   // that contains sign bits.
2645   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2646   SDValue sraVal = DAG.getNode(ISD::SRA,
2647                  dl,
2648                  mvt,
2649                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2650                  DAG.getConstant(31, MVT::i32));
2651
2652   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2653   // and the input value into the lower 64 bits.
2654   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2655       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2656
2657   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2658 }
2659
2660 //! Custom (target-specific) lowering entry point
2661 /*!
2662   This is where LLVM's DAG selection process calls to do target-specific
2663   lowering of nodes.
2664  */
2665 SDValue
2666 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2667 {
2668   unsigned Opc = (unsigned) Op.getOpcode();
2669   EVT VT = Op.getValueType();
2670
2671   switch (Opc) {
2672   default: {
2673 #ifndef NDEBUG
2674     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2675     errs() << "Op.getOpcode() = " << Opc << "\n";
2676     errs() << "*Op.getNode():\n";
2677     Op.getNode()->dump();
2678 #endif
2679     llvm_unreachable(0);
2680   }
2681   case ISD::LOAD:
2682   case ISD::EXTLOAD:
2683   case ISD::SEXTLOAD:
2684   case ISD::ZEXTLOAD:
2685     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2686   case ISD::STORE:
2687     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2688   case ISD::ConstantPool:
2689     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2690   case ISD::GlobalAddress:
2691     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2692   case ISD::JumpTable:
2693     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2694   case ISD::ConstantFP:
2695     return LowerConstantFP(Op, DAG);
2696
2697   // i8, i64 math ops:
2698   case ISD::ADD:
2699   case ISD::SUB:
2700   case ISD::ROTR:
2701   case ISD::ROTL:
2702   case ISD::SRL:
2703   case ISD::SHL:
2704   case ISD::SRA: {
2705     if (VT == MVT::i8)
2706       return LowerI8Math(Op, DAG, Opc, *this);
2707     break;
2708   }
2709
2710   case ISD::FP_TO_SINT:
2711   case ISD::FP_TO_UINT:
2712     return LowerFP_TO_INT(Op, DAG, *this);
2713
2714   case ISD::SINT_TO_FP:
2715   case ISD::UINT_TO_FP:
2716     return LowerINT_TO_FP(Op, DAG, *this);
2717
2718   // Vector-related lowering.
2719   case ISD::BUILD_VECTOR:
2720     return LowerBUILD_VECTOR(Op, DAG);
2721   case ISD::SCALAR_TO_VECTOR:
2722     return LowerSCALAR_TO_VECTOR(Op, DAG);
2723   case ISD::VECTOR_SHUFFLE:
2724     return LowerVECTOR_SHUFFLE(Op, DAG);
2725   case ISD::EXTRACT_VECTOR_ELT:
2726     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2727   case ISD::INSERT_VECTOR_ELT:
2728     return LowerINSERT_VECTOR_ELT(Op, DAG);
2729
2730   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2731   case ISD::AND:
2732   case ISD::OR:
2733   case ISD::XOR:
2734     return LowerByteImmed(Op, DAG);
2735
2736   // Vector and i8 multiply:
2737   case ISD::MUL:
2738     if (VT == MVT::i8)
2739       return LowerI8Math(Op, DAG, Opc, *this);
2740
2741   case ISD::CTPOP:
2742     return LowerCTPOP(Op, DAG);
2743
2744   case ISD::SELECT_CC:
2745     return LowerSELECT_CC(Op, DAG, *this);
2746
2747   case ISD::SETCC:
2748     return LowerSETCC(Op, DAG, *this);
2749
2750   case ISD::TRUNCATE:
2751     return LowerTRUNCATE(Op, DAG);
2752
2753   case ISD::SIGN_EXTEND:
2754     return LowerSIGN_EXTEND(Op, DAG);
2755   }
2756
2757   return SDValue();
2758 }
2759
2760 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2761                                            SmallVectorImpl<SDValue>&Results,
2762                                            SelectionDAG &DAG)
2763 {
2764 #if 0
2765   unsigned Opc = (unsigned) N->getOpcode();
2766   EVT OpVT = N->getValueType(0);
2767
2768   switch (Opc) {
2769   default: {
2770     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2771     errs() << "Op.getOpcode() = " << Opc << "\n";
2772     errs() << "*Op.getNode():\n";
2773     N->dump();
2774     abort();
2775     /*NOTREACHED*/
2776   }
2777   }
2778 #endif
2779
2780   /* Otherwise, return unchanged */
2781 }
2782
2783 //===----------------------------------------------------------------------===//
2784 // Target Optimization Hooks
2785 //===----------------------------------------------------------------------===//
2786
2787 SDValue
2788 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2789 {
2790 #if 0
2791   TargetMachine &TM = getTargetMachine();
2792 #endif
2793   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2794   SelectionDAG &DAG = DCI.DAG;
2795   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2796   EVT NodeVT = N->getValueType(0);      // The node's value type
2797   EVT Op0VT = Op0.getValueType();       // The first operand's result
2798   SDValue Result;                       // Initially, empty result
2799   DebugLoc dl = N->getDebugLoc();
2800
2801   switch (N->getOpcode()) {
2802   default: break;
2803   case ISD::ADD: {
2804     SDValue Op1 = N->getOperand(1);
2805
2806     if (Op0.getOpcode() == SPUISD::IndirectAddr
2807         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2808       // Normalize the operands to reduce repeated code
2809       SDValue IndirectArg = Op0, AddArg = Op1;
2810
2811       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2812         IndirectArg = Op1;
2813         AddArg = Op0;
2814       }
2815
2816       if (isa<ConstantSDNode>(AddArg)) {
2817         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2818         SDValue IndOp1 = IndirectArg.getOperand(1);
2819
2820         if (CN0->isNullValue()) {
2821           // (add (SPUindirect <arg>, <arg>), 0) ->
2822           // (SPUindirect <arg>, <arg>)
2823
2824 #if !defined(NDEBUG)
2825           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2826             errs() << "\n"
2827                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2828                  << "With:    (SPUindirect <arg>, <arg>)\n";
2829           }
2830 #endif
2831
2832           return IndirectArg;
2833         } else if (isa<ConstantSDNode>(IndOp1)) {
2834           // (add (SPUindirect <arg>, <const>), <const>) ->
2835           // (SPUindirect <arg>, <const + const>)
2836           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2837           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2838           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2839
2840 #if !defined(NDEBUG)
2841           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2842             errs() << "\n"
2843                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2844                  << "), " << CN0->getSExtValue() << ")\n"
2845                  << "With:    (SPUindirect <arg>, "
2846                  << combinedConst << ")\n";
2847           }
2848 #endif
2849
2850           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2851                              IndirectArg, combinedValue);
2852         }
2853       }
2854     }
2855     break;
2856   }
2857   case ISD::SIGN_EXTEND:
2858   case ISD::ZERO_EXTEND:
2859   case ISD::ANY_EXTEND: {
2860     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2861       // (any_extend (SPUextract_elt0 <arg>)) ->
2862       // (SPUextract_elt0 <arg>)
2863       // Types must match, however...
2864 #if !defined(NDEBUG)
2865       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2866         errs() << "\nReplace: ";
2867         N->dump(&DAG);
2868         errs() << "\nWith:    ";
2869         Op0.getNode()->dump(&DAG);
2870         errs() << "\n";
2871       }
2872 #endif
2873
2874       return Op0;
2875     }
2876     break;
2877   }
2878   case SPUISD::IndirectAddr: {
2879     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2880       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2881       if (CN != 0 && CN->getZExtValue() == 0) {
2882         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2883         // (SPUaform <addr>, 0)
2884
2885         DEBUG(errs() << "Replace: ");
2886         DEBUG(N->dump(&DAG));
2887         DEBUG(errs() << "\nWith:    ");
2888         DEBUG(Op0.getNode()->dump(&DAG));
2889         DEBUG(errs() << "\n");
2890
2891         return Op0;
2892       }
2893     } else if (Op0.getOpcode() == ISD::ADD) {
2894       SDValue Op1 = N->getOperand(1);
2895       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2896         // (SPUindirect (add <arg>, <arg>), 0) ->
2897         // (SPUindirect <arg>, <arg>)
2898         if (CN1->isNullValue()) {
2899
2900 #if !defined(NDEBUG)
2901           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2902             errs() << "\n"
2903                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2904                  << "With:    (SPUindirect <arg>, <arg>)\n";
2905           }
2906 #endif
2907
2908           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2909                              Op0.getOperand(0), Op0.getOperand(1));
2910         }
2911       }
2912     }
2913     break;
2914   }
2915   case SPUISD::SHLQUAD_L_BITS:
2916   case SPUISD::SHLQUAD_L_BYTES:
2917   case SPUISD::ROTBYTES_LEFT: {
2918     SDValue Op1 = N->getOperand(1);
2919
2920     // Kill degenerate vector shifts:
2921     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2922       if (CN->isNullValue()) {
2923         Result = Op0;
2924       }
2925     }
2926     break;
2927   }
2928   case SPUISD::PREFSLOT2VEC: {
2929     switch (Op0.getOpcode()) {
2930     default:
2931       break;
2932     case ISD::ANY_EXTEND:
2933     case ISD::ZERO_EXTEND:
2934     case ISD::SIGN_EXTEND: {
2935       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2936       // <arg>
2937       // but only if the SPUprefslot2vec and <arg> types match.
2938       SDValue Op00 = Op0.getOperand(0);
2939       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2940         SDValue Op000 = Op00.getOperand(0);
2941         if (Op000.getValueType() == NodeVT) {
2942           Result = Op000;
2943         }
2944       }
2945       break;
2946     }
2947     case SPUISD::VEC2PREFSLOT: {
2948       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2949       // <arg>
2950       Result = Op0.getOperand(0);
2951       break;
2952     }
2953     }
2954     break;
2955   }
2956   }
2957
2958   // Otherwise, return unchanged.
2959 #ifndef NDEBUG
2960   if (Result.getNode()) {
2961     DEBUG(errs() << "\nReplace.SPU: ");
2962     DEBUG(N->dump(&DAG));
2963     DEBUG(errs() << "\nWith:        ");
2964     DEBUG(Result.getNode()->dump(&DAG));
2965     DEBUG(errs() << "\n");
2966   }
2967 #endif
2968
2969   return Result;
2970 }
2971
2972 //===----------------------------------------------------------------------===//
2973 // Inline Assembly Support
2974 //===----------------------------------------------------------------------===//
2975
2976 /// getConstraintType - Given a constraint letter, return the type of
2977 /// constraint it is for this target.
2978 SPUTargetLowering::ConstraintType
2979 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2980   if (ConstraintLetter.size() == 1) {
2981     switch (ConstraintLetter[0]) {
2982     default: break;
2983     case 'b':
2984     case 'r':
2985     case 'f':
2986     case 'v':
2987     case 'y':
2988       return C_RegisterClass;
2989     }
2990   }
2991   return TargetLowering::getConstraintType(ConstraintLetter);
2992 }
2993
2994 std::pair<unsigned, const TargetRegisterClass*>
2995 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2996                                                 EVT VT) const
2997 {
2998   if (Constraint.size() == 1) {
2999     // GCC RS6000 Constraint Letters
3000     switch (Constraint[0]) {
3001     case 'b':   // R1-R31
3002     case 'r':   // R0-R31
3003       if (VT == MVT::i64)
3004         return std::make_pair(0U, SPU::R64CRegisterClass);
3005       return std::make_pair(0U, SPU::R32CRegisterClass);
3006     case 'f':
3007       if (VT == MVT::f32)
3008         return std::make_pair(0U, SPU::R32FPRegisterClass);
3009       else if (VT == MVT::f64)
3010         return std::make_pair(0U, SPU::R64FPRegisterClass);
3011       break;
3012     case 'v':
3013       return std::make_pair(0U, SPU::GPRCRegisterClass);
3014     }
3015   }
3016
3017   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3018 }
3019
3020 //! Compute used/known bits for a SPU operand
3021 void
3022 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3023                                                   const APInt &Mask,
3024                                                   APInt &KnownZero,
3025                                                   APInt &KnownOne,
3026                                                   const SelectionDAG &DAG,
3027                                                   unsigned Depth ) const {
3028 #if 0
3029   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3030
3031   switch (Op.getOpcode()) {
3032   default:
3033     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3034     break;
3035   case CALL:
3036   case SHUFB:
3037   case SHUFFLE_MASK:
3038   case CNTB:
3039   case SPUISD::PREFSLOT2VEC:
3040   case SPUISD::LDRESULT:
3041   case SPUISD::VEC2PREFSLOT:
3042   case SPUISD::SHLQUAD_L_BITS:
3043   case SPUISD::SHLQUAD_L_BYTES:
3044   case SPUISD::VEC_ROTL:
3045   case SPUISD::VEC_ROTR:
3046   case SPUISD::ROTBYTES_LEFT:
3047   case SPUISD::SELECT_MASK:
3048   case SPUISD::SELB:
3049   }
3050 #endif
3051 }
3052
3053 unsigned
3054 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3055                                                    unsigned Depth) const {
3056   switch (Op.getOpcode()) {
3057   default:
3058     return 1;
3059
3060   case ISD::SETCC: {
3061     EVT VT = Op.getValueType();
3062
3063     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3064       VT = MVT::i32;
3065     }
3066     return VT.getSizeInBits();
3067   }
3068   }
3069 }
3070
3071 // LowerAsmOperandForConstraint
3072 void
3073 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3074                                                 char ConstraintLetter,
3075                                                 bool hasMemory,
3076                                                 std::vector<SDValue> &Ops,
3077                                                 SelectionDAG &DAG) const {
3078   // Default, for the time being, to the base class handler
3079   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3080                                                Ops, DAG);
3081 }
3082
3083 /// isLegalAddressImmediate - Return true if the integer value can be used
3084 /// as the offset of the target addressing mode.
3085 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3086                                                 const Type *Ty) const {
3087   // SPU's addresses are 256K:
3088   return (V > -(1 << 18) && V < (1 << 18) - 1);
3089 }
3090
3091 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3092   return false;
3093 }
3094
3095 bool
3096 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3097   // The SPU target isn't yet aware of offsets.
3098   return false;
3099 }