lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "SPUMachineFunction.h"
  19 #include "llvm/Constants.h"
  20 #include "llvm/Function.h"
  21 #include "llvm/Intrinsics.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  30 #include "llvm/Target/TargetOptions.h"
  31 #include "llvm/ADT/VectorExtras.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include <map>
  37
  38 using namespace llvm;
  39
  40 // Used in getTargetNodeName() below
  41 namespace {
   42   std::map<unsigned, const char *> node_names;
        // node_names is filled in lazily by SPUTargetLowering::getTargetNodeName()
        // the first time that function finds the map empty.
   43
   44   //! EVT mapping to useful data for Cell SPU
        //! Each entry pairs a scalar value type with the byte offset that
        //! LowerLOAD() subtracts when computing the rotation that moves a loaded
        //! element into its "preferred slot" within the 16-byte block.
   45   struct valtype_map_s {
          //! Scalar value type this entry describes.
   46     EVT   valtype;
          //! Preferred slot byte offset for \c valtype.
   47     int   prefslot_byte;
   48   };
   49
        //! Table of the scalar types known to getValueTypeMapEntry(); a linear
        //! search over this array is the only lookup performed on it here.
   50   const valtype_map_s valtype_map[] = {
   51     { MVT::i1,   3 },
   52     { MVT::i8,   3 },
   53     { MVT::i16,  2 },
   54     { MVT::i32,  0 },
   55     { MVT::f32,  0 },
   56     { MVT::i64,  0 },
   57     { MVT::f64,  0 },
   58     { MVT::i128, 0 }
   59   };
   60
        //! Number of entries in valtype_map, derived from the array size.
   61   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  62
  63   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  64     const valtype_map_s *retval = 0;
  65
  66     for (size_t i = 0; i < n_valtype_map; ++i) {
  67       if (valtype_map[i].valtype == VT) {
  68         retval = valtype_map + i;
  69         break;
  70       }
  71     }
  72
  73 #ifndef NDEBUG
  74     if (retval == 0) {
  75       report_fatal_error("getValueTypeMapEntry returns NULL for " +
  76                          Twine(VT.getEVTString()));
  77     }
  78 #endif
  79
  80     return retval;
  81   }
  82
   83   //! Expand a library call into an actual call DAG node
   84   /*!
         Builds an argument list from every operand of \a Op, marks each
         argument sign- or zero-extended according to \a isSigned, and lowers a
         call to the runtime routine named by TLI.getLibcallName(LC). The call's
         return type is the type of \a Op's first result.

         \param LC        Runtime library routine to call.
         \param Op        Node being replaced; its operands become the arguments.
         \param DAG       Selection DAG the new nodes are created in.
         \param isSigned  Selects sign extension (true) or zero extension (false)
                          for the arguments and for the return value flags.
         \param Hi        Not read or written by this function.
         \param TLI       Target lowering object used to build the call.
         \return The \c first member of the pair returned by LowerCallTo();
                 the \c second member is discarded.

   85    \note
   86    This code is taken from SelectionDAGLegalize, since it is not exposed as
   87    part of the LLVM SelectionDAG API.
   88    */
   89
   90   SDValue
   91   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
   92                 bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
   93     // The input chain to this libcall is the entry node of the function.
   94     // Legalizing the call will automatically add the previous call to the
   95     // dependence.
   96     SDValue InChain = DAG.getEntryNode();
   97
          // One ArgListEntry object is reused; each iteration overwrites the
          // fields set below and pushes a copy onto Args.
   98     TargetLowering::ArgListTy Args;
   99     TargetLowering::ArgListEntry Entry;
  100     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
  101       EVT ArgVT = Op.getOperand(i).getValueType();
  102       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  103       Entry.Node = Op.getOperand(i);
  104       Entry.Ty = ArgTy;
  105       Entry.isSExt = isSigned;
  106       Entry.isZExt = !isSigned;
  107       Args.push_back(Entry);
  108     }
  109     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
  110                                            TLI.getPointerTy());
  111
  112     // Splice the libcall in wherever FindInputOutputChains tells us to.
          // NOTE(review): no FindInputOutputChains call appears in this function;
          // the comment above looks inherited from the code this was copied from.
          // NOTE(review): the positional arguments after RetTy are presumably
          // (RetSExt, RetZExt, isVarArg, isInreg, NumFixedArgs, CallConv,
          // isTailCall, ...) -- confirm against TargetLowering::LowerCallTo.
  113     const Type *RetTy =
  114                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
  115     std::pair<SDValue, SDValue> CallInfo =
  116             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
  117                             0, TLI.getLibcallCallingConv(LC), false,
  118                             /*isReturnValueUsed=*/true,
  119                             Callee, Args, DAG, Op.getDebugLoc());
  120
  121     return CallInfo.first;
  122   }
 123 }
  124
      /// Construct the Cell SPU lowering description.
      ///
      /// Registers the register class used for each supported value type, sets
      /// the legalization action (Legal / Promote / Expand / Custom) for each
      /// (operation, value type) pair listed below, registers the nodes that
      /// have target-specific DAG combines, and selects a scheduling preference.
      ///
      /// Several (operation, type) pairs are configured more than once in this
      /// body, so the order of the statements is significant; see the notes at
      /// the repeated calls.
      ///
      /// \param TM  Target machine; passed to the TargetLowering base class and
      ///            stored in SPUTM.
  125 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  126   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
  127     SPUTM(TM) {
  128   // Fold away setcc operations if possible.
        // NOTE(review): the call below is setPow2DivIsCheap(), which does not
        // match the comment above about setcc folding -- confirm the intent.
  129   setPow2DivIsCheap();
  130
  131   // Use _setjmp/_longjmp instead of setjmp/longjmp.
  132   setUseUnderscoreSetJmp(true);
  133   setUseUnderscoreLongJmp(true);
  134
  135   // Set RTLIB libcall names as used by SPU:
  136   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
  137
  138   // Set up the SPU's register classes:
  139   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  140   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  141   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  142   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  143   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  144   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  145   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  146
  147   // SPU has no sign or zero extended loads for i1, i8, i16:
        // Only the i1 cases are configured right here; extending loads for the
        // wider integer types are set to Custom in the loop further down.
  148   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  149   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  150   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  151
  152   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  153   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
  154
  155   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  156   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  157   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  158   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
  159
  160   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  161
  162   // SPU constant load actions are custom lowered:
        // NOTE(review): only the f64 constant is Custom; f32 is marked Legal.
  163   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  164   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  165
  166   // SPU's loads and stores have to be custom lowered:
        // The loop walks the numeric SimpleValueType values from MVT::i8 up to,
        // but not including, MVT::i128. NOTE(review): this assumes the integer
        // types are numbered consecutively between those two -- confirm against
        // the MVT enum.
  167   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
  168        ++sctype) {
  169     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  170
  171     setOperationAction(ISD::LOAD,   VT, Custom);
  172     setOperationAction(ISD::STORE,  VT, Custom);
  173     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
  174     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
  175     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
  176
          // Expand truncating stores from VT to every smaller type down to i8.
          // stype is unsigned and counts down, so termination relies on
          // (unsigned) MVT::i8 being greater than zero.
  177     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
  178       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
  179       setTruncStoreAction(VT, StoreVT, Expand);
  180     }
  181   }
  182
        // Same pattern for floating point, covering values from MVT::f32 up to,
        // but not including, MVT::f64. For sctype == MVT::f32 the inner loop
        // starts below MVT::f32 and therefore does not execute.
  183   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
  184        ++sctype) {
  185     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
  186
  187     setOperationAction(ISD::LOAD,   VT, Custom);
  188     setOperationAction(ISD::STORE,  VT, Custom);
  189
  190     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
  191       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
  192       setTruncStoreAction(VT, StoreVT, Expand);
  193     }
  194   }
  195
  196   // Expand the jumptable branches
  197   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  198   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
  199
  200   // Custom lower SELECT_CC for most cases, but expand by default
  201   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  202   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  203   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  204   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  205   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
  206
  207   // SPU has no intrinsics for these particular operations:
  208   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  209
  210   // SPU has no division/remainder instructions
  211   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  212   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  213   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  214   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  215   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  216   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  217   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  218   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  219   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  220   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  221   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  222   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  223   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  224   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  225   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  226   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  227   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  228   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  229   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  230   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  231   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  232   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  233   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  234   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  235   setOperationAction(ISD::SREM,    MVT::i128, Expand);
  236   setOperationAction(ISD::UREM,    MVT::i128, Expand);
  237   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  238   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  239   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  240   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
  241
  242   // We don't support sin/cos/sqrt/fmod
  243   setOperationAction(ISD::FSIN , MVT::f64, Expand);
  244   setOperationAction(ISD::FCOS , MVT::f64, Expand);
  245   setOperationAction(ISD::FREM , MVT::f64, Expand);
  246   setOperationAction(ISD::FSIN , MVT::f32, Expand);
  247   setOperationAction(ISD::FCOS , MVT::f32, Expand);
  248   setOperationAction(ISD::FREM , MVT::f32, Expand);
  249
  250   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  251   // for f32!)
  252   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  253   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  254
  255   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  256   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  257
  258   // SPU can do rotate right and left, so legalize it... but customize for i8
  259   // because instructions don't exist.
  260
  261   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  262   //        .td files.
  263   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  264   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  265   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
  266
  267   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  268   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  269   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
  270
  271   // SPU has no native version of shift left/right for i8
  272   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  273   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  274   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
  275
  276   // Make these operations legal and handle them during instruction selection:
  277   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  278   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  279   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
  280
  281   // Custom lower i8, i32 and i64 multiplications
        // NOTE(review): only i8 is Custom below; i32 and i64 are marked Legal.
  282   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  283   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  284   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
  285
  286   // Expand double-width multiplication
  287   // FIXME: It would probably be reasonable to support some of these operations
  288   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  289   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  290   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  291   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  292   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  293   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  294   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  295   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  298   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  299   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  300   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  301   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  302   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  303   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
  304
  305   // Need to custom handle (some) common i8, i64 math ops
        // NOTE(review): only the i8 forms are Custom; the i64 forms are Legal.
  306   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  307   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  308   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  309   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
  310
  311   // SPU does not have BSWAP. It does have i32 support CTLZ.
  312   // CTPOP has to be custom lowered.
  313   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  314   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
  315
  316   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  317   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  318   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  319   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  320   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
  321
  322   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  323   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  324   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  325   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  326   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
  327
  328   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  329   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  330   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  331   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  332   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
  333
  334   // SPU has a version of select that implements (a&~c)|(b&c), just like
  335   // select ought to work:
  336   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  337   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  338   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  339   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
  340
  341   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  342   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  343   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  344   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  345   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
  346
  347   // Custom lower i128 -> i64 truncates
  348   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
  349
  350   // Custom lower i32/i64 -> i128 sign extend
  351   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
  352
  353   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  354   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  355   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  356   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  357   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  358   // to expand to a libcall, hence the custom lowering:
        // NOTE(review): FP_TO_UINT/i32 and FP_TO_SINT/i64 are configured again
        // near the end of this constructor with different actions; presumably
        // the later call is the one that takes effect -- confirm.
  359   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  360   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  361   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  362   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  363   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  364   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
  365
  366   // FDIV on SPU requires custom lowering
        // NOTE(review): the action actually set for f64 FDIV is Expand.
  367   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
  368
  369   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  370   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  371   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  372   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  373   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  374   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  375   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  376   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  377   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  378
  379   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  380   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  381   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  382   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
  383
  384   // We cannot sextinreg(i1).  Expand to shifts.
  385   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  386
  387   // We want to legalize GlobalAddress and ConstantPool nodes into the
  388   // appropriate instructions to materialize the address.
        // Applied to every SimpleValueType value from MVT::i8 up to, but not
        // including, MVT::f128; JumpTable is handled the same way.
  389   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
  390        ++sctype) {
  391     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  392
  393     setOperationAction(ISD::GlobalAddress,  VT, Custom);
  394     setOperationAction(ISD::ConstantPool,   VT, Custom);
  395     setOperationAction(ISD::JumpTable,      VT, Custom);
  396   }
  397
  398   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  399   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  400
  401   // Use the default implementation.
  402   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  403   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  404   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  405   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  406   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  407   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  408   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
  409
  410   // Cell SPU has instructions for converting between i64 and fp.
        // FP_TO_SINT/i64 was set to Expand earlier in this constructor and is
        // set to Custom here; SINT_TO_FP/i64 was already Custom, so that call
        // repeats the earlier setting.
  411   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  412   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  413
  414   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
        // FP_TO_UINT/i32 was set to Custom earlier in this constructor.
  415   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  416
  417   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  418   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  419
  420   // First set operation action for all vector types to expand. Then we
  421   // will selectively turn on ones that can be effectively codegen'd.
  422   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  423   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  424   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  425   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  426   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  427   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  428
  429   // "Odd size" vector classes that we're willing to support:
  430   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
  431   addRegisterClass(MVT::v2f32, SPU::VECREGRegisterClass);
  432
        // Per-vector-type defaults, applied to every value in the range
        // FIRST_VECTOR_VALUETYPE..LAST_VECTOR_VALUETYPE (inclusive).
  433   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
  434        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
  435     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
  436
  437     // add/sub are legal for all supported vector VT's.
  438     setOperationAction(ISD::ADD,     VT, Legal);
  439     setOperationAction(ISD::SUB,     VT, Legal);
  440     // mul has to be custom lowered.
          // NOTE(review): the action set below is Legal, not Custom.
  441     setOperationAction(ISD::MUL,     VT, Legal);
  442
  443     setOperationAction(ISD::AND,     VT, Legal);
  444     setOperationAction(ISD::OR,      VT, Legal);
  445     setOperationAction(ISD::XOR,     VT, Legal);
  446     setOperationAction(ISD::LOAD,    VT, Legal);
  447     setOperationAction(ISD::SELECT,  VT, Legal);
  448     setOperationAction(ISD::STORE,   VT, Legal);
  449
  450     // These operations need to be expanded:
  451     setOperationAction(ISD::SDIV,    VT, Expand);
  452     setOperationAction(ISD::SREM,    VT, Expand);
  453     setOperationAction(ISD::UDIV,    VT, Expand);
  454     setOperationAction(ISD::UREM,    VT, Expand);
  455
  456     // Custom lower build_vector, constant pool spills, insert and
  457     // extract vector elements:
  458     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  459     setOperationAction(ISD::ConstantPool, VT, Custom);
  460     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
  461     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  462     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  463     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  464   }
  465
        // Overrides of the per-vector defaults set in the loop above.
  466   setOperationAction(ISD::AND, MVT::v16i8, Custom);
  467   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  468   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
        // NOTE(review): the loop above sets SCALAR_TO_VECTOR to Custom for each
        // vector type it visits; if v4f32 is in that range this call is redundant.
  469   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  470
  471   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
  472
  473   setShiftAmountType(MVT::i32);
  474   setBooleanContents(ZeroOrNegativeOneBooleanContent);
  475
  476   setStackPointerRegisterToSaveRestore(SPU::R1);
  477
  478   // We have target-specific dag combine patterns for the following nodes:
  479   setTargetDAGCombine(ISD::ADD);
  480   setTargetDAGCombine(ISD::ZERO_EXTEND);
  481   setTargetDAGCombine(ISD::SIGN_EXTEND);
  482   setTargetDAGCombine(ISD::ANY_EXTEND);
  483
  484   computeRegisterProperties();
  485
  486   // Set pre-RA register scheduler default to BURR, which produces slightly
  487   // better code than the default (could also be TDRR, but TargetLowering.h
  488   // needs a mod to support that model):
        // NOTE(review): the preference passed below is Sched::RegPressure; the
        // "BURR" wording above may predate that enum -- confirm they correspond.
  489   setSchedulingPreference(Sched::RegPressure);
  490 }
 491
 492 const char *
 493 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 494 {
 495   if (node_names.empty()) {
 496     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 497     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 498     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 499     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 500     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 501     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 502     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 503     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 504     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 505     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 506     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 507     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 508     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 509     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 510     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 511     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 512     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 513     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 514     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 515             "SPUISD::ROTBYTES_LEFT_BITS";
 516     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 517     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 518     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 519     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 520     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 521   }
 522
 523   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 524
 525   return ((i != node_names.end()) ? i->second : 0);
 526 }
 527
 528 /// getFunctionAlignment - Return the Log2 alignment of this function.
 529 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 530   return 3;
 531 }
 532
 533 //===----------------------------------------------------------------------===//
 534 // Return the Cell SPU's SETCC result type
 535 //===----------------------------------------------------------------------===//
 536
 537 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 538   // i16 and i32 are valid SETCC result types
 539   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 540     VT.getSimpleVT().SimpleTy :
 541     MVT::i32);
 542 }
 543
 544 //===----------------------------------------------------------------------===//
 545 // Calling convention code:
 546 //===----------------------------------------------------------------------===//
 547
 548 #include "SPUGenCallingConv.inc"
 549
 550 //===----------------------------------------------------------------------===//
 551 //  LowerOperation implementation
 552 //===----------------------------------------------------------------------===//
 553
  554 /// Custom lower loads for CellSPU
  555 /*!
  556  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
  557  within a 16-byte block, we have to rotate to extract the requested element.
  558
  559  For extending loads, we also want to ensure that the following sequence is
  560  emitted, e.g. for MVT::f32 extending load to MVT::f64:
  561
  562 \verbatim
  563 %1  v16i8,ch = load
  564 %2  v16i8,ch = rotate %1
  565 %3  v4f32,ch = bitconvert %2
  566 %4  f32      = vec2prefslot %3
  567 %5  f64      = fp_extend %4
  568 \endverbatim

       The lowering re-emits the load as a 16-byte-aligned v16i8 load, rotates
       the requested element into its preferred slot with ROTBYTES_LEFT,
       extracts it with VEC2PREFSLOT, applies any sign/zero/any/FP extension,
       and wraps value and chain in an LDRESULT node.

       \param Op   The load node; must be a LoadSDNode.
       \param DAG  Selection DAG in which the replacement nodes are built.
       \param ST   Subtarget; not referenced in this function.
       \return The LDRESULT node for unindexed loads. Any indexed addressing
               mode is reported through report_fatal_error().
  569 */
  570 static SDValue
  571 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  572   LoadSDNode *LN = cast<LoadSDNode>(Op);
  573   SDValue the_chain = LN->getChain();
  574   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  575   EVT InVT = LN->getMemoryVT();
  576   EVT OutVT = Op.getValueType();
  577   ISD::LoadExtType ExtType = LN->getExtensionType();
  578   unsigned alignment = LN->getAlignment();
        // vtm is dereferenced below without a NULL check.
  579   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  580   DebugLoc dl = Op.getDebugLoc();
  581
  582   switch (LN->getAddressingMode()) {
  583   case ISD::UNINDEXED: {
  584     SDValue result;
  585     SDValue basePtr = LN->getBasePtr();
  586     SDValue rotate;
  587
          // Only an alignment of exactly 16 takes the simplified path; every
          // other value (smaller or larger) takes the pessimistic path below.
  588     if (alignment == 16) {
  589       ConstantSDNode *CN;
  590
  591       // Special cases for a known aligned load to simplify the base pointer
  592       // and the rotation amount:
  593       if (basePtr.getOpcode() == ISD::ADD
  594           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
  595         // Known offset into basePtr
  596         int64_t offset = CN->getSExtValue();
              // Rotation = position within the 16-byte block minus the
              // preferred-slot byte, wrapped into the range 0..15.
  597         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
  598
  599         if (rotamt < 0)
  600           rotamt += 16;
  601
  602         rotate = DAG.getConstant(rotamt, MVT::i16);
  603
  604         // Simplify the base pointer for this case:
  605         basePtr = basePtr.getOperand(0);
              // NOTE(review): for a negative offset, (offset & ~0xf) is negative,
              // so this test is false and the 16-byte-aligned part of the offset
              // is dropped from the address. "!= 0" may have been intended --
              // confirm whether negative constant offsets can reach this path.
  606         if ((offset & ~0xf) > 0) {
  607           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
  608                                 basePtr,
  609                                 DAG.getConstant((offset & ~0xf), PtrVT));
  610         }
  611       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
  612                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
  613                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
  614                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
  615         // Plain aligned a-form address: rotate into preferred slot
  616         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
  617         int64_t rotamt = -vtm->prefslot_byte;
  618         if (rotamt < 0)
  619           rotamt += 16;
  620         rotate = DAG.getConstant(rotamt, MVT::i16);
  621       } else {
  622         // Offset the rotate amount by the basePtr and the preferred slot
  623         // byte offset
              // Here the rotate amount is a run-time value of type PtrVT
              // (basePtr + constant), unlike the MVT::i16 constants above.
  624         int64_t rotamt = -vtm->prefslot_byte;
  625         if (rotamt < 0)
  626           rotamt += 16;
  627         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
  628                              basePtr,
  629                              DAG.getConstant(rotamt, PtrVT));
  630       }
  631     } else {
  632       // Unaligned load: must be more pessimistic about addressing modes:
  633       if (basePtr.getOpcode() == ISD::ADD) {
  634         MachineFunction &MF = DAG.getMachineFunction();
  635         MachineRegisterInfo &RegInfo = MF.getRegInfo();
              // The virtual register is created for every ADD base pointer but
              // is only used in the constant-offset branch below.
  636         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
  637         SDValue Flag;
  638
  639         SDValue Op0 = basePtr.getOperand(0);
  640         SDValue Op1 = basePtr.getOperand(1);
  641
  642         if (isa<ConstantSDNode>(Op1)) {
  643           // Convert the (add <ptr>, <const>) to an indirect address contained
  644           // in a register. Note that this is done because we need to avoid
  645           // creating a 0(reg) d-form address due to the SPU's block loads.
  646           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
  647           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
  648           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
  649         } else {
  650           // Convert the (add <arg1>, <arg2>) to an indirect address, which
  651           // will likely be lowered as a reg(reg) x-form address.
  652           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
  653         }
  654       } else {
  655         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
  656                               basePtr,
  657                               DAG.getConstant(0, PtrVT));
  658       }
  659
  660       // Offset the rotate amount by the basePtr and the preferred slot
  661       // byte offset
            // Unlike the aligned paths, the (possibly negative) constant is used
            // as-is here, without adding 16 to make it non-negative.
  662       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
  663                            basePtr,
  664                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
  665     }
  666
  667     // Re-emit as a v16i8 vector load
  668     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
  669                          LN->getSrcValue(), LN->getSrcValueOffset(),
  670                          LN->isVolatile(), LN->isNonTemporal(), 16);
  671
  672     // Update the chain
  673     the_chain = result.getValue(1);
  674
  675     // Rotate into the preferred slot:
  676     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
  677                          result.getValue(0), rotate);
  678
  679     // Convert the loaded v16i8 vector to the appropriate vector type
  680     // specified by the operand:
          // vecVT is a 128-bit vector of InVT elements (128 / element bits).
  681     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
  682                                  InVT, (128 / InVT.getSizeInBits()));
  683     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
  684                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
  685
  686     // Handle extending loads by extending the scalar result:
  687     if (ExtType == ISD::SEXTLOAD) {
  688       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  689     } else if (ExtType == ISD::ZEXTLOAD) {
  690       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  691     } else if (ExtType == ISD::EXTLOAD) {
            // Any-extend for integers, FP_EXTEND when the result is floating point.
  692       unsigned NewOpc = ISD::ANY_EXTEND;
  693
  694       if (OutVT.isFloatingPoint())
  695         NewOpc = ISD::FP_EXTEND;
  696
  697       result = DAG.getNode(NewOpc, dl, OutVT, result);
  698     }
  699
          // Package the loaded value together with the updated chain.
  700     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  701     SDValue retops[2] = {
  702       result,
  703       the_chain
  704     };
  705
  706     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
  707                          retops, sizeof(retops) / sizeof(retops[0]));
  708     return result;
  709   }
        // Indexed addressing modes are not handled by this lowering.
  710   case ISD::PRE_INC:
  711   case ISD::PRE_DEC:
  712   case ISD::POST_INC:
  713   case ISD::POST_DEC:
  714   case ISD::LAST_INDEXED_MODE:
  715     {
  716       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
  717                          "than UNINDEXED\n" +
  718                          Twine((unsigned)LN->getAddressingMode()));
  719       /*NOTREACHED*/
  720     }
  721   }
  722
        // Reached only if the switch above neither returned nor aborted.
  723   return SDValue();
  724 }
 725
 726 /// Custom lower stores for CellSPU
 727 /*!
 728  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 729  within a 16-byte block, we have to generate a shuffle to insert the
 730  requested element into its place, then store the resulting block.
 731  */
 732 static SDValue
 733 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 734   StoreSDNode *SN = cast<StoreSDNode>(Op);
 735   SDValue Value = SN->getValue();
 736   EVT VT = Value.getValueType();
 737   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 738   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 739   DebugLoc dl = Op.getDebugLoc();
 740   unsigned alignment = SN->getAlignment();
 741
 742   switch (SN->getAddressingMode()) {
 743   case ISD::UNINDEXED: {
 744     // The vector type we really want to load from the 16-byte chunk.
 745     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 746                                  VT, (128 / VT.getSizeInBits()));
 747
 748     SDValue alignLoadVec;
 749     SDValue basePtr = SN->getBasePtr();
 750     SDValue the_chain = SN->getChain();
 751     SDValue insertEltOffs;
 752
 753     if (alignment == 16) {
 754       ConstantSDNode *CN;
 755
 756       // Special cases for a known aligned load to simplify the base pointer
 757       // and insertion byte:
 758       if (basePtr.getOpcode() == ISD::ADD
 759           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 760         // Known offset into basePtr
 761         int64_t offset = CN->getSExtValue();
 762
 763         // Simplify the base pointer for this case:
 764         basePtr = basePtr.getOperand(0);
 765         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 766                                     basePtr,
 767                                     DAG.getConstant((offset & 0xf), PtrVT));
 768
 769         if ((offset & ~0xf) > 0) {
 770           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 771                                 basePtr,
 772                                 DAG.getConstant((offset & ~0xf), PtrVT));
 773         }
 774       } else {
 775         // Otherwise, assume it's at byte 0 of basePtr
 776         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 777                                     basePtr,
 778                                     DAG.getConstant(0, PtrVT));
 779       }
 780     } else {
 781       // Unaligned load: must be more pessimistic about addressing modes:
 782       if (basePtr.getOpcode() == ISD::ADD) {
 783         MachineFunction &MF = DAG.getMachineFunction();
 784         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 785         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 786         SDValue Flag;
 787
 788         SDValue Op0 = basePtr.getOperand(0);
 789         SDValue Op1 = basePtr.getOperand(1);
 790
 791         if (isa<ConstantSDNode>(Op1)) {
 792           // Convert the (add <ptr>, <const>) to an indirect address contained
 793           // in a register. Note that this is done because we need to avoid
 794           // creating a 0(reg) d-form address due to the SPU's block loads.
 795           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 796           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 797           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 798         } else {
 799           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 800           // will likely be lowered as a reg(reg) x-form address.
 801           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 802         }
 803       } else {
 804         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 805                               basePtr,
 806                               DAG.getConstant(0, PtrVT));
 807       }
 808
 809       // Insertion point is solely determined by basePtr's contents
 810       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 811                                   basePtr,
 812                                   DAG.getConstant(0, PtrVT));
 813     }
 814
 815     // Re-emit as a v16i8 vector load
 816     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 817                                SN->getSrcValue(), SN->getSrcValueOffset(),
 818                                SN->isVolatile(), SN->isNonTemporal(), 16);
 819
 820     // Update the chain
 821     the_chain = alignLoadVec.getValue(1);
 822
 823     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 824     SDValue theValue = SN->getValue();
 825     SDValue result;
 826
 827     if (StVT != VT
 828         && (theValue.getOpcode() == ISD::AssertZext
 829             || theValue.getOpcode() == ISD::AssertSext)) {
 830       // Drill down and get the value for zero- and sign-extended
 831       // quantities
 832       theValue = theValue.getOperand(0);
 833     }
 834
 835     // If the base pointer is already a D-form address, then just create
 836     // a new D-form address with a slot offset and the orignal base pointer.
 837     // Otherwise generate a D-form address with the slot offset relative
 838     // to the stack pointer, which is always aligned.
 839 #if !defined(NDEBUG)
 840       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 841         errs() << "CellSPU LowerSTORE: basePtr = ";
 842         basePtr.getNode()->dump(&DAG);
 843         errs() << "\n";
 844       }
 845 #endif
 846
 847     SDValue insertEltOp =
 848             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 849     SDValue vectorizeOp =
 850             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 851
 852     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 853                          vectorizeOp, alignLoadVec,
 854                          DAG.getNode(ISD::BIT_CONVERT, dl,
 855                                      MVT::v4i32, insertEltOp));
 856
 857     result = DAG.getStore(the_chain, dl, result, basePtr,
 858                           LN->getSrcValue(), LN->getSrcValueOffset(),
 859                           LN->isVolatile(), LN->isNonTemporal(),
 860                           LN->getAlignment());
 861
 862 #if 0 && !defined(NDEBUG)
 863     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 864       const SDValue &currentRoot = DAG.getRoot();
 865
 866       DAG.setRoot(result);
 867       errs() << "------- CellSPU:LowerStore result:\n";
 868       DAG.dump();
 869       errs() << "-------\n";
 870       DAG.setRoot(currentRoot);
 871     }
 872 #endif
 873
 874     return result;
 875     /*UNREACHED*/
 876   }
 877   case ISD::PRE_INC:
 878   case ISD::PRE_DEC:
 879   case ISD::POST_INC:
 880   case ISD::POST_DEC:
 881   case ISD::LAST_INDEXED_MODE:
 882     {
 883       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 884                          "than UNINDEXED\n" +
 885                          Twine((unsigned)SN->getAddressingMode()));
 886       /*NOTREACHED*/
 887     }
 888   }
 889
 890   return SDValue();
 891 }
 892
 893 //! Generate the address of a constant pool entry.
 894 static SDValue
 895 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 896   EVT PtrVT = Op.getValueType();
 897   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 898   const Constant *C = CP->getConstVal();
 899   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 900   SDValue Zero = DAG.getConstant(0, PtrVT);
 901   const TargetMachine &TM = DAG.getTarget();
 902   // FIXME there is no actual debug info here
 903   DebugLoc dl = Op.getDebugLoc();
 904
 905   if (TM.getRelocationModel() == Reloc::Static) {
 906     if (!ST->usingLargeMem()) {
 907       // Just return the SDValue with the constant pool address in it.
 908       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 909     } else {
 910       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 911       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 912       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 913     }
 914   }
 915
 916   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 917                    " not supported.");
 918   return SDValue();
 919 }
 920
 921 //! Alternate entry point for generating the address of a constant pool entry
 922 SDValue
 923 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 924   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 925 }
 926
 927 static SDValue
 928 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 929   EVT PtrVT = Op.getValueType();
 930   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 931   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 932   SDValue Zero = DAG.getConstant(0, PtrVT);
 933   const TargetMachine &TM = DAG.getTarget();
 934   // FIXME there is no actual debug info here
 935   DebugLoc dl = Op.getDebugLoc();
 936
 937   if (TM.getRelocationModel() == Reloc::Static) {
 938     if (!ST->usingLargeMem()) {
 939       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 940     } else {
 941       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 942       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 943       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 944     }
 945   }
 946
 947   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 948                    " not supported.");
 949   return SDValue();
 950 }
 951
 952 static SDValue
 953 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 954   EVT PtrVT = Op.getValueType();
 955   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 956   const GlobalValue *GV = GSDN->getGlobal();
 957   SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
 958                                           PtrVT, GSDN->getOffset());
 959   const TargetMachine &TM = DAG.getTarget();
 960   SDValue Zero = DAG.getConstant(0, PtrVT);
 961   // FIXME there is no actual debug info here
 962   DebugLoc dl = Op.getDebugLoc();
 963
 964   if (TM.getRelocationModel() == Reloc::Static) {
 965     if (!ST->usingLargeMem()) {
 966       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 967     } else {
 968       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 969       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 970       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 971     }
 972   } else {
 973     report_fatal_error("LowerGlobalAddress: Relocation model other than static"
 974                       "not supported.");
 975     /*NOTREACHED*/
 976   }
 977
 978   return SDValue();
 979 }
 980
 981 //! Custom lower double precision floating point constants
 982 static SDValue
 983 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 984   EVT VT = Op.getValueType();
 985   // FIXME there is no actual debug info here
 986   DebugLoc dl = Op.getDebugLoc();
 987
 988   if (VT == MVT::f64) {
 989     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 990
 991     assert((FP != 0) &&
 992            "LowerConstantFP: Node is not ConstantFPSDNode");
 993
 994     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 995     SDValue T = DAG.getConstant(dbits, MVT::i64);
 996     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 997     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 998                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 999   }
1000
1001   return SDValue();
1002 }
1003
1004 SDValue
1005 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1006                                         CallingConv::ID CallConv, bool isVarArg,
1007                                         const SmallVectorImpl<ISD::InputArg>
1008                                           &Ins,
1009                                         DebugLoc dl, SelectionDAG &DAG,
1010                                         SmallVectorImpl<SDValue> &InVals)
1011                                           const {
1012
1013   MachineFunction &MF = DAG.getMachineFunction();
1014   MachineFrameInfo *MFI = MF.getFrameInfo();
1015   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1016   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1017
1018   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1019   unsigned ArgRegIdx = 0;
1020   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1021
1022   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1023
1024   SmallVector<CCValAssign, 16> ArgLocs;
1025   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1026                  *DAG.getContext());
1027   // FIXME: allow for other calling conventions
1028   CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1029
1030   // Add DAG nodes to load the arguments or copy them out of registers.
1031   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1032     EVT ObjectVT = Ins[ArgNo].VT;
1033     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1034     SDValue ArgVal;
1035     CCValAssign &VA = ArgLocs[ArgNo];
1036
1037     if (VA.isRegLoc()) {
1038       const TargetRegisterClass *ArgRegClass;
1039
1040       switch (ObjectVT.getSimpleVT().SimpleTy) {
1041       default:
1042         report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1043                            Twine(ObjectVT.getEVTString()));
1044       case MVT::i8:
1045         ArgRegClass = &SPU::R8CRegClass;
1046         break;
1047       case MVT::i16:
1048         ArgRegClass = &SPU::R16CRegClass;
1049         break;
1050       case MVT::i32:
1051         ArgRegClass = &SPU::R32CRegClass;
1052         break;
1053       case MVT::i64:
1054         ArgRegClass = &SPU::R64CRegClass;
1055         break;
1056       case MVT::i128:
1057         ArgRegClass = &SPU::GPRCRegClass;
1058         break;
1059       case MVT::f32:
1060         ArgRegClass = &SPU::R32FPRegClass;
1061         break;
1062       case MVT::f64:
1063         ArgRegClass = &SPU::R64FPRegClass;
1064         break;
1065       case MVT::v2f64:
1066       case MVT::v4f32:
1067       case MVT::v2i64:
1068       case MVT::v4i32:
1069       case MVT::v8i16:
1070       case MVT::v16i8:
1071       case MVT::v2i32:
1072       case MVT::v2f32:
1073         ArgRegClass = &SPU::VECREGRegClass;
1074         break;
1075       }
1076
1077       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1078       RegInfo.addLiveIn(VA.getLocReg(), VReg);
1079       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1080       ++ArgRegIdx;
1081     } else {
1082       // We need to load the argument to a virtual register if we determined
1083       // above that we ran out of physical registers of the appropriate type
1084       // or we're forced to do vararg
1085       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1086       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1087       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1088       ArgOffset += StackSlotSize;
1089     }
1090
1091     InVals.push_back(ArgVal);
1092     // Update the chain
1093     Chain = ArgVal.getOperand(0);
1094   }
1095
1096   // vararg handling:
1097   if (isVarArg) {
1098     // FIXME: we should be able to query the argument registers from
1099     //        tablegen generated code.
1100     static const unsigned ArgRegs[] = {
1101       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
1102       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1103       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1104       SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1105       SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1106       SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1107       SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1108       SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1109       SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1110       SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1111       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1112     };
1113     // size of ArgRegs array
1114     unsigned NumArgRegs = 77;
1115
1116     // We will spill (79-3)+1 registers to the stack
1117     SmallVector<SDValue, 79-3+1> MemOps;
1118
1119     // Create the frame slot
1120     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1121       FuncInfo->setVarArgsFrameIndex(
1122         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1123       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1124       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1125       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1126       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1127                                    false, false, 0);
1128       Chain = Store.getOperand(0);
1129       MemOps.push_back(Store);
1130
1131       // Increment address by stack slot size for the next stored argument
1132       ArgOffset += StackSlotSize;
1133     }
1134     if (!MemOps.empty())
1135       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1136                           &MemOps[0], MemOps.size());
1137   }
1138
1139   return Chain;
1140 }
1141
1142 /// isLSAAddress - Return the immediate to use if the specified
1143 /// value is representable as a LSA address.
1144 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1145   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1146   if (!C) return 0;
1147
1148   int Addr = C->getZExtValue();
1149   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1150       (Addr << 14 >> 14) != Addr)
1151     return 0;  // Top 14 bits have to be sext of immediate.
1152
1153   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1154 }
1155
1156 SDValue
1157 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1158                              CallingConv::ID CallConv, bool isVarArg,
1159                              bool &isTailCall,
1160                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1161                              const SmallVectorImpl<SDValue> &OutVals,
1162                              const SmallVectorImpl<ISD::InputArg> &Ins,
1163                              DebugLoc dl, SelectionDAG &DAG,
1164                              SmallVectorImpl<SDValue> &InVals) const {
1165   // CellSPU target does not yet support tail call optimization.
1166   isTailCall = false;
1167
1168   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1169   unsigned NumOps     = Outs.size();
1170   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1171
1172   SmallVector<CCValAssign, 16> ArgLocs;
1173   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1174                  *DAG.getContext());
1175   // FIXME: allow for other calling conventions
1176   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
1177
1178   const unsigned NumArgRegs = ArgLocs.size();
1179
1180
1181   // Handy pointer type
1182   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1183
1184   // Set up a copy of the stack pointer for use loading and storing any
1185   // arguments that may not fit in the registers available for argument
1186   // passing.
1187   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1188
1189   // Figure out which arguments are going to go in registers, and which in
1190   // memory.
1191   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1192   unsigned ArgRegIdx = 0;
1193
1194   // Keep track of registers passing arguments
1195   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1196   // And the arguments passed on the stack
1197   SmallVector<SDValue, 8> MemOpChains;
1198
1199   for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1200     SDValue Arg = OutVals[ArgRegIdx];
1201     CCValAssign &VA = ArgLocs[ArgRegIdx];
1202
1203     // PtrOff will be used to store the current argument to the stack if a
1204     // register cannot be found for it.
1205     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1206     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1207
1208     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1209     default: llvm_unreachable("Unexpected ValueType for argument!");
1210     case MVT::i8:
1211     case MVT::i16:
1212     case MVT::i32:
1213     case MVT::i64:
1214     case MVT::i128:
1215     case MVT::f32:
1216     case MVT::f64:
1217     case MVT::v2i64:
1218     case MVT::v2f64:
1219     case MVT::v4f32:
1220     case MVT::v4i32:
1221     case MVT::v8i16:
1222     case MVT::v16i8:
1223       if (ArgRegIdx != NumArgRegs) {
1224         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1225       } else {
1226         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1227                                            false, false, 0));
1228         ArgOffset += StackSlotSize;
1229       }
1230       break;
1231     }
1232   }
1233
1234   // Accumulate how many bytes are to be pushed on the stack, including the
1235   // linkage area, and parameter passing area.  According to the SPU ABI,
1236   // we minimally need space for [LR] and [SP].
1237   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1238
1239   // Insert a call sequence start
1240   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1241                                                             true));
1242
1243   if (!MemOpChains.empty()) {
1244     // Adjust the stack pointer for the stack arguments.
1245     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1246                         &MemOpChains[0], MemOpChains.size());
1247   }
1248
1249   // Build a sequence of copy-to-reg nodes chained together with token chain
1250   // and flag operands which copy the outgoing args into the appropriate regs.
1251   SDValue InFlag;
1252   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1253     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1254                              RegsToPass[i].second, InFlag);
1255     InFlag = Chain.getValue(1);
1256   }
1257
1258   SmallVector<SDValue, 8> Ops;
1259   unsigned CallOpc = SPUISD::CALL;
1260
1261   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1262   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1263   // node so that legalize doesn't hack it.
1264   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1265     const GlobalValue *GV = G->getGlobal();
1266     EVT CalleeVT = Callee.getValueType();
1267     SDValue Zero = DAG.getConstant(0, PtrVT);
1268     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1269
1270     if (!ST->usingLargeMem()) {
1271       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1272       // style calls, otherwise, external symbols are BRASL calls. This assumes
1273       // that declared/defined symbols are in the same compilation unit and can
1274       // be reached through PC-relative jumps.
1275       //
1276       // NOTE:
1277       // This may be an unsafe assumption for JIT and really large compilation
1278       // units.
1279       if (GV->isDeclaration()) {
1280         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1281       } else {
1282         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1283       }
1284     } else {
1285       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1286       // address pairs:
1287       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1288     }
1289   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1290     EVT CalleeVT = Callee.getValueType();
1291     SDValue Zero = DAG.getConstant(0, PtrVT);
1292     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1293         Callee.getValueType());
1294
1295     if (!ST->usingLargeMem()) {
1296       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1297     } else {
1298       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1299     }
1300   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1301     // If this is an absolute destination address that appears to be a legal
1302     // local store address, use the munged value.
1303     Callee = SDValue(Dest, 0);
1304   }
1305
1306   Ops.push_back(Chain);
1307   Ops.push_back(Callee);
1308
1309   // Add argument registers to the end of the list so that they are known live
1310   // into the call.
1311   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1312     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1313                                   RegsToPass[i].second.getValueType()));
1314
1315   if (InFlag.getNode())
1316     Ops.push_back(InFlag);
1317   // Returns a chain and a flag for retval copy to use.
1318   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1319                       &Ops[0], Ops.size());
1320   InFlag = Chain.getValue(1);
1321
1322   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1323                              DAG.getIntPtrConstant(0, true), InFlag);
1324   if (!Ins.empty())
1325     InFlag = Chain.getValue(1);
1326
1327   // If the function returns void, just return the chain.
1328   if (Ins.empty())
1329     return Chain;
1330
1331   // If the call has results, copy the values out of the ret val registers.
1332   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1333   default: llvm_unreachable("Unexpected ret value!");
1334   case MVT::Other: break;
1335   case MVT::i32:
1336     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1337       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1338                                  MVT::i32, InFlag).getValue(1);
1339       InVals.push_back(Chain.getValue(0));
1340       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1341                                  Chain.getValue(2)).getValue(1);
1342       InVals.push_back(Chain.getValue(0));
1343     } else {
1344       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1345                                  InFlag).getValue(1);
1346       InVals.push_back(Chain.getValue(0));
1347     }
1348     break;
1349   case MVT::i8:
1350   case MVT::i16:
1351   case MVT::i64:
1352   case MVT::i128:
1353   case MVT::f32:
1354   case MVT::f64:
1355   case MVT::v2f64:
1356   case MVT::v2i64:
1357   case MVT::v4f32:
1358   case MVT::v4i32:
1359   case MVT::v8i16:
1360   case MVT::v16i8:
1361     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1362                                    InFlag).getValue(1);
1363     InVals.push_back(Chain.getValue(0));
1364     break;
1365   }
1366
1367   return Chain;
1368 }
1369
1370 SDValue
1371 SPUTargetLowering::LowerReturn(SDValue Chain,
1372                                CallingConv::ID CallConv, bool isVarArg,
1373                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1374                                const SmallVectorImpl<SDValue> &OutVals,
1375                                DebugLoc dl, SelectionDAG &DAG) const {
1376
1377   SmallVector<CCValAssign, 16> RVLocs;
1378   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1379                  RVLocs, *DAG.getContext());
1380   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1381
1382   // If this is the first return lowered for this function, add the regs to the
1383   // liveout set for the function.
1384   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1385     for (unsigned i = 0; i != RVLocs.size(); ++i)
1386       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1387   }
1388
1389   SDValue Flag;
1390
1391   // Copy the result values into the output registers.
1392   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1393     CCValAssign &VA = RVLocs[i];
1394     assert(VA.isRegLoc() && "Can only return in registers!");
1395     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1396                              OutVals[i], Flag);
1397     Flag = Chain.getValue(1);
1398   }
1399
1400   if (Flag.getNode())
1401     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1402   else
1403     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1404 }
1405
1406
1407 //===----------------------------------------------------------------------===//
1408 // Vector related lowering:
1409 //===----------------------------------------------------------------------===//
1410
1411 static ConstantSDNode *
1412 getVecImm(SDNode *N) {
1413   SDValue OpVal(0, 0);
1414
1415   // Check to see if this buildvec has a single non-undef value in its elements.
1416   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1417     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1418     if (OpVal.getNode() == 0)
1419       OpVal = N->getOperand(i);
1420     else if (OpVal != N->getOperand(i))
1421       return 0;
1422   }
1423
1424   if (OpVal.getNode() != 0) {
1425     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1426       return CN;
1427     }
1428   }
1429
1430   return 0;
1431 }
1432
1433 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1435 /// constant
1436 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1437                               EVT ValueType) {
1438   if (ConstantSDNode *CN = getVecImm(N)) {
1439     uint64_t Value = CN->getZExtValue();
1440     if (ValueType == MVT::i64) {
1441       uint64_t UValue = CN->getZExtValue();
1442       uint32_t upper = uint32_t(UValue >> 32);
1443       uint32_t lower = uint32_t(UValue);
1444       if (upper != lower)
1445         return SDValue();
1446       Value = Value >> 32;
1447     }
1448     if (Value <= 0x3ffff)
1449       return DAG.getTargetConstant(Value, ValueType);
1450   }
1451
1452   return SDValue();
1453 }
1454
1455 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1456 /// and the value fits into a signed 16-bit constant, and if so, return the
1457 /// constant
1458 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1459                               EVT ValueType) {
1460   if (ConstantSDNode *CN = getVecImm(N)) {
1461     int64_t Value = CN->getSExtValue();
1462     if (ValueType == MVT::i64) {
1463       uint64_t UValue = CN->getZExtValue();
1464       uint32_t upper = uint32_t(UValue >> 32);
1465       uint32_t lower = uint32_t(UValue);
1466       if (upper != lower)
1467         return SDValue();
1468       Value = Value >> 32;
1469     }
1470     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1471       return DAG.getTargetConstant(Value, ValueType);
1472     }
1473   }
1474
1475   return SDValue();
1476 }
1477
1478 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1479 /// and the value fits into a signed 10-bit constant, and if so, return the
1480 /// constant
1481 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1482                               EVT ValueType) {
1483   if (ConstantSDNode *CN = getVecImm(N)) {
1484     int64_t Value = CN->getSExtValue();
1485     if (ValueType == MVT::i64) {
1486       uint64_t UValue = CN->getZExtValue();
1487       uint32_t upper = uint32_t(UValue >> 32);
1488       uint32_t lower = uint32_t(UValue);
1489       if (upper != lower)
1490         return SDValue();
1491       Value = Value >> 32;
1492     }
1493     if (isInt<10>(Value))
1494       return DAG.getTargetConstant(Value, ValueType);
1495   }
1496
1497   return SDValue();
1498 }
1499
1500 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1501 /// and the value fits into a signed 8-bit constant, and if so, return the
1502 /// constant.
1503 ///
1504 /// @note: The incoming vector is v16i8 because that's the only way we can load
1505 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1506 /// same value.
1507 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1508                              EVT ValueType) {
1509   if (ConstantSDNode *CN = getVecImm(N)) {
1510     int Value = (int) CN->getZExtValue();
1511     if (ValueType == MVT::i16
1512         && Value <= 0xffff                 /* truncated from uint64_t */
1513         && ((short) Value >> 8) == ((short) Value & 0xff))
1514       return DAG.getTargetConstant(Value & 0xff, ValueType);
1515     else if (ValueType == MVT::i8
1516              && (Value & 0xff) == Value)
1517       return DAG.getTargetConstant(Value, ValueType);
1518   }
1519
1520   return SDValue();
1521 }
1522
1523 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1524 /// and the value fits into a signed 16-bit constant, and if so, return the
1525 /// constant
1526 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1527                                EVT ValueType) {
1528   if (ConstantSDNode *CN = getVecImm(N)) {
1529     uint64_t Value = CN->getZExtValue();
1530     if ((ValueType == MVT::i32
1531           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1532         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1533       return DAG.getTargetConstant(Value >> 16, ValueType);
1534   }
1535
1536   return SDValue();
1537 }
1538
1539 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1540 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1541   if (ConstantSDNode *CN = getVecImm(N)) {
1542     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1543   }
1544
1545   return SDValue();
1546 }
1547
1548 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1549 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1550   if (ConstantSDNode *CN = getVecImm(N)) {
1551     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1552   }
1553
1554   return SDValue();
1555 }
1556
1557 //! Lower a BUILD_VECTOR instruction creatively:
1558 static SDValue
1559 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1560   EVT VT = Op.getValueType();
1561   EVT EltVT = VT.getVectorElementType();
1562   DebugLoc dl = Op.getDebugLoc();
1563   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1564   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1565   unsigned minSplatBits = EltVT.getSizeInBits();
1566
1567   if (minSplatBits < 16)
1568     minSplatBits = 16;
1569
1570   APInt APSplatBits, APSplatUndef;
1571   unsigned SplatBitSize;
1572   bool HasAnyUndefs;
1573
1574   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1575                             HasAnyUndefs, minSplatBits)
1576       || minSplatBits < SplatBitSize)
1577     return SDValue();   // Wasn't a constant vector or splat exceeded min
1578
1579   uint64_t SplatBits = APSplatBits.getZExtValue();
1580
1581   switch (VT.getSimpleVT().SimpleTy) {
1582   default:
1583     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1584                        Twine(VT.getEVTString()));
1585     /*NOTREACHED*/
1586   case MVT::v4f32: {
1587     uint32_t Value32 = uint32_t(SplatBits);
1588     assert(SplatBitSize == 32
1589            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1590     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1591     SDValue T = DAG.getConstant(Value32, MVT::i32);
1592     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1593                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1594     break;
1595   }
1596   case MVT::v2f64: {
1597     uint64_t f64val = uint64_t(SplatBits);
1598     assert(SplatBitSize == 64
1599            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1600     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1601     SDValue T = DAG.getConstant(f64val, MVT::i64);
1602     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1603                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1604     break;
1605   }
1606   case MVT::v16i8: {
1607    // 8-bit constants have to be expanded to 16-bits
1608    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1609    SmallVector<SDValue, 8> Ops;
1610
1611    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1612    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1613                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1614   }
1615   case MVT::v8i16: {
1616     unsigned short Value16 = SplatBits;
1617     SDValue T = DAG.getConstant(Value16, EltVT);
1618     SmallVector<SDValue, 8> Ops;
1619
1620     Ops.assign(8, T);
1621     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1622   }
1623   case MVT::v4i32: {
1624     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1625     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1626   }
1627   case MVT::v2f32:
1628   case MVT::v2i32: {
1629     return SDValue();
1630   }
1631   case MVT::v2i64: {
1632     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1633   }
1634   }
1635
1636   return SDValue();
1637 }
1638
1639 /*!
1640  */
1641 SDValue
1642 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1643                      DebugLoc dl) {
1644   uint32_t upper = uint32_t(SplatVal >> 32);
1645   uint32_t lower = uint32_t(SplatVal);
1646
1647   if (upper == lower) {
1648     // Magic constant that can be matched by IL, ILA, et. al.
1649     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1650     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1651                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1652                                    Val, Val, Val, Val));
1653   } else {
1654     bool upper_special, lower_special;
1655
1656     // NOTE: This code creates common-case shuffle masks that can be easily
1657     // detected as common expressions. It is not attempting to create highly
1658     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1659
1660     // Detect if the upper or lower half is a special shuffle mask pattern:
1661     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1662     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1663
1664     // Both upper and lower are special, lower to a constant pool load:
1665     if (lower_special && upper_special) {
1666       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1667       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1668                          SplatValCN, SplatValCN);
1669     }
1670
1671     SDValue LO32;
1672     SDValue HI32;
1673     SmallVector<SDValue, 16> ShufBytes;
1674     SDValue Result;
1675
1676     // Create lower vector if not a special pattern
1677     if (!lower_special) {
1678       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1679       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1680                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1681                                      LO32C, LO32C, LO32C, LO32C));
1682     }
1683
1684     // Create upper vector if not a special pattern
1685     if (!upper_special) {
1686       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1687       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1688                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1689                                      HI32C, HI32C, HI32C, HI32C));
1690     }
1691
1692     // If either upper or lower are special, then the two input operands are
1693     // the same (basically, one of them is a "don't care")
1694     if (lower_special)
1695       LO32 = HI32;
1696     if (upper_special)
1697       HI32 = LO32;
1698
1699     for (int i = 0; i < 4; ++i) {
1700       uint64_t val = 0;
1701       for (int j = 0; j < 4; ++j) {
1702         SDValue V;
1703         bool process_upper, process_lower;
1704         val <<= 8;
1705         process_upper = (upper_special && (i & 1) == 0);
1706         process_lower = (lower_special && (i & 1) == 1);
1707
1708         if (process_upper || process_lower) {
1709           if ((process_upper && upper == 0)
1710                   || (process_lower && lower == 0))
1711             val |= 0x80;
1712           else if ((process_upper && upper == 0xffffffff)
1713                   || (process_lower && lower == 0xffffffff))
1714             val |= 0xc0;
1715           else if ((process_upper && upper == 0x80000000)
1716                   || (process_lower && lower == 0x80000000))
1717             val |= (j == 0 ? 0xe0 : 0x80);
1718         } else
1719           val |= i * 4 + j + ((i & 1) * 16);
1720       }
1721
1722       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1723     }
1724
1725     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1726                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1727                                    &ShufBytes[0], ShufBytes.size()));
1728   }
1729 }
1730
1731 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1732 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1733 /// permutation vector, V3, is monotonically increasing with one "exception"
1734 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1735 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1736 /// In either case, the net result is going to eventually invoke SHUFB to
1737 /// permute/shuffle the bytes from V1 and V2.
1738 /// \note
1739 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1740 /// control word for byte/halfword/word insertion. This takes care of a single
1741 /// element move from V2 into V1.
1742 /// \note
1743 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1744 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1745   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1746   SDValue V1 = Op.getOperand(0);
1747   SDValue V2 = Op.getOperand(1);
1748   DebugLoc dl = Op.getDebugLoc();
1749
1750   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1751
1752   // If we have a single element being moved from V1 to V2, this can be handled
1753   // using the C*[DX] compute mask instructions, but the vector elements have
1754   // to be monotonically increasing with one exception element.
1755   EVT VecVT = V1.getValueType();
1756   EVT EltVT = VecVT.getVectorElementType();
1757   unsigned EltsFromV2 = 0;
1758   unsigned V2Elt = 0;
1759   unsigned V2EltIdx0 = 0;
1760   unsigned CurrElt = 0;
1761   unsigned MaxElts = VecVT.getVectorNumElements();
1762   unsigned PrevElt = 0;
1763   unsigned V0Elt = 0;
1764   bool monotonic = true;
1765   bool rotate = true;
1766   EVT maskVT;             // which of the c?d instructions to use
1767
1768   if (EltVT == MVT::i8) {
1769     V2EltIdx0 = 16;
1770     maskVT = MVT::v16i8;
1771   } else if (EltVT == MVT::i16) {
1772     V2EltIdx0 = 8;
1773     maskVT = MVT::v8i16;
1774   } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) {
1775     V2EltIdx0 = 2;
1776     maskVT = MVT::v4i32;
1777   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1778     V2EltIdx0 = 4;
1779     maskVT = MVT::v4i32;
1780   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1781     V2EltIdx0 = 2;
1782     maskVT = MVT::v2i64;
1783   } else
1784     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1785
1786   for (unsigned i = 0; i != MaxElts; ++i) {
1787     if (SVN->getMaskElt(i) < 0)
1788       continue;
1789
1790     unsigned SrcElt = SVN->getMaskElt(i);
1791
1792     if (monotonic) {
1793       if (SrcElt >= V2EltIdx0) {
1794         if (1 >= (++EltsFromV2)) {
1795           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1796         }
1797       } else if (CurrElt != SrcElt) {
1798         monotonic = false;
1799       }
1800
1801       ++CurrElt;
1802     }
1803
1804     if (rotate) {
1805       if (PrevElt > 0 && SrcElt < MaxElts) {
1806         if ((PrevElt == SrcElt - 1)
1807             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1808           PrevElt = SrcElt;
1809           if (SrcElt == 0)
1810             V0Elt = i;
1811         } else {
1812           rotate = false;
1813         }
1814       } else if (i == 0) {
1815         // First time through, need to keep track of previous element
1816         PrevElt = SrcElt;
1817       } else {
1818         // This isn't a rotation, takes elements from vector 2
1819         rotate = false;
1820       }
1821     }
1822   }
1823
1824   if (EltsFromV2 == 1 && monotonic) {
1825     // Compute mask and shuffle
1826     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827
1828     // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1829     // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1830     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1831                                 DAG.getRegister(SPU::R1, PtrVT),
1832                                 DAG.getConstant(V2Elt, MVT::i32));
1833     SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1834                                      maskVT, Pointer);
1835
1836     // Use shuffle mask in SHUFB synthetic instruction:
1837     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1838                        ShufMaskOp);
1839   } else if (rotate) {
1840     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1841
1842     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1843                        V1, DAG.getConstant(rotamt, MVT::i16));
1844   } else {
1845    // Convert the SHUFFLE_VECTOR mask's input element units to the
1846    // actual bytes.
1847     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1848
1849     SmallVector<SDValue, 16> ResultMask;
1850     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1851       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1852
1853       for (unsigned j = 0; j < BytesPerElement; ++j)
1854         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1855     }
1856     // For half vectors padd the mask with zeros for the second half.
1857     // This is needed because mask is assumed to be full vector elsewhere in
1858     // the SPU backend.
1859     if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32)
1860     for( unsigned i = 0; i < 2; ++i )
1861     {
1862       for (unsigned j = 0; j < BytesPerElement; ++j)
1863         ResultMask.push_back(DAG.getConstant(0,MVT::i8));
1864     }
1865
1866     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1867                                     &ResultMask[0], ResultMask.size());
1868     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1869   }
1870 }
1871
1872 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1873   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1874   DebugLoc dl = Op.getDebugLoc();
1875
1876   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1877     // For a constant, build the appropriate constant vector, which will
1878     // eventually simplify to a vector register load.
1879
1880     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1881     SmallVector<SDValue, 16> ConstVecValues;
1882     EVT VT;
1883     size_t n_copies;
1884
1885     // Create a constant vector:
1886     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1887     default: llvm_unreachable("Unexpected constant value type in "
1888                               "LowerSCALAR_TO_VECTOR");
1889     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1890     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1891     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1892     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1893     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1894     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1895     case MVT::v2i32: n_copies = 2; VT = MVT::i32; break;
1896     }
1897
1898     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1899     for (size_t j = 0; j < n_copies; ++j)
1900       ConstVecValues.push_back(CValue);
1901
1902     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1903                        &ConstVecValues[0], ConstVecValues.size());
1904   } else {
1905     // Otherwise, copy the value from one register to another:
1906     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1907     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1908     case MVT::i8:
1909     case MVT::i16:
1910     case MVT::i32:
1911     case MVT::i64:
1912     case MVT::f32:
1913     case MVT::f64:
1914       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1915     }
1916   }
1917
1918   return SDValue();
1919 }
1920
1921 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1922   EVT VT = Op.getValueType();
1923   SDValue N = Op.getOperand(0);
1924   SDValue Elt = Op.getOperand(1);
1925   DebugLoc dl = Op.getDebugLoc();
1926   SDValue retval;
1927
1928   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1929     // Constant argument:
1930     int EltNo = (int) C->getZExtValue();
1931
1932     // sanity checks:
1933     if (VT == MVT::i8 && EltNo >= 16)
1934       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1935     else if (VT == MVT::i16 && EltNo >= 8)
1936       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1937     else if (VT == MVT::i32 && EltNo >= 4)
1938       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1939     else if (VT == MVT::i64 && EltNo >= 2)
1940       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1941
1942     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1943       // i32 and i64: Element 0 is the preferred slot
1944       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1945     }
1946
1947     // Need to generate shuffle mask and extract:
1948     int prefslot_begin = -1, prefslot_end = -1;
1949     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1950
1951     switch (VT.getSimpleVT().SimpleTy) {
1952     default:
1953       assert(false && "Invalid value type!");
1954     case MVT::i8: {
1955       prefslot_begin = prefslot_end = 3;
1956       break;
1957     }
1958     case MVT::i16: {
1959       prefslot_begin = 2; prefslot_end = 3;
1960       break;
1961     }
1962     case MVT::i32:
1963     case MVT::f32: {
1964       prefslot_begin = 0; prefslot_end = 3;
1965       break;
1966     }
1967     case MVT::i64:
1968     case MVT::f64: {
1969       prefslot_begin = 0; prefslot_end = 7;
1970       break;
1971     }
1972     }
1973
1974     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1975            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1976
1977     unsigned int ShufBytes[16] = {
1978       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1979     };
1980     for (int i = 0; i < 16; ++i) {
1981       // zero fill uppper part of preferred slot, don't care about the
1982       // other slots:
1983       unsigned int mask_val;
1984       if (i <= prefslot_end) {
1985         mask_val =
1986           ((i < prefslot_begin)
1987            ? 0x80
1988            : elt_byte + (i - prefslot_begin));
1989
1990         ShufBytes[i] = mask_val;
1991       } else
1992         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1993     }
1994
1995     SDValue ShufMask[4];
1996     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1997       unsigned bidx = i * 4;
1998       unsigned int bits = ((ShufBytes[bidx] << 24) |
1999                            (ShufBytes[bidx+1] << 16) |
2000                            (ShufBytes[bidx+2] << 8) |
2001                            ShufBytes[bidx+3]);
2002       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2003     }
2004
2005     SDValue ShufMaskVec =
2006       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2007                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2008
2009     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2010                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2011                                      N, N, ShufMaskVec));
2012   } else {
2013     // Variable index: Rotate the requested element into slot 0, then replicate
2014     // slot 0 across the vector
2015     EVT VecVT = N.getValueType();
2016     if (!VecVT.isSimple() || !VecVT.isVector()) {
2017       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2018                         "vector type!");
2019     }
2020
2021     // Make life easier by making sure the index is zero-extended to i32
2022     if (Elt.getValueType() != MVT::i32)
2023       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2024
2025     // Scale the index to a bit/byte shift quantity
2026     APInt scaleFactor =
2027             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2028     unsigned scaleShift = scaleFactor.logBase2();
2029     SDValue vecShift;
2030
2031     if (scaleShift > 0) {
2032       // Scale the shift factor:
2033       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2034                         DAG.getConstant(scaleShift, MVT::i32));
2035     }
2036
2037     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2038
2039     // Replicate the bytes starting at byte 0 across the entire vector (for
2040     // consistency with the notion of a unified register set)
2041     SDValue replicate;
2042
2043     switch (VT.getSimpleVT().SimpleTy) {
2044     default:
2045       report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2046                         "type");
2047       /*NOTREACHED*/
2048     case MVT::i8: {
2049       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2050       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2051                               factor, factor, factor, factor);
2052       break;
2053     }
2054     case MVT::i16: {
2055       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2056       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2057                               factor, factor, factor, factor);
2058       break;
2059     }
2060     case MVT::i32:
2061     case MVT::f32: {
2062       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2063       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2064                               factor, factor, factor, factor);
2065       break;
2066     }
2067     case MVT::i64:
2068     case MVT::f64: {
2069       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2070       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2071       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2072                               loFactor, hiFactor, loFactor, hiFactor);
2073       break;
2074     }
2075     }
2076
2077     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2078                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2079                                      vecShift, vecShift, replicate));
2080   }
2081
2082   return retval;
2083 }
2084
2085 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2086   SDValue VecOp = Op.getOperand(0);
2087   SDValue ValOp = Op.getOperand(1);
2088   SDValue IdxOp = Op.getOperand(2);
2089   DebugLoc dl = Op.getDebugLoc();
2090   EVT VT = Op.getValueType();
2091
2092   // use 0 when the lane to insert to is 'undef'
2093   int64_t Idx=0;
2094   if (IdxOp.getOpcode() != ISD::UNDEF) {
2095     ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2096     assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2097     Idx = (CN->getSExtValue());
2098   }
2099
2100   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2101   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2102   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2103                                 DAG.getRegister(SPU::R1, PtrVT),
2104                                 DAG.getConstant(Idx, PtrVT));
2105   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2106
2107   SDValue result =
2108     DAG.getNode(SPUISD::SHUFB, dl, VT,
2109                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2110                 VecOp,
2111                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2112
2113   return result;
2114 }
2115
2116 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2117                            const TargetLowering &TLI)
2118 {
2119   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2120   DebugLoc dl = Op.getDebugLoc();
2121   EVT ShiftVT = TLI.getShiftAmountTy();
2122
2123   assert(Op.getValueType() == MVT::i8);
2124   switch (Opc) {
2125   default:
2126     llvm_unreachable("Unhandled i8 math operator");
2127     /*NOTREACHED*/
2128     break;
2129   case ISD::ADD: {
2130     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2131     // the result:
2132     SDValue N1 = Op.getOperand(1);
2133     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2134     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2135     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2136                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2137
2138   }
2139
2140   case ISD::SUB: {
2141     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2142     // the result:
2143     SDValue N1 = Op.getOperand(1);
2144     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2145     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2146     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2147                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2148   }
2149   case ISD::ROTR:
2150   case ISD::ROTL: {
2151     SDValue N1 = Op.getOperand(1);
2152     EVT N1VT = N1.getValueType();
2153
2154     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2155     if (!N1VT.bitsEq(ShiftVT)) {
2156       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2157                        ? ISD::ZERO_EXTEND
2158                        : ISD::TRUNCATE;
2159       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2160     }
2161
2162     // Replicate lower 8-bits into upper 8:
2163     SDValue ExpandArg =
2164       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2165                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2166                               N0, DAG.getConstant(8, MVT::i32)));
2167
2168     // Truncate back down to i8
2169     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2170                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2171   }
2172   case ISD::SRL:
2173   case ISD::SHL: {
2174     SDValue N1 = Op.getOperand(1);
2175     EVT N1VT = N1.getValueType();
2176
2177     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2178     if (!N1VT.bitsEq(ShiftVT)) {
2179       unsigned N1Opc = ISD::ZERO_EXTEND;
2180
2181       if (N1.getValueType().bitsGT(ShiftVT))
2182         N1Opc = ISD::TRUNCATE;
2183
2184       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2185     }
2186
2187     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2189   }
2190   case ISD::SRA: {
2191     SDValue N1 = Op.getOperand(1);
2192     EVT N1VT = N1.getValueType();
2193
2194     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2195     if (!N1VT.bitsEq(ShiftVT)) {
2196       unsigned N1Opc = ISD::SIGN_EXTEND;
2197
2198       if (N1VT.bitsGT(ShiftVT))
2199         N1Opc = ISD::TRUNCATE;
2200       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2201     }
2202
2203     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2204                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2205   }
2206   case ISD::MUL: {
2207     SDValue N1 = Op.getOperand(1);
2208
2209     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2210     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2211     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2212                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2213     break;
2214   }
2215   }
2216
2217   return SDValue();
2218 }
2219
2220 //! Lower byte immediate operations for v16i8 vectors:
2221 static SDValue
2222 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2223   SDValue ConstVec;
2224   SDValue Arg;
2225   EVT VT = Op.getValueType();
2226   DebugLoc dl = Op.getDebugLoc();
2227
2228   ConstVec = Op.getOperand(0);
2229   Arg = Op.getOperand(1);
2230   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2231     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2232       ConstVec = ConstVec.getOperand(0);
2233     } else {
2234       ConstVec = Op.getOperand(1);
2235       Arg = Op.getOperand(0);
2236       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2237         ConstVec = ConstVec.getOperand(0);
2238       }
2239     }
2240   }
2241
2242   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2243     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2244     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2245
2246     APInt APSplatBits, APSplatUndef;
2247     unsigned SplatBitSize;
2248     bool HasAnyUndefs;
2249     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2250
2251     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2252                               HasAnyUndefs, minSplatBits)
2253         && minSplatBits <= SplatBitSize) {
2254       uint64_t SplatBits = APSplatBits.getZExtValue();
2255       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2256
2257       SmallVector<SDValue, 16> tcVec;
2258       tcVec.assign(16, tc);
2259       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2260                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2261     }
2262   }
2263
2264   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2265   // lowered.  Return the operation, rather than a null SDValue.
2266   return Op;
2267 }
2268
2269 //! Custom lowering for CTPOP (count population)
2270 /*!
2271   Custom lowering code that counts the number ones in the input
2272   operand. SPU has such an instruction, but it counts the number of
2273   ones per byte, which then have to be accumulated.
2274 */
2275 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2276   EVT VT = Op.getValueType();
2277   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2278                                VT, (128 / VT.getSizeInBits()));
2279   DebugLoc dl = Op.getDebugLoc();
2280
2281   switch (VT.getSimpleVT().SimpleTy) {
2282   default:
2283     assert(false && "Invalid value type!");
2284   case MVT::i8: {
2285     SDValue N = Op.getOperand(0);
2286     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2287
2288     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2289     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2290
2291     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2292   }
2293
2294   case MVT::i16: {
2295     MachineFunction &MF = DAG.getMachineFunction();
2296     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2297
2298     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2299
2300     SDValue N = Op.getOperand(0);
2301     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2302     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2303     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2304
2305     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2306     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2307
2308     // CNTB_result becomes the chain to which all of the virtual registers
2309     // CNTB_reg, SUM1_reg become associated:
2310     SDValue CNTB_result =
2311       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2312
2313     SDValue CNTB_rescopy =
2314       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2315
2316     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2317
2318     return DAG.getNode(ISD::AND, dl, MVT::i16,
2319                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2320                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2321                                                Tmp1, Shift1),
2322                                    Tmp1),
2323                        Mask0);
2324   }
2325
2326   case MVT::i32: {
2327     MachineFunction &MF = DAG.getMachineFunction();
2328     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2329
2330     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2331     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2332
2333     SDValue N = Op.getOperand(0);
2334     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2335     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2336     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2337     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2338
2339     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2340     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2341
2342     // CNTB_result becomes the chain to which all of the virtual registers
2343     // CNTB_reg, SUM1_reg become associated:
2344     SDValue CNTB_result =
2345       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2346
2347     SDValue CNTB_rescopy =
2348       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2349
2350     SDValue Comp1 =
2351       DAG.getNode(ISD::SRL, dl, MVT::i32,
2352                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2353                   Shift1);
2354
2355     SDValue Sum1 =
2356       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2357                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2358
2359     SDValue Sum1_rescopy =
2360       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2361
2362     SDValue Comp2 =
2363       DAG.getNode(ISD::SRL, dl, MVT::i32,
2364                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2365                   Shift2);
2366     SDValue Sum2 =
2367       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2368                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2369
2370     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2371   }
2372
2373   case MVT::i64:
2374     break;
2375   }
2376
2377   return SDValue();
2378 }
2379
2380 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2381 /*!
2382  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2383  All conversions to i64 are expanded to a libcall.
2384  */
2385 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2386                               const SPUTargetLowering &TLI) {
2387   EVT OpVT = Op.getValueType();
2388   SDValue Op0 = Op.getOperand(0);
2389   EVT Op0VT = Op0.getValueType();
2390
2391   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2392       || OpVT == MVT::i64) {
2393     // Convert f32 / f64 to i32 / i64 via libcall.
2394     RTLIB::Libcall LC =
2395             (Op.getOpcode() == ISD::FP_TO_SINT)
2396              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2397              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2398     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2399     SDValue Dummy;
2400     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2401   }
2402
2403   return Op;
2404 }
2405
2406 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2407 /*!
2408  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2409  All conversions from i64 are expanded to a libcall.
2410  */
2411 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2412                               const SPUTargetLowering &TLI) {
2413   EVT OpVT = Op.getValueType();
2414   SDValue Op0 = Op.getOperand(0);
2415   EVT Op0VT = Op0.getValueType();
2416
2417   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2418       || Op0VT == MVT::i64) {
2419     // Convert i32, i64 to f64 via libcall:
2420     RTLIB::Libcall LC =
2421             (Op.getOpcode() == ISD::SINT_TO_FP)
2422              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2423              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2424     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2425     SDValue Dummy;
2426     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2427   }
2428
2429   return Op;
2430 }
2431
2432 //! Lower ISD::SETCC
2433 /*!
2434  This handles MVT::f64 (double floating point) condition lowering
2435  */
2436 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2437                           const TargetLowering &TLI) {
2438   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2439   DebugLoc dl = Op.getDebugLoc();
2440   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2441
2442   SDValue lhs = Op.getOperand(0);
2443   SDValue rhs = Op.getOperand(1);
2444   EVT lhsVT = lhs.getValueType();
2445   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2446
2447   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2448   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2449   EVT IntVT(MVT::i64);
2450
2451   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2452   // selected to a NOP:
2453   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2454   SDValue lhsHi32 =
2455           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2456                       DAG.getNode(ISD::SRL, dl, IntVT,
2457                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2458   SDValue lhsHi32abs =
2459           DAG.getNode(ISD::AND, dl, MVT::i32,
2460                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2461   SDValue lhsLo32 =
2462           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2463
2464   // SETO and SETUO only use the lhs operand:
2465   if (CC->get() == ISD::SETO) {
2466     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2467     // SETUO
2468     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2469     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2470                        DAG.getSetCC(dl, ccResultVT,
2471                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2472                                     ISD::SETUO),
2473                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2474   } else if (CC->get() == ISD::SETUO) {
2475     // Evaluates to true if Op0 is [SQ]NaN
2476     return DAG.getNode(ISD::AND, dl, ccResultVT,
2477                        DAG.getSetCC(dl, ccResultVT,
2478                                     lhsHi32abs,
2479                                     DAG.getConstant(0x7ff00000, MVT::i32),
2480                                     ISD::SETGE),
2481                        DAG.getSetCC(dl, ccResultVT,
2482                                     lhsLo32,
2483                                     DAG.getConstant(0, MVT::i32),
2484                                     ISD::SETGT));
2485   }
2486
2487   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2488   SDValue rhsHi32 =
2489           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2490                       DAG.getNode(ISD::SRL, dl, IntVT,
2491                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2492
2493   // If a value is negative, subtract from the sign magnitude constant:
2494   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2495
2496   // Convert the sign-magnitude representation into 2's complement:
2497   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2498                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2499   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2500   SDValue lhsSelect =
2501           DAG.getNode(ISD::SELECT, dl, IntVT,
2502                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2503
2504   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2505                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2506   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2507   SDValue rhsSelect =
2508           DAG.getNode(ISD::SELECT, dl, IntVT,
2509                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2510
2511   unsigned compareOp;
2512
2513   switch (CC->get()) {
2514   case ISD::SETOEQ:
2515   case ISD::SETUEQ:
2516     compareOp = ISD::SETEQ; break;
2517   case ISD::SETOGT:
2518   case ISD::SETUGT:
2519     compareOp = ISD::SETGT; break;
2520   case ISD::SETOGE:
2521   case ISD::SETUGE:
2522     compareOp = ISD::SETGE; break;
2523   case ISD::SETOLT:
2524   case ISD::SETULT:
2525     compareOp = ISD::SETLT; break;
2526   case ISD::SETOLE:
2527   case ISD::SETULE:
2528     compareOp = ISD::SETLE; break;
2529   case ISD::SETUNE:
2530   case ISD::SETONE:
2531     compareOp = ISD::SETNE; break;
2532   default:
2533     report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2534   }
2535
2536   SDValue result =
2537           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2538                        (ISD::CondCode) compareOp);
2539
2540   if ((CC->get() & 0x8) == 0) {
2541     // Ordered comparison:
2542     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2543                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2544                                   ISD::SETO);
2545     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2546                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2547                                   ISD::SETO);
2548     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2549
2550     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2551   }
2552
2553   return result;
2554 }
2555
2556 //! Lower ISD::SELECT_CC
2557 /*!
2558   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2559   SELB instruction.
2560
2561   \note Need to revisit this in the future: if the code path through the true
2562   and false value computations is longer than the latency of a branch (6
2563   cycles), then it would be more advantageous to branch and insert a new basic
2564   block and branch on the condition. However, this code does not make that
2565   assumption, given the simplisitc uses so far.
2566  */
2567
2568 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2569                               const TargetLowering &TLI) {
2570   EVT VT = Op.getValueType();
2571   SDValue lhs = Op.getOperand(0);
2572   SDValue rhs = Op.getOperand(1);
2573   SDValue trueval = Op.getOperand(2);
2574   SDValue falseval = Op.getOperand(3);
2575   SDValue condition = Op.getOperand(4);
2576   DebugLoc dl = Op.getDebugLoc();
2577
2578   // NOTE: SELB's arguments: $rA, $rB, $mask
2579   //
2580   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2581   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2582   // condition was true and 0s where the condition was false. Hence, the
2583   // arguments to SELB get reversed.
2584
2585   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2586   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2587   // with another "cannot select select_cc" assert:
2588
2589   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2590                                 TLI.getSetCCResultType(Op.getValueType()),
2591                                 lhs, rhs, condition);
2592   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2593 }
2594
2595 //! Custom lower ISD::TRUNCATE
2596 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2597 {
2598   // Type to truncate to
2599   EVT VT = Op.getValueType();
2600   MVT simpleVT = VT.getSimpleVT();
2601   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2602                                VT, (128 / VT.getSizeInBits()));
2603   DebugLoc dl = Op.getDebugLoc();
2604
2605   // Type to truncate from
2606   SDValue Op0 = Op.getOperand(0);
2607   EVT Op0VT = Op0.getValueType();
2608
2609   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2610     // Create shuffle mask, least significant doubleword of quadword
2611     unsigned maskHigh = 0x08090a0b;
2612     unsigned maskLow = 0x0c0d0e0f;
2613     // Use a shuffle to perform the truncation
2614     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2615                                    DAG.getConstant(maskHigh, MVT::i32),
2616                                    DAG.getConstant(maskLow, MVT::i32),
2617                                    DAG.getConstant(maskHigh, MVT::i32),
2618                                    DAG.getConstant(maskLow, MVT::i32));
2619
2620     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2621                                        Op0, Op0, shufMask);
2622
2623     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2624   }
2625
2626   return SDValue();             // Leave the truncate unmolested
2627 }
2628
2629 /*!
2630  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2631  * algorithm is to duplicate the sign bit using rotmai to generate at
2632  * least one byte full of sign bits. Then propagate the "sign-byte" into
2633  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2634  *
2635  * @param Op The sext operand
2636  * @param DAG The current DAG
2637  * @return The SDValue with the entire instruction sequence
2638  */
2639 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2640 {
2641   DebugLoc dl = Op.getDebugLoc();
2642
2643   // Type to extend to
2644   MVT OpVT = Op.getValueType().getSimpleVT();
2645
2646   // Type to extend from
2647   SDValue Op0 = Op.getOperand(0);
2648   MVT Op0VT = Op0.getValueType().getSimpleVT();
2649
2650   // The type to extend to needs to be a i128 and
2651   // the type to extend from needs to be i64 or i32.
2652   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2653           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2654
2655   // Create shuffle mask
2656   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2657   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2658   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2659   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2660                                  DAG.getConstant(mask1, MVT::i32),
2661                                  DAG.getConstant(mask1, MVT::i32),
2662                                  DAG.getConstant(mask2, MVT::i32),
2663                                  DAG.getConstant(mask3, MVT::i32));
2664
2665   // Word wise arithmetic right shift to generate at least one byte
2666   // that contains sign bits.
2667   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2668   SDValue sraVal = DAG.getNode(ISD::SRA,
2669                  dl,
2670                  mvt,
2671                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2672                  DAG.getConstant(31, MVT::i32));
2673
2674   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2675   // and the input value into the lower 64 bits.
2676   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2677       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2678
2679   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2680 }
2681
2682 //! Custom (target-specific) lowering entry point
2683 /*!
2684   This is where LLVM's DAG selection process calls to do target-specific
2685   lowering of nodes.
2686  */
2687 SDValue
2688 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2689 {
2690   unsigned Opc = (unsigned) Op.getOpcode();
2691   EVT VT = Op.getValueType();
2692
2693   switch (Opc) {
2694   default: {
2695 #ifndef NDEBUG
2696     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2697     errs() << "Op.getOpcode() = " << Opc << "\n";
2698     errs() << "*Op.getNode():\n";
2699     Op.getNode()->dump();
2700 #endif
2701     llvm_unreachable(0);
2702   }
2703   case ISD::LOAD:
2704   case ISD::EXTLOAD:
2705   case ISD::SEXTLOAD:
2706   case ISD::ZEXTLOAD:
2707     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2708   case ISD::STORE:
2709     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2710   case ISD::ConstantPool:
2711     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2712   case ISD::GlobalAddress:
2713     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2714   case ISD::JumpTable:
2715     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2716   case ISD::ConstantFP:
2717     return LowerConstantFP(Op, DAG);
2718
2719   // i8, i64 math ops:
2720   case ISD::ADD:
2721   case ISD::SUB:
2722   case ISD::ROTR:
2723   case ISD::ROTL:
2724   case ISD::SRL:
2725   case ISD::SHL:
2726   case ISD::SRA: {
2727     if (VT == MVT::i8)
2728       return LowerI8Math(Op, DAG, Opc, *this);
2729     break;
2730   }
2731
2732   case ISD::FP_TO_SINT:
2733   case ISD::FP_TO_UINT:
2734     return LowerFP_TO_INT(Op, DAG, *this);
2735
2736   case ISD::SINT_TO_FP:
2737   case ISD::UINT_TO_FP:
2738     return LowerINT_TO_FP(Op, DAG, *this);
2739
2740   // Vector-related lowering.
2741   case ISD::BUILD_VECTOR:
2742     return LowerBUILD_VECTOR(Op, DAG);
2743   case ISD::SCALAR_TO_VECTOR:
2744     return LowerSCALAR_TO_VECTOR(Op, DAG);
2745   case ISD::VECTOR_SHUFFLE:
2746     return LowerVECTOR_SHUFFLE(Op, DAG);
2747   case ISD::EXTRACT_VECTOR_ELT:
2748     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2749   case ISD::INSERT_VECTOR_ELT:
2750     return LowerINSERT_VECTOR_ELT(Op, DAG);
2751
2752   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2753   case ISD::AND:
2754   case ISD::OR:
2755   case ISD::XOR:
2756     return LowerByteImmed(Op, DAG);
2757
2758   // Vector and i8 multiply:
2759   case ISD::MUL:
2760     if (VT == MVT::i8)
2761       return LowerI8Math(Op, DAG, Opc, *this);
2762
2763   case ISD::CTPOP:
2764     return LowerCTPOP(Op, DAG);
2765
2766   case ISD::SELECT_CC:
2767     return LowerSELECT_CC(Op, DAG, *this);
2768
2769   case ISD::SETCC:
2770     return LowerSETCC(Op, DAG, *this);
2771
2772   case ISD::TRUNCATE:
2773     return LowerTRUNCATE(Op, DAG);
2774
2775   case ISD::SIGN_EXTEND:
2776     return LowerSIGN_EXTEND(Op, DAG);
2777   }
2778
2779   return SDValue();
2780 }
2781
2782 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2783                                            SmallVectorImpl<SDValue>&Results,
2784                                            SelectionDAG &DAG) const
2785 {
2786 #if 0
2787   unsigned Opc = (unsigned) N->getOpcode();
2788   EVT OpVT = N->getValueType(0);
2789
2790   switch (Opc) {
2791   default: {
2792     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2793     errs() << "Op.getOpcode() = " << Opc << "\n";
2794     errs() << "*Op.getNode():\n";
2795     N->dump();
2796     abort();
2797     /*NOTREACHED*/
2798   }
2799   }
2800 #endif
2801
2802   /* Otherwise, return unchanged */
2803 }
2804
2805 //===----------------------------------------------------------------------===//
2806 // Target Optimization Hooks
2807 //===----------------------------------------------------------------------===//
2808
2809 SDValue
2810 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2811 {
2812 #if 0
2813   TargetMachine &TM = getTargetMachine();
2814 #endif
2815   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2816   SelectionDAG &DAG = DCI.DAG;
2817   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2818   EVT NodeVT = N->getValueType(0);      // The node's value type
2819   EVT Op0VT = Op0.getValueType();       // The first operand's result
2820   SDValue Result;                       // Initially, empty result
2821   DebugLoc dl = N->getDebugLoc();
2822
2823   switch (N->getOpcode()) {
2824   default: break;
2825   case ISD::ADD: {
2826     SDValue Op1 = N->getOperand(1);
2827
2828     if (Op0.getOpcode() == SPUISD::IndirectAddr
2829         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2830       // Normalize the operands to reduce repeated code
2831       SDValue IndirectArg = Op0, AddArg = Op1;
2832
2833       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2834         IndirectArg = Op1;
2835         AddArg = Op0;
2836       }
2837
2838       if (isa<ConstantSDNode>(AddArg)) {
2839         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2840         SDValue IndOp1 = IndirectArg.getOperand(1);
2841
2842         if (CN0->isNullValue()) {
2843           // (add (SPUindirect <arg>, <arg>), 0) ->
2844           // (SPUindirect <arg>, <arg>)
2845
2846 #if !defined(NDEBUG)
2847           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2848             errs() << "\n"
2849                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2850                  << "With:    (SPUindirect <arg>, <arg>)\n";
2851           }
2852 #endif
2853
2854           return IndirectArg;
2855         } else if (isa<ConstantSDNode>(IndOp1)) {
2856           // (add (SPUindirect <arg>, <const>), <const>) ->
2857           // (SPUindirect <arg>, <const + const>)
2858           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2859           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2860           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2861
2862 #if !defined(NDEBUG)
2863           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2864             errs() << "\n"
2865                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2866                  << "), " << CN0->getSExtValue() << ")\n"
2867                  << "With:    (SPUindirect <arg>, "
2868                  << combinedConst << ")\n";
2869           }
2870 #endif
2871
2872           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2873                              IndirectArg, combinedValue);
2874         }
2875       }
2876     }
2877     break;
2878   }
2879   case ISD::SIGN_EXTEND:
2880   case ISD::ZERO_EXTEND:
2881   case ISD::ANY_EXTEND: {
2882     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2883       // (any_extend (SPUextract_elt0 <arg>)) ->
2884       // (SPUextract_elt0 <arg>)
2885       // Types must match, however...
2886 #if !defined(NDEBUG)
2887       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2888         errs() << "\nReplace: ";
2889         N->dump(&DAG);
2890         errs() << "\nWith:    ";
2891         Op0.getNode()->dump(&DAG);
2892         errs() << "\n";
2893       }
2894 #endif
2895
2896       return Op0;
2897     }
2898     break;
2899   }
2900   case SPUISD::IndirectAddr: {
2901     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2902       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2903       if (CN != 0 && CN->isNullValue()) {
2904         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2905         // (SPUaform <addr>, 0)
2906
2907         DEBUG(errs() << "Replace: ");
2908         DEBUG(N->dump(&DAG));
2909         DEBUG(errs() << "\nWith:    ");
2910         DEBUG(Op0.getNode()->dump(&DAG));
2911         DEBUG(errs() << "\n");
2912
2913         return Op0;
2914       }
2915     } else if (Op0.getOpcode() == ISD::ADD) {
2916       SDValue Op1 = N->getOperand(1);
2917       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2918         // (SPUindirect (add <arg>, <arg>), 0) ->
2919         // (SPUindirect <arg>, <arg>)
2920         if (CN1->isNullValue()) {
2921
2922 #if !defined(NDEBUG)
2923           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2924             errs() << "\n"
2925                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2926                  << "With:    (SPUindirect <arg>, <arg>)\n";
2927           }
2928 #endif
2929
2930           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2931                              Op0.getOperand(0), Op0.getOperand(1));
2932         }
2933       }
2934     }
2935     break;
2936   }
2937   case SPUISD::SHLQUAD_L_BITS:
2938   case SPUISD::SHLQUAD_L_BYTES:
2939   case SPUISD::ROTBYTES_LEFT: {
2940     SDValue Op1 = N->getOperand(1);
2941
2942     // Kill degenerate vector shifts:
2943     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2944       if (CN->isNullValue()) {
2945         Result = Op0;
2946       }
2947     }
2948     break;
2949   }
2950   case SPUISD::PREFSLOT2VEC: {
2951     switch (Op0.getOpcode()) {
2952     default:
2953       break;
2954     case ISD::ANY_EXTEND:
2955     case ISD::ZERO_EXTEND:
2956     case ISD::SIGN_EXTEND: {
2957       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2958       // <arg>
2959       // but only if the SPUprefslot2vec and <arg> types match.
2960       SDValue Op00 = Op0.getOperand(0);
2961       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2962         SDValue Op000 = Op00.getOperand(0);
2963         if (Op000.getValueType() == NodeVT) {
2964           Result = Op000;
2965         }
2966       }
2967       break;
2968     }
2969     case SPUISD::VEC2PREFSLOT: {
2970       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2971       // <arg>
2972       Result = Op0.getOperand(0);
2973       break;
2974     }
2975     }
2976     break;
2977   }
2978   }
2979
2980   // Otherwise, return unchanged.
2981 #ifndef NDEBUG
2982   if (Result.getNode()) {
2983     DEBUG(errs() << "\nReplace.SPU: ");
2984     DEBUG(N->dump(&DAG));
2985     DEBUG(errs() << "\nWith:        ");
2986     DEBUG(Result.getNode()->dump(&DAG));
2987     DEBUG(errs() << "\n");
2988   }
2989 #endif
2990
2991   return Result;
2992 }
2993
2994 //===----------------------------------------------------------------------===//
2995 // Inline Assembly Support
2996 //===----------------------------------------------------------------------===//
2997
2998 /// getConstraintType - Given a constraint letter, return the type of
2999 /// constraint it is for this target.
3000 SPUTargetLowering::ConstraintType
3001 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3002   if (ConstraintLetter.size() == 1) {
3003     switch (ConstraintLetter[0]) {
3004     default: break;
3005     case 'b':
3006     case 'r':
3007     case 'f':
3008     case 'v':
3009     case 'y':
3010       return C_RegisterClass;
3011     }
3012   }
3013   return TargetLowering::getConstraintType(ConstraintLetter);
3014 }
3015
3016 std::pair<unsigned, const TargetRegisterClass*>
3017 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3018                                                 EVT VT) const
3019 {
3020   if (Constraint.size() == 1) {
3021     // GCC RS6000 Constraint Letters
3022     switch (Constraint[0]) {
3023     case 'b':   // R1-R31
3024     case 'r':   // R0-R31
3025       if (VT == MVT::i64)
3026         return std::make_pair(0U, SPU::R64CRegisterClass);
3027       return std::make_pair(0U, SPU::R32CRegisterClass);
3028     case 'f':
3029       if (VT == MVT::f32)
3030         return std::make_pair(0U, SPU::R32FPRegisterClass);
3031       else if (VT == MVT::f64)
3032         return std::make_pair(0U, SPU::R64FPRegisterClass);
3033       break;
3034     case 'v':
3035       return std::make_pair(0U, SPU::GPRCRegisterClass);
3036     }
3037   }
3038
3039   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3040 }
3041
3042 //! Compute used/known bits for a SPU operand
3043 void
3044 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3045                                                   const APInt &Mask,
3046                                                   APInt &KnownZero,
3047                                                   APInt &KnownOne,
3048                                                   const SelectionDAG &DAG,
3049                                                   unsigned Depth ) const {
3050 #if 0
3051   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3052
3053   switch (Op.getOpcode()) {
3054   default:
3055     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3056     break;
3057   case CALL:
3058   case SHUFB:
3059   case SHUFFLE_MASK:
3060   case CNTB:
3061   case SPUISD::PREFSLOT2VEC:
3062   case SPUISD::LDRESULT:
3063   case SPUISD::VEC2PREFSLOT:
3064   case SPUISD::SHLQUAD_L_BITS:
3065   case SPUISD::SHLQUAD_L_BYTES:
3066   case SPUISD::VEC_ROTL:
3067   case SPUISD::VEC_ROTR:
3068   case SPUISD::ROTBYTES_LEFT:
3069   case SPUISD::SELECT_MASK:
3070   case SPUISD::SELB:
3071   }
3072 #endif
3073 }
3074
3075 unsigned
3076 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3077                                                    unsigned Depth) const {
3078   switch (Op.getOpcode()) {
3079   default:
3080     return 1;
3081
3082   case ISD::SETCC: {
3083     EVT VT = Op.getValueType();
3084
3085     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3086       VT = MVT::i32;
3087     }
3088     return VT.getSizeInBits();
3089   }
3090   }
3091 }
3092
3093 // LowerAsmOperandForConstraint
3094 void
3095 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3096                                                 char ConstraintLetter,
3097                                                 std::vector<SDValue> &Ops,
3098                                                 SelectionDAG &DAG) const {
3099   // Default, for the time being, to the base class handler
3100   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3101 }
3102
3103 /// isLegalAddressImmediate - Return true if the integer value can be used
3104 /// as the offset of the target addressing mode.
3105 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3106                                                 const Type *Ty) const {
3107   // SPU's addresses are 256K:
3108   return (V > -(1 << 18) && V < (1 << 18) - 1);
3109 }
3110
3111 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3112   return false;
3113 }
3114
3115 bool
3116 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3117   // The SPU target isn't yet aware of offsets.
3118   return false;
3119 }