lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "SPUMachineFunction.h"
  19 #include "llvm/Constants.h"
  20 #include "llvm/Function.h"
  21 #include "llvm/Intrinsics.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  30 #include "llvm/Target/TargetOptions.h"
  31 #include "llvm/ADT/VectorExtras.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include <map>
  37
  38 using namespace llvm;
  39
  40 // Used in getTargetNodeName() below
  41 namespace {
  42   std::map<unsigned, const char *> node_names;
  43
  44   //! EVT mapping to useful data for Cell SPU
  45   struct valtype_map_s {
  46     EVT   valtype;
  47     int   prefslot_byte;
  48   };
  49
  50   const valtype_map_s valtype_map[] = {
  51     { MVT::i1,   3 },
  52     { MVT::i8,   3 },
  53     { MVT::i16,  2 },
  54     { MVT::i32,  0 },
  55     { MVT::f32,  0 },
  56     { MVT::i64,  0 },
  57     { MVT::f64,  0 },
  58     { MVT::i128, 0 }
  59   };
  60
  61   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  62
  63   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  64     const valtype_map_s *retval = 0;
  65
  66     for (size_t i = 0; i < n_valtype_map; ++i) {
  67       if (valtype_map[i].valtype == VT) {
  68         retval = valtype_map + i;
  69         break;
  70       }
  71     }
  72
  73 #ifndef NDEBUG
  74     if (retval == 0) {
  75       report_fatal_error("getValueTypeMapEntry returns NULL for " +
  76                          Twine(VT.getEVTString()));
  77     }
  78 #endif
  79
  80     return retval;
  81   }
  82
  83   //! Expand a library call into an actual call DAG node
  84   /*!
  85    \note
  86    This code is taken from SelectionDAGLegalize, since it is not exposed as
  87    part of the LLVM SelectionDAG API.
  88    */
  89
  90   SDValue
  91   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  92                 bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
  93     // The input chain to this libcall is the entry node of the function.
  94     // Legalizing the call will automatically add the previous call to the
  95     // dependence.
  96     SDValue InChain = DAG.getEntryNode();
  97
  98     TargetLowering::ArgListTy Args;
  99     TargetLowering::ArgListEntry Entry;
 100     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 101       EVT ArgVT = Op.getOperand(i).getValueType();
 102       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 103       Entry.Node = Op.getOperand(i);
 104       Entry.Ty = ArgTy;
 105       Entry.isSExt = isSigned;
 106       Entry.isZExt = !isSigned;
 107       Args.push_back(Entry);
 108     }
 109     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 110                                            TLI.getPointerTy());
 111
 112     // Splice the libcall in wherever FindInputOutputChains tells us to.
 113     const Type *RetTy =
 114                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 115     std::pair<SDValue, SDValue> CallInfo =
 116             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 117                             0, TLI.getLibcallCallingConv(LC), false,
 118                             /*isReturnValueUsed=*/true,
 119                             Callee, Args, DAG, Op.getDebugLoc());
 120
 121     return CallInfo.first;
 122   }
 123 }
 124
 125 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 126   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 127     SPUTM(TM) {
 128   // Fold away setcc operations if possible.
 129   setPow2DivIsCheap();
 130
 131   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 132   setUseUnderscoreSetJmp(true);
 133   setUseUnderscoreLongJmp(true);
 134
 135   // Set RTLIB libcall names as used by SPU:
 136   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 137
 138   // Set up the SPU's register classes:
 139   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 140   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 141   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 142   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 143   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 144   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 145   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 146
 147   // SPU has no sign or zero extended loads for i1, i8, i16:
 148   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 149   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 150   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 151
 152   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 153   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 154
 155   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 156   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 157   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 158   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 159
 160   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 161
 162   // SPU constant load actions are custom lowered:
 163   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 164   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 165
 166   // SPU's loads and stores have to be custom lowered:
 167   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 168        ++sctype) {
 169     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 170
 171     setOperationAction(ISD::LOAD,   VT, Custom);
 172     setOperationAction(ISD::STORE,  VT, Custom);
 173     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 174     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 175     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 176
 177     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 178       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 179       setTruncStoreAction(VT, StoreVT, Expand);
 180     }
 181   }
 182
 183   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 184        ++sctype) {
 185     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 186
 187     setOperationAction(ISD::LOAD,   VT, Custom);
 188     setOperationAction(ISD::STORE,  VT, Custom);
 189
 190     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 191       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 192       setTruncStoreAction(VT, StoreVT, Expand);
 193     }
 194   }
 195
 196   // Expand the jumptable branches
 197   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 198   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 199
 200   // Custom lower SELECT_CC for most cases, but expand by default
 201   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 202   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 203   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 204   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 206
 207   // SPU has no intrinsics for these particular operations:
 208   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 209
 210   // SPU has no division/remainder instructions
 211   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 212   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 213   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 214   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 215   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 216   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 217   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 218   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 219   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 220   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 221   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 222   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 223   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 224   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 225   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 226   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 227   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 228   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 229   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 230   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 231   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 232   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 233   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 234   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 235   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 236   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 237   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 238   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 239   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 240   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 241
 242   // We don't support sin/cos/sqrt/fmod
 243   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 244   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 245   setOperationAction(ISD::FREM , MVT::f64, Expand);
 246   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 248   setOperationAction(ISD::FREM , MVT::f32, Expand);
 249
 250   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 251   // for f32!)
 252   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 253   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 254
 255   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 256   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 257
 258   // SPU can do rotate right and left, so legalize it... but customize for i8
 259   // because instructions don't exist.
 260
 261   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 262   //        .td files.
 263   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 264   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 265   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 266
 267   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 268   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 269   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 270
 271   // SPU has no native version of shift left/right for i8
 272   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 273   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 274   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 275
 276   // Make these operations legal and handle them during instruction selection:
 277   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 278   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 279   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 280
 281   // Custom lower i8, i32 and i64 multiplications
 282   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 283   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 284   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 285
 286   // Expand double-width multiplication
 287   // FIXME: It would probably be reasonable to support some of these operations
 288   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 289   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 290   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 291   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 292   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 293   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 294   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 295   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 298   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 299   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 300   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 301   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 302   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 303   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 304
 305   // Need to custom handle (some) common i8, i64 math ops
 306   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 307   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 308   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 309   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 310
 311   // SPU does not have BSWAP. It does have i32 support CTLZ.
 312   // CTPOP has to be custom lowered.
 313   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 314   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 315
 316   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 317   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 318   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 319   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 321
 322   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 323   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 324   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 325   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 327
 328   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 329   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 330   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 331   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 332   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 333
 334   // SPU has a version of select that implements (a&~c)|(b&c), just like
 335   // select ought to work:
 336   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 337   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 338   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 339   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 340
 341   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 342   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 343   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 344   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 345   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 346
 347   // Custom lower i128 -> i64 truncates
 348   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 349
 350   // Custom lower i32/i64 -> i128 sign extend
 351   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 352
 353   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 354   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 355   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 356   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 357   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 358   // to expand to a libcall, hence the custom lowering:
 359   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 360   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 361   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 362   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 363   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 364   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 365
 366   // FDIV on SPU requires custom lowering
 367   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 368
 369   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 370   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 371   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 372   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 373   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 378
 379   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 380   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 381   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 382   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 383
 384   // We cannot sextinreg(i1).  Expand to shifts.
 385   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 386
 387   // We want to legalize GlobalAddress and ConstantPool nodes into the
 388   // appropriate instructions to materialize the address.
 389   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 390        ++sctype) {
 391     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 392
 393     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 394     setOperationAction(ISD::ConstantPool,   VT, Custom);
 395     setOperationAction(ISD::JumpTable,      VT, Custom);
 396   }
 397
 398   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 399   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 400
 401   // Use the default implementation.
 402   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 403   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 404   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 405   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 406   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 407   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 408   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 409
 410   // Cell SPU has instructions for converting between i64 and fp.
 411   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 412   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 413
 414   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 415   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 416
 417   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 418   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 419
 420   // First set operation action for all vector types to expand. Then we
 421   // will selectively turn on ones that can be effectively codegen'd.
 422   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 423   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 424   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 425   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 428
 429   // "Odd size" vector classes that we're willing to support:
 430   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 431   addRegisterClass(MVT::v2f32, SPU::VECREGRegisterClass);
 432
 433   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 434        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 435     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 436
 437     // add/sub are legal for all supported vector VT's.
 438     setOperationAction(ISD::ADD,     VT, Legal);
 439     setOperationAction(ISD::SUB,     VT, Legal);
 440     // mul has to be custom lowered.
 441     setOperationAction(ISD::MUL,     VT, Legal);
 442
 443     setOperationAction(ISD::AND,     VT, Legal);
 444     setOperationAction(ISD::OR,      VT, Legal);
 445     setOperationAction(ISD::XOR,     VT, Legal);
 446     setOperationAction(ISD::LOAD,    VT, Legal);
 447     setOperationAction(ISD::SELECT,  VT, Legal);
 448     setOperationAction(ISD::STORE,   VT, Legal);
 449
 450     // These operations need to be expanded:
 451     setOperationAction(ISD::SDIV,    VT, Expand);
 452     setOperationAction(ISD::SREM,    VT, Expand);
 453     setOperationAction(ISD::UDIV,    VT, Expand);
 454     setOperationAction(ISD::UREM,    VT, Expand);
 455
 456     // Custom lower build_vector, constant pool spills, insert and
 457     // extract vector elements:
 458     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 459     setOperationAction(ISD::ConstantPool, VT, Custom);
 460     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 461     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 462     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 463     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 464   }
 465
 466   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 467   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 468   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 469   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 470
 471   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 472
 473   setShiftAmountType(MVT::i32);
 474   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 475
 476   setStackPointerRegisterToSaveRestore(SPU::R1);
 477
 478   // We have target-specific dag combine patterns for the following nodes:
 479   setTargetDAGCombine(ISD::ADD);
 480   setTargetDAGCombine(ISD::ZERO_EXTEND);
 481   setTargetDAGCombine(ISD::SIGN_EXTEND);
 482   setTargetDAGCombine(ISD::ANY_EXTEND);
 483
 484   computeRegisterProperties();
 485
 486   // Set pre-RA register scheduler default to BURR, which produces slightly
 487   // better code than the default (could also be TDRR, but TargetLowering.h
 488   // needs a mod to support that model):
 489   setSchedulingPreference(Sched::RegPressure);
 490 }
 491
 492 const char *
 493 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 494 {
 495   if (node_names.empty()) {
 496     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 497     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 498     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 499     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 500     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 501     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 502     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 503     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 504     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 505     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 506     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 507     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 508     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 509     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 510     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 511     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 512     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 513     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 514     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 515             "SPUISD::ROTBYTES_LEFT_BITS";
 516     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 517     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 518     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 519     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 520     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 521   }
 522
 523   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 524
 525   return ((i != node_names.end()) ? i->second : 0);
 526 }
 527
 528 /// getFunctionAlignment - Return the Log2 alignment of this function.
 529 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 530   return 3;
 531 }
 532
 533 //===----------------------------------------------------------------------===//
 534 // Return the Cell SPU's SETCC result type
 535 //===----------------------------------------------------------------------===//
 536
 537 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 538   // i16 and i32 are valid SETCC result types
 539   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 540     VT.getSimpleVT().SimpleTy :
 541     MVT::i32);
 542 }
 543
 544 //===----------------------------------------------------------------------===//
 545 // Calling convention code:
 546 //===----------------------------------------------------------------------===//
 547
 548 #include "SPUGenCallingConv.inc"
 549
 550 //===----------------------------------------------------------------------===//
 551 //  LowerOperation implementation
 552 //===----------------------------------------------------------------------===//
 553
 554 /// Custom lower loads for CellSPU
 555 /*!
 556  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 557  within a 16-byte block, we have to rotate to extract the requested element.
 558
 559  For extending loads, we also want to ensure that the following sequence is
 560  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 561
 562 \verbatim
 563 %1  v16i8,ch = load
 564 %2  v16i8,ch = rotate %1
 565 %3  v4f8, ch = bitconvert %2
 566 %4  f32      = vec2perfslot %3
 567 %5  f64      = fp_extend %4
 568 \endverbatim
 569 */
 570 static SDValue
 571 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 572   LoadSDNode *LN = cast<LoadSDNode>(Op);
 573   SDValue the_chain = LN->getChain();
 574   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 575   EVT InVT = LN->getMemoryVT();
 576   EVT OutVT = Op.getValueType();
 577   ISD::LoadExtType ExtType = LN->getExtensionType();
 578   unsigned alignment = LN->getAlignment();
 579   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 580   DebugLoc dl = Op.getDebugLoc();
 581
 582   switch (LN->getAddressingMode()) {
 583   case ISD::UNINDEXED: {
 584     SDValue result;
 585     SDValue basePtr = LN->getBasePtr();
 586     SDValue rotate;
 587
 588     if (alignment == 16) {
 589       ConstantSDNode *CN;
 590
 591       // Special cases for a known aligned load to simplify the base pointer
 592       // and the rotation amount:
 593       if (basePtr.getOpcode() == ISD::ADD
 594           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 595         // Known offset into basePtr
 596         int64_t offset = CN->getSExtValue();
 597         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 598
 599         if (rotamt < 0)
 600           rotamt += 16;
 601
 602         rotate = DAG.getConstant(rotamt, MVT::i16);
 603
 604         // Simplify the base pointer for this case:
 605         basePtr = basePtr.getOperand(0);
 606         if ((offset & ~0xf) > 0) {
 607           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 608                                 basePtr,
 609                                 DAG.getConstant((offset & ~0xf), PtrVT));
 610         }
 611       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 612                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 613                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 614                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 615         // Plain aligned a-form address: rotate into preferred slot
 616         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 617         int64_t rotamt = -vtm->prefslot_byte;
 618         if (rotamt < 0)
 619           rotamt += 16;
 620         rotate = DAG.getConstant(rotamt, MVT::i16);
 621       } else {
 622         // Offset the rotate amount by the basePtr and the preferred slot
 623         // byte offset
 624         int64_t rotamt = -vtm->prefslot_byte;
 625         if (rotamt < 0)
 626           rotamt += 16;
 627         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 628                              basePtr,
 629                              DAG.getConstant(rotamt, PtrVT));
 630       }
 631     } else {
 632       // Unaligned load: must be more pessimistic about addressing modes:
 633       if (basePtr.getOpcode() == ISD::ADD) {
 634         MachineFunction &MF = DAG.getMachineFunction();
 635         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 636         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 637         SDValue Flag;
 638
 639         SDValue Op0 = basePtr.getOperand(0);
 640         SDValue Op1 = basePtr.getOperand(1);
 641
 642         if (isa<ConstantSDNode>(Op1)) {
 643           // Convert the (add <ptr>, <const>) to an indirect address contained
 644           // in a register. Note that this is done because we need to avoid
 645           // creating a 0(reg) d-form address due to the SPU's block loads.
 646           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 647           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 648           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 649         } else {
 650           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 651           // will likely be lowered as a reg(reg) x-form address.
 652           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 653         }
 654       } else {
 655         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 656                               basePtr,
 657                               DAG.getConstant(0, PtrVT));
 658       }
 659
 660       // Offset the rotate amount by the basePtr and the preferred slot
 661       // byte offset
 662       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 663                            basePtr,
 664                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 665     }
 666
 667     // Re-emit as a v16i8 vector load
 668     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 669                          LN->getSrcValue(), LN->getSrcValueOffset(),
 670                          LN->isVolatile(), LN->isNonTemporal(), 16);
 671
 672     // Update the chain
 673     the_chain = result.getValue(1);
 674
 675     // Rotate into the preferred slot:
 676     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 677                          result.getValue(0), rotate);
 678
 679     // Convert the loaded v16i8 vector to the appropriate vector type
 680     // specified by the operand:
 681     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 682                                  InVT, (128 / InVT.getSizeInBits()));
 683     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 684                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 685
 686     // Handle extending loads by extending the scalar result:
 687     if (ExtType == ISD::SEXTLOAD) {
 688       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 689     } else if (ExtType == ISD::ZEXTLOAD) {
 690       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 691     } else if (ExtType == ISD::EXTLOAD) {
 692       unsigned NewOpc = ISD::ANY_EXTEND;
 693
 694       if (OutVT.isFloatingPoint())
 695         NewOpc = ISD::FP_EXTEND;
 696
 697       result = DAG.getNode(NewOpc, dl, OutVT, result);
 698     }
 699
 700     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 701     SDValue retops[2] = {
 702       result,
 703       the_chain
 704     };
 705
 706     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 707                          retops, sizeof(retops) / sizeof(retops[0]));
 708     return result;
 709   }
 710   case ISD::PRE_INC:
 711   case ISD::PRE_DEC:
 712   case ISD::POST_INC:
 713   case ISD::POST_DEC:
 714   case ISD::LAST_INDEXED_MODE:
 715     {
 716       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 717                          "than UNINDEXED\n" +
 718                          Twine((unsigned)LN->getAddressingMode()));
 719       /*NOTREACHED*/
 720     }
 721   }
 722
 723   return SDValue();
 724 }
 725
 726 /// Custom lower stores for CellSPU
 727 /*!
 728  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 729  within a 16-byte block, we have to generate a shuffle to insert the
 730  requested element into its place, then store the resulting block.
 731  */
 732 static SDValue
 733 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 734   StoreSDNode *SN = cast<StoreSDNode>(Op);
 735   SDValue Value = SN->getValue();
 736   EVT VT = Value.getValueType();
 737   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 738   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 739   DebugLoc dl = Op.getDebugLoc();
 740   unsigned alignment = SN->getAlignment();
 741
 742   switch (SN->getAddressingMode()) {
 743   case ISD::UNINDEXED: {
 744     // The vector type we really want to load from the 16-byte chunk.
 745     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 746                                  VT, (128 / VT.getSizeInBits()));
 747
 748     SDValue alignLoadVec;
 749     SDValue basePtr = SN->getBasePtr();
 750     SDValue the_chain = SN->getChain();
 751     SDValue insertEltOffs;
 752
 753     if (alignment == 16) {
 754       ConstantSDNode *CN;
 755
 756       // Special cases for a known aligned load to simplify the base pointer
 757       // and insertion byte:
 758       if (basePtr.getOpcode() == ISD::ADD
 759           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 760         // Known offset into basePtr
 761         int64_t offset = CN->getSExtValue();
 762
 763         // Simplify the base pointer for this case:
 764         basePtr = basePtr.getOperand(0);
 765         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 766                                     basePtr,
 767                                     DAG.getConstant((offset & 0xf), PtrVT));
 768
 769         if ((offset & ~0xf) > 0) {
 770           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 771                                 basePtr,
 772                                 DAG.getConstant((offset & ~0xf), PtrVT));
 773         }
 774       } else {
 775         // Otherwise, assume it's at byte 0 of basePtr
 776         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 777                                     basePtr,
 778                                     DAG.getConstant(0, PtrVT));
 779       }
 780     } else {
 781       // Unaligned load: must be more pessimistic about addressing modes:
 782       if (basePtr.getOpcode() == ISD::ADD) {
 783         MachineFunction &MF = DAG.getMachineFunction();
 784         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 785         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 786         SDValue Flag;
 787
 788         SDValue Op0 = basePtr.getOperand(0);
 789         SDValue Op1 = basePtr.getOperand(1);
 790
 791         if (isa<ConstantSDNode>(Op1)) {
 792           // Convert the (add <ptr>, <const>) to an indirect address contained
 793           // in a register. Note that this is done because we need to avoid
 794           // creating a 0(reg) d-form address due to the SPU's block loads.
 795           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 796           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 797           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 798         } else {
 799           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 800           // will likely be lowered as a reg(reg) x-form address.
 801           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 802         }
 803       } else {
 804         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 805                               basePtr,
 806                               DAG.getConstant(0, PtrVT));
 807       }
 808
 809       // Insertion point is solely determined by basePtr's contents
 810       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 811                                   basePtr,
 812                                   DAG.getConstant(0, PtrVT));
 813     }
 814
 815     // Re-emit as a v16i8 vector load
 816     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 817                                SN->getSrcValue(), SN->getSrcValueOffset(),
 818                                SN->isVolatile(), SN->isNonTemporal(), 16);
 819
 820     // Update the chain
 821     the_chain = alignLoadVec.getValue(1);
 822
 823     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 824     SDValue theValue = SN->getValue();
 825     SDValue result;
 826
 827     if (StVT != VT
 828         && (theValue.getOpcode() == ISD::AssertZext
 829             || theValue.getOpcode() == ISD::AssertSext)) {
 830       // Drill down and get the value for zero- and sign-extended
 831       // quantities
 832       theValue = theValue.getOperand(0);
 833     }
 834
 835     // If the base pointer is already a D-form address, then just create
 836     // a new D-form address with a slot offset and the orignal base pointer.
 837     // Otherwise generate a D-form address with the slot offset relative
 838     // to the stack pointer, which is always aligned.
 839 #if !defined(NDEBUG)
 840       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 841         errs() << "CellSPU LowerSTORE: basePtr = ";
 842         basePtr.getNode()->dump(&DAG);
 843         errs() << "\n";
 844       }
 845 #endif
 846
 847     SDValue insertEltOp =
 848             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 849     SDValue vectorizeOp =
 850             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 851
 852     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 853                          vectorizeOp, alignLoadVec,
 854                          DAG.getNode(ISD::BIT_CONVERT, dl,
 855                                      MVT::v4i32, insertEltOp));
 856
 857     result = DAG.getStore(the_chain, dl, result, basePtr,
 858                           LN->getSrcValue(), LN->getSrcValueOffset(),
 859                           LN->isVolatile(), LN->isNonTemporal(),
 860                           LN->getAlignment());
 861
 862 #if 0 && !defined(NDEBUG)
 863     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 864       const SDValue &currentRoot = DAG.getRoot();
 865
 866       DAG.setRoot(result);
 867       errs() << "------- CellSPU:LowerStore result:\n";
 868       DAG.dump();
 869       errs() << "-------\n";
 870       DAG.setRoot(currentRoot);
 871     }
 872 #endif
 873
 874     return result;
 875     /*UNREACHED*/
 876   }
 877   case ISD::PRE_INC:
 878   case ISD::PRE_DEC:
 879   case ISD::POST_INC:
 880   case ISD::POST_DEC:
 881   case ISD::LAST_INDEXED_MODE:
 882     {
 883       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 884                          "than UNINDEXED\n" +
 885                          Twine((unsigned)SN->getAddressingMode()));
 886       /*NOTREACHED*/
 887     }
 888   }
 889
 890   return SDValue();
 891 }
 892
 893 //! Generate the address of a constant pool entry.
 894 static SDValue
 895 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 896   EVT PtrVT = Op.getValueType();
 897   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 898   const Constant *C = CP->getConstVal();
 899   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 900   SDValue Zero = DAG.getConstant(0, PtrVT);
 901   const TargetMachine &TM = DAG.getTarget();
 902   // FIXME there is no actual debug info here
 903   DebugLoc dl = Op.getDebugLoc();
 904
 905   if (TM.getRelocationModel() == Reloc::Static) {
 906     if (!ST->usingLargeMem()) {
 907       // Just return the SDValue with the constant pool address in it.
 908       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 909     } else {
 910       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 911       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 912       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 913     }
 914   }
 915
 916   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 917                    " not supported.");
 918   return SDValue();
 919 }
 920
 921 //! Alternate entry point for generating the address of a constant pool entry
 922 SDValue
 923 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 924   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 925 }
 926
 927 static SDValue
 928 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 929   EVT PtrVT = Op.getValueType();
 930   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 931   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 932   SDValue Zero = DAG.getConstant(0, PtrVT);
 933   const TargetMachine &TM = DAG.getTarget();
 934   // FIXME there is no actual debug info here
 935   DebugLoc dl = Op.getDebugLoc();
 936
 937   if (TM.getRelocationModel() == Reloc::Static) {
 938     if (!ST->usingLargeMem()) {
 939       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 940     } else {
 941       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 942       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 943       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 944     }
 945   }
 946
 947   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 948                    " not supported.");
 949   return SDValue();
 950 }
 951
 952 static SDValue
 953 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 954   EVT PtrVT = Op.getValueType();
 955   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 956   const GlobalValue *GV = GSDN->getGlobal();
 957   SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
 958                                           PtrVT, GSDN->getOffset());
 959   const TargetMachine &TM = DAG.getTarget();
 960   SDValue Zero = DAG.getConstant(0, PtrVT);
 961   // FIXME there is no actual debug info here
 962   DebugLoc dl = Op.getDebugLoc();
 963
 964   if (TM.getRelocationModel() == Reloc::Static) {
 965     if (!ST->usingLargeMem()) {
 966       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 967     } else {
 968       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 969       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 970       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 971     }
 972   } else {
 973     report_fatal_error("LowerGlobalAddress: Relocation model other than static"
 974                       "not supported.");
 975     /*NOTREACHED*/
 976   }
 977
 978   return SDValue();
 979 }
 980
 981 //! Custom lower double precision floating point constants
 982 static SDValue
 983 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 984   EVT VT = Op.getValueType();
 985   // FIXME there is no actual debug info here
 986   DebugLoc dl = Op.getDebugLoc();
 987
 988   if (VT == MVT::f64) {
 989     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 990
 991     assert((FP != 0) &&
 992            "LowerConstantFP: Node is not ConstantFPSDNode");
 993
 994     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 995     SDValue T = DAG.getConstant(dbits, MVT::i64);
 996     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 997     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 998                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 999   }
1000
1001   return SDValue();
1002 }
1003
1004 SDValue
1005 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1006                                         CallingConv::ID CallConv, bool isVarArg,
1007                                         const SmallVectorImpl<ISD::InputArg>
1008                                           &Ins,
1009                                         DebugLoc dl, SelectionDAG &DAG,
1010                                         SmallVectorImpl<SDValue> &InVals)
1011                                           const {
1012
1013   MachineFunction &MF = DAG.getMachineFunction();
1014   MachineFrameInfo *MFI = MF.getFrameInfo();
1015   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1016   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1017
1018   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1019   unsigned ArgRegIdx = 0;
1020   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1021
1022   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1023
1024   SmallVector<CCValAssign, 16> ArgLocs;
1025   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1026                  *DAG.getContext());
1027   // FIXME: allow for other calling conventions
1028   CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1029
1030   // Add DAG nodes to load the arguments or copy them out of registers.
1031   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1032     EVT ObjectVT = Ins[ArgNo].VT;
1033     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1034     SDValue ArgVal;
1035     CCValAssign &VA = ArgLocs[ArgNo];
1036
1037     if (VA.isRegLoc()) {
1038       const TargetRegisterClass *ArgRegClass;
1039
1040       switch (ObjectVT.getSimpleVT().SimpleTy) {
1041       default:
1042         report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1043                            Twine(ObjectVT.getEVTString()));
1044       case MVT::i8:
1045         ArgRegClass = &SPU::R8CRegClass;
1046         break;
1047       case MVT::i16:
1048         ArgRegClass = &SPU::R16CRegClass;
1049         break;
1050       case MVT::i32:
1051         ArgRegClass = &SPU::R32CRegClass;
1052         break;
1053       case MVT::i64:
1054         ArgRegClass = &SPU::R64CRegClass;
1055         break;
1056       case MVT::i128:
1057         ArgRegClass = &SPU::GPRCRegClass;
1058         break;
1059       case MVT::f32:
1060         ArgRegClass = &SPU::R32FPRegClass;
1061         break;
1062       case MVT::f64:
1063         ArgRegClass = &SPU::R64FPRegClass;
1064         break;
1065       case MVT::v2f64:
1066       case MVT::v4f32:
1067       case MVT::v2i64:
1068       case MVT::v4i32:
1069       case MVT::v8i16:
1070       case MVT::v16i8:
1071       case MVT::v2i32:
1072       case MVT::v2f32:
1073         ArgRegClass = &SPU::VECREGRegClass;
1074         break;
1075       }
1076
1077       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1078       RegInfo.addLiveIn(VA.getLocReg(), VReg);
1079       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1080       ++ArgRegIdx;
1081     } else {
1082       // We need to load the argument to a virtual register if we determined
1083       // above that we ran out of physical registers of the appropriate type
1084       // or we're forced to do vararg
1085       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1086       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1087       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1088       ArgOffset += StackSlotSize;
1089     }
1090
1091     InVals.push_back(ArgVal);
1092     // Update the chain
1093     Chain = ArgVal.getOperand(0);
1094   }
1095
1096   // vararg handling:
1097   if (isVarArg) {
1098     // FIXME: we should be able to query the argument registers from
1099     //        tablegen generated code.
1100     static const unsigned ArgRegs[] = {
1101       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
1102       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1103       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1104       SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1105       SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1106       SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1107       SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1108       SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1109       SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1110       SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1111       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1112     };
1113     // size of ArgRegs array
1114     unsigned NumArgRegs = 77;
1115
1116     // We will spill (79-3)+1 registers to the stack
1117     SmallVector<SDValue, 79-3+1> MemOps;
1118
1119     // Create the frame slot
1120     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1121       FuncInfo->setVarArgsFrameIndex(
1122         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1123       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1124       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1125       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1126       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1127                                    false, false, 0);
1128       Chain = Store.getOperand(0);
1129       MemOps.push_back(Store);
1130
1131       // Increment address by stack slot size for the next stored argument
1132       ArgOffset += StackSlotSize;
1133     }
1134     if (!MemOps.empty())
1135       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1136                           &MemOps[0], MemOps.size());
1137   }
1138
1139   return Chain;
1140 }
1141
1142 /// isLSAAddress - Return the immediate to use if the specified
1143 /// value is representable as a LSA address.
1144 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1145   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1146   if (!C) return 0;
1147
1148   int Addr = C->getZExtValue();
1149   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1150       (Addr << 14 >> 14) != Addr)
1151     return 0;  // Top 14 bits have to be sext of immediate.
1152
1153   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1154 }
1155
1156 SDValue
1157 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1158                              CallingConv::ID CallConv, bool isVarArg,
1159                              bool &isTailCall,
1160                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1161                              const SmallVectorImpl<SDValue> &OutVals,
1162                              const SmallVectorImpl<ISD::InputArg> &Ins,
1163                              DebugLoc dl, SelectionDAG &DAG,
1164                              SmallVectorImpl<SDValue> &InVals) const {
1165   // CellSPU target does not yet support tail call optimization.
1166   isTailCall = false;
1167
1168   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1169   unsigned NumOps     = Outs.size();
1170   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1171
1172   SmallVector<CCValAssign, 16> ArgLocs;
1173   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1174                  *DAG.getContext());
1175   // FIXME: allow for other calling conventions
1176   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
1177
1178   const unsigned NumArgRegs = ArgLocs.size();
1179
1180
1181   // Handy pointer type
1182   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1183
1184   // Set up a copy of the stack pointer for use loading and storing any
1185   // arguments that may not fit in the registers available for argument
1186   // passing.
1187   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1188
1189   // Figure out which arguments are going to go in registers, and which in
1190   // memory.
1191   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1192   unsigned ArgRegIdx = 0;
1193
1194   // Keep track of registers passing arguments
1195   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1196   // And the arguments passed on the stack
1197   SmallVector<SDValue, 8> MemOpChains;
1198
1199   for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1200     SDValue Arg = OutVals[ArgRegIdx];
1201     CCValAssign &VA = ArgLocs[ArgRegIdx];
1202
1203     // PtrOff will be used to store the current argument to the stack if a
1204     // register cannot be found for it.
1205     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1206     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1207
1208     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1209     default: llvm_unreachable("Unexpected ValueType for argument!");
1210     case MVT::i8:
1211     case MVT::i16:
1212     case MVT::i32:
1213     case MVT::i64:
1214     case MVT::i128:
1215     case MVT::f32:
1216     case MVT::f64:
1217     case MVT::v2i64:
1218     case MVT::v2f64:
1219     case MVT::v4f32:
1220     case MVT::v4i32:
1221     case MVT::v8i16:
1222     case MVT::v16i8:
1223       if (ArgRegIdx != NumArgRegs) {
1224         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1225       } else {
1226         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1227                                            false, false, 0));
1228         ArgOffset += StackSlotSize;
1229       }
1230       break;
1231     }
1232   }
1233
1234   // Accumulate how many bytes are to be pushed on the stack, including the
1235   // linkage area, and parameter passing area.  According to the SPU ABI,
1236   // we minimally need space for [LR] and [SP].
1237   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1238
1239   // Insert a call sequence start
1240   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1241                                                             true));
1242
1243   if (!MemOpChains.empty()) {
1244     // Adjust the stack pointer for the stack arguments.
1245     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1246                         &MemOpChains[0], MemOpChains.size());
1247   }
1248
1249   // Build a sequence of copy-to-reg nodes chained together with token chain
1250   // and flag operands which copy the outgoing args into the appropriate regs.
1251   SDValue InFlag;
1252   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1253     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1254                              RegsToPass[i].second, InFlag);
1255     InFlag = Chain.getValue(1);
1256   }
1257
1258   SmallVector<SDValue, 8> Ops;
1259   unsigned CallOpc = SPUISD::CALL;
1260
1261   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1262   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1263   // node so that legalize doesn't hack it.
1264   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1265     const GlobalValue *GV = G->getGlobal();
1266     EVT CalleeVT = Callee.getValueType();
1267     SDValue Zero = DAG.getConstant(0, PtrVT);
1268     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1269
1270     if (!ST->usingLargeMem()) {
1271       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1272       // style calls, otherwise, external symbols are BRASL calls. This assumes
1273       // that declared/defined symbols are in the same compilation unit and can
1274       // be reached through PC-relative jumps.
1275       //
1276       // NOTE:
1277       // This may be an unsafe assumption for JIT and really large compilation
1278       // units.
1279       if (GV->isDeclaration()) {
1280         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1281       } else {
1282         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1283       }
1284     } else {
1285       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1286       // address pairs:
1287       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1288     }
1289   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1290     EVT CalleeVT = Callee.getValueType();
1291     SDValue Zero = DAG.getConstant(0, PtrVT);
1292     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1293         Callee.getValueType());
1294
1295     if (!ST->usingLargeMem()) {
1296       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1297     } else {
1298       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1299     }
1300   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1301     // If this is an absolute destination address that appears to be a legal
1302     // local store address, use the munged value.
1303     Callee = SDValue(Dest, 0);
1304   }
1305
1306   Ops.push_back(Chain);
1307   Ops.push_back(Callee);
1308
1309   // Add argument registers to the end of the list so that they are known live
1310   // into the call.
1311   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1312     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1313                                   RegsToPass[i].second.getValueType()));
1314
1315   if (InFlag.getNode())
1316     Ops.push_back(InFlag);
1317   // Returns a chain and a flag for retval copy to use.
1318   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1319                       &Ops[0], Ops.size());
1320   InFlag = Chain.getValue(1);
1321
1322   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1323                              DAG.getIntPtrConstant(0, true), InFlag);
1324   if (!Ins.empty())
1325     InFlag = Chain.getValue(1);
1326
1327   // If the function returns void, just return the chain.
1328   if (Ins.empty())
1329     return Chain;
1330
1331   // If the call has results, copy the values out of the ret val registers.
1332   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1333   default: llvm_unreachable("Unexpected ret value!");
1334   case MVT::Other: break;
1335   case MVT::i32:
1336     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1337       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1338                                  MVT::i32, InFlag).getValue(1);
1339       InVals.push_back(Chain.getValue(0));
1340       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1341                                  Chain.getValue(2)).getValue(1);
1342       InVals.push_back(Chain.getValue(0));
1343     } else {
1344       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1345                                  InFlag).getValue(1);
1346       InVals.push_back(Chain.getValue(0));
1347     }
1348     break;
1349   case MVT::i8:
1350   case MVT::i16:
1351   case MVT::i64:
1352   case MVT::i128:
1353   case MVT::f32:
1354   case MVT::f64:
1355   case MVT::v2f64:
1356   case MVT::v2i64:
1357   case MVT::v4f32:
1358   case MVT::v4i32:
1359   case MVT::v8i16:
1360   case MVT::v16i8:
1361     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1362                                    InFlag).getValue(1);
1363     InVals.push_back(Chain.getValue(0));
1364     break;
1365   }
1366
1367   return Chain;
1368 }
1369
1370 SDValue
1371 SPUTargetLowering::LowerReturn(SDValue Chain,
1372                                CallingConv::ID CallConv, bool isVarArg,
1373                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1374                                const SmallVectorImpl<SDValue> &OutVals,
1375                                DebugLoc dl, SelectionDAG &DAG) const {
1376
1377   SmallVector<CCValAssign, 16> RVLocs;
1378   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1379                  RVLocs, *DAG.getContext());
1380   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1381
1382   // If this is the first return lowered for this function, add the regs to the
1383   // liveout set for the function.
1384   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1385     for (unsigned i = 0; i != RVLocs.size(); ++i)
1386       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1387   }
1388
1389   SDValue Flag;
1390
1391   // Copy the result values into the output registers.
1392   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1393     CCValAssign &VA = RVLocs[i];
1394     assert(VA.isRegLoc() && "Can only return in registers!");
1395     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1396                              OutVals[i], Flag);
1397     Flag = Chain.getValue(1);
1398   }
1399
1400   if (Flag.getNode())
1401     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1402   else
1403     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1404 }
1405
1406
1407 //===----------------------------------------------------------------------===//
1408 // Vector related lowering:
1409 //===----------------------------------------------------------------------===//
1410
1411 static ConstantSDNode *
1412 getVecImm(SDNode *N) {
1413   SDValue OpVal(0, 0);
1414
1415   // Check to see if this buildvec has a single non-undef value in its elements.
1416   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1417     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1418     if (OpVal.getNode() == 0)
1419       OpVal = N->getOperand(i);
1420     else if (OpVal != N->getOperand(i))
1421       return 0;
1422   }
1423
1424   if (OpVal.getNode() != 0) {
1425     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1426       return CN;
1427     }
1428   }
1429
1430   return 0;
1431 }
1432
1433 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1435 /// constant
1436 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1437                               EVT ValueType) {
1438   if (ConstantSDNode *CN = getVecImm(N)) {
1439     uint64_t Value = CN->getZExtValue();
1440     if (ValueType == MVT::i64) {
1441       uint64_t UValue = CN->getZExtValue();
1442       uint32_t upper = uint32_t(UValue >> 32);
1443       uint32_t lower = uint32_t(UValue);
1444       if (upper != lower)
1445         return SDValue();
1446       Value = Value >> 32;
1447     }
1448     if (Value <= 0x3ffff)
1449       return DAG.getTargetConstant(Value, ValueType);
1450   }
1451
1452   return SDValue();
1453 }
1454
1455 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1456 /// and the value fits into a signed 16-bit constant, and if so, return the
1457 /// constant
1458 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1459                               EVT ValueType) {
1460   if (ConstantSDNode *CN = getVecImm(N)) {
1461     int64_t Value = CN->getSExtValue();
1462     if (ValueType == MVT::i64) {
1463       uint64_t UValue = CN->getZExtValue();
1464       uint32_t upper = uint32_t(UValue >> 32);
1465       uint32_t lower = uint32_t(UValue);
1466       if (upper != lower)
1467         return SDValue();
1468       Value = Value >> 32;
1469     }
1470     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1471       return DAG.getTargetConstant(Value, ValueType);
1472     }
1473   }
1474
1475   return SDValue();
1476 }
1477
1478 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1479 /// and the value fits into a signed 10-bit constant, and if so, return the
1480 /// constant
1481 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1482                               EVT ValueType) {
1483   if (ConstantSDNode *CN = getVecImm(N)) {
1484     int64_t Value = CN->getSExtValue();
1485     if (ValueType == MVT::i64) {
1486       uint64_t UValue = CN->getZExtValue();
1487       uint32_t upper = uint32_t(UValue >> 32);
1488       uint32_t lower = uint32_t(UValue);
1489       if (upper != lower)
1490         return SDValue();
1491       Value = Value >> 32;
1492     }
1493     if (isInt<10>(Value))
1494       return DAG.getTargetConstant(Value, ValueType);
1495   }
1496
1497   return SDValue();
1498 }
1499
1500 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1501 /// and the value fits into a signed 8-bit constant, and if so, return the
1502 /// constant.
1503 ///
1504 /// @note: The incoming vector is v16i8 because that's the only way we can load
1505 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1506 /// same value.
1507 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1508                              EVT ValueType) {
1509   if (ConstantSDNode *CN = getVecImm(N)) {
1510     int Value = (int) CN->getZExtValue();
1511     if (ValueType == MVT::i16
1512         && Value <= 0xffff                 /* truncated from uint64_t */
1513         && ((short) Value >> 8) == ((short) Value & 0xff))
1514       return DAG.getTargetConstant(Value & 0xff, ValueType);
1515     else if (ValueType == MVT::i8
1516              && (Value & 0xff) == Value)
1517       return DAG.getTargetConstant(Value, ValueType);
1518   }
1519
1520   return SDValue();
1521 }
1522
1523 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1524 /// and the value fits into a signed 16-bit constant, and if so, return the
1525 /// constant
1526 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1527                                EVT ValueType) {
1528   if (ConstantSDNode *CN = getVecImm(N)) {
1529     uint64_t Value = CN->getZExtValue();
1530     if ((ValueType == MVT::i32
1531           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1532         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1533       return DAG.getTargetConstant(Value >> 16, ValueType);
1534   }
1535
1536   return SDValue();
1537 }
1538
1539 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1540 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1541   if (ConstantSDNode *CN = getVecImm(N)) {
1542     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1543   }
1544
1545   return SDValue();
1546 }
1547
1548 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1549 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1550   if (ConstantSDNode *CN = getVecImm(N)) {
1551     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1552   }
1553
1554   return SDValue();
1555 }
1556
1557 //! Lower a BUILD_VECTOR instruction creatively:
1558 static SDValue
1559 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1560   EVT VT = Op.getValueType();
1561   EVT EltVT = VT.getVectorElementType();
1562   DebugLoc dl = Op.getDebugLoc();
1563   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1564   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1565   unsigned minSplatBits = EltVT.getSizeInBits();
1566
1567   if (minSplatBits < 16)
1568     minSplatBits = 16;
1569
1570   APInt APSplatBits, APSplatUndef;
1571   unsigned SplatBitSize;
1572   bool HasAnyUndefs;
1573
1574   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1575                             HasAnyUndefs, minSplatBits)
1576       || minSplatBits < SplatBitSize)
1577     return SDValue();   // Wasn't a constant vector or splat exceeded min
1578
1579   uint64_t SplatBits = APSplatBits.getZExtValue();
1580
1581   switch (VT.getSimpleVT().SimpleTy) {
1582   default:
1583     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1584                        Twine(VT.getEVTString()));
1585     /*NOTREACHED*/
1586   case MVT::v4f32: {
1587     uint32_t Value32 = uint32_t(SplatBits);
1588     assert(SplatBitSize == 32
1589            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1590     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1591     SDValue T = DAG.getConstant(Value32, MVT::i32);
1592     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1593                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1594     break;
1595   }
1596   case MVT::v2f64: {
1597     uint64_t f64val = uint64_t(SplatBits);
1598     assert(SplatBitSize == 64
1599            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1600     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1601     SDValue T = DAG.getConstant(f64val, MVT::i64);
1602     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1603                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1604     break;
1605   }
1606   case MVT::v16i8: {
1607    // 8-bit constants have to be expanded to 16-bits
1608    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1609    SmallVector<SDValue, 8> Ops;
1610
1611    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1612    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1613                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1614   }
1615   case MVT::v8i16: {
1616     unsigned short Value16 = SplatBits;
1617     SDValue T = DAG.getConstant(Value16, EltVT);
1618     SmallVector<SDValue, 8> Ops;
1619
1620     Ops.assign(8, T);
1621     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1622   }
1623   case MVT::v4i32: {
1624     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1625     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1626   }
1627   case MVT::v2f32:
1628   case MVT::v2i32: {
1629     return SDValue();
1630   }
1631   case MVT::v2i64: {
1632     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1633   }
1634   }
1635
1636   return SDValue();
1637 }
1638
1639 /*!
1640  */
1641 SDValue
1642 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1643                      DebugLoc dl) {
1644   uint32_t upper = uint32_t(SplatVal >> 32);
1645   uint32_t lower = uint32_t(SplatVal);
1646
1647   if (upper == lower) {
1648     // Magic constant that can be matched by IL, ILA, et. al.
1649     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1650     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1651                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1652                                    Val, Val, Val, Val));
1653   } else {
1654     bool upper_special, lower_special;
1655
1656     // NOTE: This code creates common-case shuffle masks that can be easily
1657     // detected as common expressions. It is not attempting to create highly
1658     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1659
1660     // Detect if the upper or lower half is a special shuffle mask pattern:
1661     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1662     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1663
1664     // Both upper and lower are special, lower to a constant pool load:
1665     if (lower_special && upper_special) {
1666       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1667       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1668                          SplatValCN, SplatValCN);
1669     }
1670
1671     SDValue LO32;
1672     SDValue HI32;
1673     SmallVector<SDValue, 16> ShufBytes;
1674     SDValue Result;
1675
1676     // Create lower vector if not a special pattern
1677     if (!lower_special) {
1678       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1679       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1680                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1681                                      LO32C, LO32C, LO32C, LO32C));
1682     }
1683
1684     // Create upper vector if not a special pattern
1685     if (!upper_special) {
1686       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1687       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1688                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1689                                      HI32C, HI32C, HI32C, HI32C));
1690     }
1691
1692     // If either upper or lower are special, then the two input operands are
1693     // the same (basically, one of them is a "don't care")
1694     if (lower_special)
1695       LO32 = HI32;
1696     if (upper_special)
1697       HI32 = LO32;
1698
1699     for (int i = 0; i < 4; ++i) {
1700       uint64_t val = 0;
1701       for (int j = 0; j < 4; ++j) {
1702         SDValue V;
1703         bool process_upper, process_lower;
1704         val <<= 8;
1705         process_upper = (upper_special && (i & 1) == 0);
1706         process_lower = (lower_special && (i & 1) == 1);
1707
1708         if (process_upper || process_lower) {
1709           if ((process_upper && upper == 0)
1710                   || (process_lower && lower == 0))
1711             val |= 0x80;
1712           else if ((process_upper && upper == 0xffffffff)
1713                   || (process_lower && lower == 0xffffffff))
1714             val |= 0xc0;
1715           else if ((process_upper && upper == 0x80000000)
1716                   || (process_lower && lower == 0x80000000))
1717             val |= (j == 0 ? 0xe0 : 0x80);
1718         } else
1719           val |= i * 4 + j + ((i & 1) * 16);
1720       }
1721
1722       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1723     }
1724
1725     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1726                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1727                                    &ShufBytes[0], ShufBytes.size()));
1728   }
1729 }
1730
1731 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1732 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1733 /// permutation vector, V3, is monotonically increasing with one "exception"
1734 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1735 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1736 /// In either case, the net result is going to eventually invoke SHUFB to
1737 /// permute/shuffle the bytes from V1 and V2.
1738 /// \note
1739 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1740 /// control word for byte/halfword/word insertion. This takes care of a single
1741 /// element move from V2 into V1.
1742 /// \note
1743 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1744 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1745   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1746   SDValue V1 = Op.getOperand(0);
1747   SDValue V2 = Op.getOperand(1);
1748   DebugLoc dl = Op.getDebugLoc();
1749
1750   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1751
1752   // If we have a single element being moved from V1 to V2, this can be handled
1753   // using the C*[DX] compute mask instructions, but the vector elements have
1754   // to be monotonically increasing with one exception element.
1755   EVT VecVT = V1.getValueType();
1756   EVT EltVT = VecVT.getVectorElementType();
1757   unsigned EltsFromV2 = 0;
1758   unsigned V2Elt = 0;
1759   unsigned V2EltIdx0 = 0;
1760   unsigned CurrElt = 0;
1761   unsigned MaxElts = VecVT.getVectorNumElements();
1762   unsigned PrevElt = 0;
1763   unsigned V0Elt = 0;
1764   bool monotonic = true;
1765   bool rotate = true;
1766   EVT maskVT;             // which of the c?d instructions to use
1767
1768   if (EltVT == MVT::i8) {
1769     V2EltIdx0 = 16;
1770     maskVT = MVT::v16i8;
1771   } else if (EltVT == MVT::i16) {
1772     V2EltIdx0 = 8;
1773     maskVT = MVT::v8i16;
1774   } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) {
1775     V2EltIdx0 = 2;
1776     maskVT = MVT::v4i32;
1777   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1778     V2EltIdx0 = 4;
1779     maskVT = MVT::v4i32;
1780   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1781     V2EltIdx0 = 2;
1782     maskVT = MVT::v2i64;
1783   } else
1784     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1785
1786   for (unsigned i = 0; i != MaxElts; ++i) {
1787     if (SVN->getMaskElt(i) < 0)
1788       continue;
1789
1790     unsigned SrcElt = SVN->getMaskElt(i);
1791
1792     if (monotonic) {
1793       if (SrcElt >= V2EltIdx0) {
1794         if (1 >= (++EltsFromV2)) {
1795           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1796         }
1797       } else if (CurrElt != SrcElt) {
1798         monotonic = false;
1799       }
1800
1801       ++CurrElt;
1802     }
1803
1804     if (rotate) {
1805       if (PrevElt > 0 && SrcElt < MaxElts) {
1806         if ((PrevElt == SrcElt - 1)
1807             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1808           PrevElt = SrcElt;
1809           if (SrcElt == 0)
1810             V0Elt = i;
1811         } else {
1812           rotate = false;
1813         }
1814       } else if (i == 0) {
1815         // First time through, need to keep track of previous element
1816         PrevElt = SrcElt;
1817       } else {
1818         // This isn't a rotation, takes elements from vector 2
1819         rotate = false;
1820       }
1821     }
1822   }
1823
1824   if (EltsFromV2 == 1 && monotonic) {
1825     // Compute mask and shuffle
1826     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827
1828     // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1829     // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1830     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1831                                 DAG.getRegister(SPU::R1, PtrVT),
1832                                 DAG.getConstant(V2Elt, MVT::i32));
1833     SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1834                                      maskVT, Pointer);
1835
1836     // Use shuffle mask in SHUFB synthetic instruction:
1837     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1838                        ShufMaskOp);
1839   } else if (rotate) {
1840     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1841
1842     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1843                        V1, DAG.getConstant(rotamt, MVT::i16));
1844   } else {
1845    // Convert the SHUFFLE_VECTOR mask's input element units to the
1846    // actual bytes.
1847     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1848
1849     SmallVector<SDValue, 16> ResultMask;
1850     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1851       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1852
1853       for (unsigned j = 0; j < BytesPerElement; ++j)
1854         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1855     }
1856     // For half vectors padd the mask with zeros for the second half.
1857     // This is needed because mask is assumed to be full vector elsewhere in
1858     // the SPU backend.
1859     if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32)
1860     for( unsigned i = 0; i < 2; ++i )
1861     {
1862       for (unsigned j = 0; j < BytesPerElement; ++j)
1863         ResultMask.push_back(DAG.getConstant(0,MVT::i8));
1864     }
1865
1866     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1867                                     &ResultMask[0], ResultMask.size());
1868     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1869   }
1870 }
1871
1872 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1873   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1874   DebugLoc dl = Op.getDebugLoc();
1875
1876   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1877     // For a constant, build the appropriate constant vector, which will
1878     // eventually simplify to a vector register load.
1879
1880     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1881     SmallVector<SDValue, 16> ConstVecValues;
1882     EVT VT;
1883     size_t n_copies;
1884
1885     // Create a constant vector:
1886     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1887     default: llvm_unreachable("Unexpected constant value type in "
1888                               "LowerSCALAR_TO_VECTOR");
1889     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1890     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1891     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1892     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1893     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1894     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1895     case MVT::v2i32: n_copies = 2; VT = MVT::i32; break;
1896     }
1897
1898     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1899     for (size_t j = 0; j < n_copies; ++j)
1900       ConstVecValues.push_back(CValue);
1901
1902     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1903                        &ConstVecValues[0], ConstVecValues.size());
1904   } else {
1905     // Otherwise, copy the value from one register to another:
1906     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1907     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1908     case MVT::i8:
1909     case MVT::i16:
1910     case MVT::i32:
1911     case MVT::i64:
1912     case MVT::f32:
1913     case MVT::f64:
1914       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1915     }
1916   }
1917
1918   return SDValue();
1919 }
1920
1921 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1922   EVT VT = Op.getValueType();
1923   SDValue N = Op.getOperand(0);
1924   SDValue Elt = Op.getOperand(1);
1925   DebugLoc dl = Op.getDebugLoc();
1926   SDValue retval;
1927
1928   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1929     // Constant argument:
1930     int EltNo = (int) C->getZExtValue();
1931
1932     // sanity checks:
1933     if (VT == MVT::i8 && EltNo >= 16)
1934       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1935     else if (VT == MVT::i16 && EltNo >= 8)
1936       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1937     else if (VT == MVT::i32 && EltNo >= 4)
1938       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1939     else if (VT == MVT::i64 && EltNo >= 2)
1940       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1941
1942     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1943       // i32 and i64: Element 0 is the preferred slot
1944       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1945     }
1946
1947     // Need to generate shuffle mask and extract:
1948     int prefslot_begin = -1, prefslot_end = -1;
1949     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1950
1951     switch (VT.getSimpleVT().SimpleTy) {
1952     default:
1953       assert(false && "Invalid value type!");
1954     case MVT::i8: {
1955       prefslot_begin = prefslot_end = 3;
1956       break;
1957     }
1958     case MVT::i16: {
1959       prefslot_begin = 2; prefslot_end = 3;
1960       break;
1961     }
1962     case MVT::i32:
1963     case MVT::f32: {
1964       prefslot_begin = 0; prefslot_end = 3;
1965       break;
1966     }
1967     case MVT::i64:
1968     case MVT::f64: {
1969       prefslot_begin = 0; prefslot_end = 7;
1970       break;
1971     }
1972     }
1973
1974     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1975            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1976
1977     unsigned int ShufBytes[16] = {
1978       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1979     };
1980     for (int i = 0; i < 16; ++i) {
1981       // zero fill uppper part of preferred slot, don't care about the
1982       // other slots:
1983       unsigned int mask_val;
1984       if (i <= prefslot_end) {
1985         mask_val =
1986           ((i < prefslot_begin)
1987            ? 0x80
1988            : elt_byte + (i - prefslot_begin));
1989
1990         ShufBytes[i] = mask_val;
1991       } else
1992         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1993     }
1994
1995     SDValue ShufMask[4];
1996     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1997       unsigned bidx = i * 4;
1998       unsigned int bits = ((ShufBytes[bidx] << 24) |
1999                            (ShufBytes[bidx+1] << 16) |
2000                            (ShufBytes[bidx+2] << 8) |
2001                            ShufBytes[bidx+3]);
2002       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2003     }
2004
2005     SDValue ShufMaskVec =
2006       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2007                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2008
2009     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2010                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2011                                      N, N, ShufMaskVec));
2012   } else {
2013     // Variable index: Rotate the requested element into slot 0, then replicate
2014     // slot 0 across the vector
2015     EVT VecVT = N.getValueType();
2016     if (!VecVT.isSimple() || !VecVT.isVector()) {
2017       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2018                         "vector type!");
2019     }
2020
2021     // Make life easier by making sure the index is zero-extended to i32
2022     if (Elt.getValueType() != MVT::i32)
2023       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2024
2025     // Scale the index to a bit/byte shift quantity
2026     APInt scaleFactor =
2027             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2028     unsigned scaleShift = scaleFactor.logBase2();
2029     SDValue vecShift;
2030
2031     if (scaleShift > 0) {
2032       // Scale the shift factor:
2033       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2034                         DAG.getConstant(scaleShift, MVT::i32));
2035     }
2036
2037     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2038
2039     // Replicate the bytes starting at byte 0 across the entire vector (for
2040     // consistency with the notion of a unified register set)
2041     SDValue replicate;
2042
2043     switch (VT.getSimpleVT().SimpleTy) {
2044     default:
2045       report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2046                         "type");
2047       /*NOTREACHED*/
2048     case MVT::i8: {
2049       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2050       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2051                               factor, factor, factor, factor);
2052       break;
2053     }
2054     case MVT::i16: {
2055       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2056       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2057                               factor, factor, factor, factor);
2058       break;
2059     }
2060     case MVT::i32:
2061     case MVT::f32: {
2062       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2063       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2064                               factor, factor, factor, factor);
2065       break;
2066     }
2067     case MVT::i64:
2068     case MVT::f64: {
2069       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2070       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2071       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2072                               loFactor, hiFactor, loFactor, hiFactor);
2073       break;
2074     }
2075     }
2076
2077     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2078                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2079                                      vecShift, vecShift, replicate));
2080   }
2081
2082   return retval;
2083 }
2084
2085 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2086   SDValue VecOp = Op.getOperand(0);
2087   SDValue ValOp = Op.getOperand(1);
2088   SDValue IdxOp = Op.getOperand(2);
2089   DebugLoc dl = Op.getDebugLoc();
2090   EVT VT = Op.getValueType();
2091
2092   // use 0 when the lane to insert to is 'undef'
2093   int64_t Idx=0;
2094   if (IdxOp.getOpcode() != ISD::UNDEF) {
2095     ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2096     assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2097     Idx = (CN->getSExtValue());
2098   }
2099
2100   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2101   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2102   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2103                                 DAG.getRegister(SPU::R1, PtrVT),
2104                                 DAG.getConstant(Idx, PtrVT));
2105   // widen the mask when dealing with half vectors
2106   EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2107                                 128/ VT.getVectorElementType().getSizeInBits());
2108   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
2109
2110   SDValue result =
2111     DAG.getNode(SPUISD::SHUFB, dl, VT,
2112                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2113                 VecOp,
2114                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2115
2116   return result;
2117 }
2118
2119 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2120                            const TargetLowering &TLI)
2121 {
2122   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2123   DebugLoc dl = Op.getDebugLoc();
2124   EVT ShiftVT = TLI.getShiftAmountTy();
2125
2126   assert(Op.getValueType() == MVT::i8);
2127   switch (Opc) {
2128   default:
2129     llvm_unreachable("Unhandled i8 math operator");
2130     /*NOTREACHED*/
2131     break;
2132   case ISD::ADD: {
2133     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2134     // the result:
2135     SDValue N1 = Op.getOperand(1);
2136     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2137     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2138     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2139                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2140
2141   }
2142
2143   case ISD::SUB: {
2144     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2145     // the result:
2146     SDValue N1 = Op.getOperand(1);
2147     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2148     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2149     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2150                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2151   }
2152   case ISD::ROTR:
2153   case ISD::ROTL: {
2154     SDValue N1 = Op.getOperand(1);
2155     EVT N1VT = N1.getValueType();
2156
2157     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2158     if (!N1VT.bitsEq(ShiftVT)) {
2159       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2160                        ? ISD::ZERO_EXTEND
2161                        : ISD::TRUNCATE;
2162       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2163     }
2164
2165     // Replicate lower 8-bits into upper 8:
2166     SDValue ExpandArg =
2167       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2168                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2169                               N0, DAG.getConstant(8, MVT::i32)));
2170
2171     // Truncate back down to i8
2172     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2173                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2174   }
2175   case ISD::SRL:
2176   case ISD::SHL: {
2177     SDValue N1 = Op.getOperand(1);
2178     EVT N1VT = N1.getValueType();
2179
2180     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2181     if (!N1VT.bitsEq(ShiftVT)) {
2182       unsigned N1Opc = ISD::ZERO_EXTEND;
2183
2184       if (N1.getValueType().bitsGT(ShiftVT))
2185         N1Opc = ISD::TRUNCATE;
2186
2187       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2188     }
2189
2190     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2191                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2192   }
2193   case ISD::SRA: {
2194     SDValue N1 = Op.getOperand(1);
2195     EVT N1VT = N1.getValueType();
2196
2197     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2198     if (!N1VT.bitsEq(ShiftVT)) {
2199       unsigned N1Opc = ISD::SIGN_EXTEND;
2200
2201       if (N1VT.bitsGT(ShiftVT))
2202         N1Opc = ISD::TRUNCATE;
2203       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2204     }
2205
2206     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2207                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2208   }
2209   case ISD::MUL: {
2210     SDValue N1 = Op.getOperand(1);
2211
2212     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2213     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2214     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2215                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2216     break;
2217   }
2218   }
2219
2220   return SDValue();
2221 }
2222
2223 //! Lower byte immediate operations for v16i8 vectors:
2224 static SDValue
2225 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2226   SDValue ConstVec;
2227   SDValue Arg;
2228   EVT VT = Op.getValueType();
2229   DebugLoc dl = Op.getDebugLoc();
2230
2231   ConstVec = Op.getOperand(0);
2232   Arg = Op.getOperand(1);
2233   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2234     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2235       ConstVec = ConstVec.getOperand(0);
2236     } else {
2237       ConstVec = Op.getOperand(1);
2238       Arg = Op.getOperand(0);
2239       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2240         ConstVec = ConstVec.getOperand(0);
2241       }
2242     }
2243   }
2244
2245   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2246     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2247     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2248
2249     APInt APSplatBits, APSplatUndef;
2250     unsigned SplatBitSize;
2251     bool HasAnyUndefs;
2252     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2253
2254     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2255                               HasAnyUndefs, minSplatBits)
2256         && minSplatBits <= SplatBitSize) {
2257       uint64_t SplatBits = APSplatBits.getZExtValue();
2258       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2259
2260       SmallVector<SDValue, 16> tcVec;
2261       tcVec.assign(16, tc);
2262       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2263                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2264     }
2265   }
2266
2267   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2268   // lowered.  Return the operation, rather than a null SDValue.
2269   return Op;
2270 }
2271
2272 //! Custom lowering for CTPOP (count population)
2273 /*!
2274   Custom lowering code that counts the number ones in the input
2275   operand. SPU has such an instruction, but it counts the number of
2276   ones per byte, which then have to be accumulated.
2277 */
2278 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2279   EVT VT = Op.getValueType();
2280   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2281                                VT, (128 / VT.getSizeInBits()));
2282   DebugLoc dl = Op.getDebugLoc();
2283
2284   switch (VT.getSimpleVT().SimpleTy) {
2285   default:
2286     assert(false && "Invalid value type!");
2287   case MVT::i8: {
2288     SDValue N = Op.getOperand(0);
2289     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2290
2291     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2292     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2293
2294     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2295   }
2296
2297   case MVT::i16: {
2298     MachineFunction &MF = DAG.getMachineFunction();
2299     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2300
2301     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2302
2303     SDValue N = Op.getOperand(0);
2304     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2305     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2306     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2307
2308     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2309     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2310
2311     // CNTB_result becomes the chain to which all of the virtual registers
2312     // CNTB_reg, SUM1_reg become associated:
2313     SDValue CNTB_result =
2314       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2315
2316     SDValue CNTB_rescopy =
2317       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2318
2319     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2320
2321     return DAG.getNode(ISD::AND, dl, MVT::i16,
2322                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2323                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2324                                                Tmp1, Shift1),
2325                                    Tmp1),
2326                        Mask0);
2327   }
2328
2329   case MVT::i32: {
2330     MachineFunction &MF = DAG.getMachineFunction();
2331     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2332
2333     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2334     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2335
2336     SDValue N = Op.getOperand(0);
2337     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2338     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2339     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2340     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2341
2342     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2343     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2344
2345     // CNTB_result becomes the chain to which all of the virtual registers
2346     // CNTB_reg, SUM1_reg become associated:
2347     SDValue CNTB_result =
2348       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2349
2350     SDValue CNTB_rescopy =
2351       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2352
2353     SDValue Comp1 =
2354       DAG.getNode(ISD::SRL, dl, MVT::i32,
2355                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2356                   Shift1);
2357
2358     SDValue Sum1 =
2359       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2360                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2361
2362     SDValue Sum1_rescopy =
2363       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2364
2365     SDValue Comp2 =
2366       DAG.getNode(ISD::SRL, dl, MVT::i32,
2367                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2368                   Shift2);
2369     SDValue Sum2 =
2370       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2371                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2372
2373     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2374   }
2375
2376   case MVT::i64:
2377     break;
2378   }
2379
2380   return SDValue();
2381 }
2382
2383 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2384 /*!
2385  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2386  All conversions to i64 are expanded to a libcall.
2387  */
2388 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2389                               const SPUTargetLowering &TLI) {
2390   EVT OpVT = Op.getValueType();
2391   SDValue Op0 = Op.getOperand(0);
2392   EVT Op0VT = Op0.getValueType();
2393
2394   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2395       || OpVT == MVT::i64) {
2396     // Convert f32 / f64 to i32 / i64 via libcall.
2397     RTLIB::Libcall LC =
2398             (Op.getOpcode() == ISD::FP_TO_SINT)
2399              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2400              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2401     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2402     SDValue Dummy;
2403     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2404   }
2405
2406   return Op;
2407 }
2408
2409 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2410 /*!
2411  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2412  All conversions from i64 are expanded to a libcall.
2413  */
2414 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2415                               const SPUTargetLowering &TLI) {
2416   EVT OpVT = Op.getValueType();
2417   SDValue Op0 = Op.getOperand(0);
2418   EVT Op0VT = Op0.getValueType();
2419
2420   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2421       || Op0VT == MVT::i64) {
2422     // Convert i32, i64 to f64 via libcall:
2423     RTLIB::Libcall LC =
2424             (Op.getOpcode() == ISD::SINT_TO_FP)
2425              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2426              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2427     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2428     SDValue Dummy;
2429     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2430   }
2431
2432   return Op;
2433 }
2434
2435 //! Lower ISD::SETCC
2436 /*!
2437  This handles MVT::f64 (double floating point) condition lowering
2438  */
2439 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2440                           const TargetLowering &TLI) {
2441   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2442   DebugLoc dl = Op.getDebugLoc();
2443   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2444
2445   SDValue lhs = Op.getOperand(0);
2446   SDValue rhs = Op.getOperand(1);
2447   EVT lhsVT = lhs.getValueType();
2448   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2449
2450   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2451   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2452   EVT IntVT(MVT::i64);
2453
2454   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2455   // selected to a NOP:
2456   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2457   SDValue lhsHi32 =
2458           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2459                       DAG.getNode(ISD::SRL, dl, IntVT,
2460                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2461   SDValue lhsHi32abs =
2462           DAG.getNode(ISD::AND, dl, MVT::i32,
2463                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2464   SDValue lhsLo32 =
2465           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2466
2467   // SETO and SETUO only use the lhs operand:
2468   if (CC->get() == ISD::SETO) {
2469     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2470     // SETUO
2471     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2472     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2473                        DAG.getSetCC(dl, ccResultVT,
2474                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2475                                     ISD::SETUO),
2476                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2477   } else if (CC->get() == ISD::SETUO) {
2478     // Evaluates to true if Op0 is [SQ]NaN
2479     return DAG.getNode(ISD::AND, dl, ccResultVT,
2480                        DAG.getSetCC(dl, ccResultVT,
2481                                     lhsHi32abs,
2482                                     DAG.getConstant(0x7ff00000, MVT::i32),
2483                                     ISD::SETGE),
2484                        DAG.getSetCC(dl, ccResultVT,
2485                                     lhsLo32,
2486                                     DAG.getConstant(0, MVT::i32),
2487                                     ISD::SETGT));
2488   }
2489
2490   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2491   SDValue rhsHi32 =
2492           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2493                       DAG.getNode(ISD::SRL, dl, IntVT,
2494                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2495
2496   // If a value is negative, subtract from the sign magnitude constant:
2497   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2498
2499   // Convert the sign-magnitude representation into 2's complement:
2500   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2501                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2502   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2503   SDValue lhsSelect =
2504           DAG.getNode(ISD::SELECT, dl, IntVT,
2505                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2506
2507   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2508                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2509   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2510   SDValue rhsSelect =
2511           DAG.getNode(ISD::SELECT, dl, IntVT,
2512                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2513
2514   unsigned compareOp;
2515
2516   switch (CC->get()) {
2517   case ISD::SETOEQ:
2518   case ISD::SETUEQ:
2519     compareOp = ISD::SETEQ; break;
2520   case ISD::SETOGT:
2521   case ISD::SETUGT:
2522     compareOp = ISD::SETGT; break;
2523   case ISD::SETOGE:
2524   case ISD::SETUGE:
2525     compareOp = ISD::SETGE; break;
2526   case ISD::SETOLT:
2527   case ISD::SETULT:
2528     compareOp = ISD::SETLT; break;
2529   case ISD::SETOLE:
2530   case ISD::SETULE:
2531     compareOp = ISD::SETLE; break;
2532   case ISD::SETUNE:
2533   case ISD::SETONE:
2534     compareOp = ISD::SETNE; break;
2535   default:
2536     report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2537   }
2538
2539   SDValue result =
2540           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2541                        (ISD::CondCode) compareOp);
2542
2543   if ((CC->get() & 0x8) == 0) {
2544     // Ordered comparison:
2545     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2546                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2547                                   ISD::SETO);
2548     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2549                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2550                                   ISD::SETO);
2551     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2552
2553     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2554   }
2555
2556   return result;
2557 }
2558
2559 //! Lower ISD::SELECT_CC
2560 /*!
2561   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2562   SELB instruction.
2563
2564   \note Need to revisit this in the future: if the code path through the true
2565   and false value computations is longer than the latency of a branch (6
2566   cycles), then it would be more advantageous to branch and insert a new basic
2567   block and branch on the condition. However, this code does not make that
2568   assumption, given the simplisitc uses so far.
2569  */
2570
2571 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2572                               const TargetLowering &TLI) {
2573   EVT VT = Op.getValueType();
2574   SDValue lhs = Op.getOperand(0);
2575   SDValue rhs = Op.getOperand(1);
2576   SDValue trueval = Op.getOperand(2);
2577   SDValue falseval = Op.getOperand(3);
2578   SDValue condition = Op.getOperand(4);
2579   DebugLoc dl = Op.getDebugLoc();
2580
2581   // NOTE: SELB's arguments: $rA, $rB, $mask
2582   //
2583   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2584   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2585   // condition was true and 0s where the condition was false. Hence, the
2586   // arguments to SELB get reversed.
2587
2588   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2589   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2590   // with another "cannot select select_cc" assert:
2591
2592   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2593                                 TLI.getSetCCResultType(Op.getValueType()),
2594                                 lhs, rhs, condition);
2595   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2596 }
2597
2598 //! Custom lower ISD::TRUNCATE
2599 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2600 {
2601   // Type to truncate to
2602   EVT VT = Op.getValueType();
2603   MVT simpleVT = VT.getSimpleVT();
2604   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2605                                VT, (128 / VT.getSizeInBits()));
2606   DebugLoc dl = Op.getDebugLoc();
2607
2608   // Type to truncate from
2609   SDValue Op0 = Op.getOperand(0);
2610   EVT Op0VT = Op0.getValueType();
2611
2612   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2613     // Create shuffle mask, least significant doubleword of quadword
2614     unsigned maskHigh = 0x08090a0b;
2615     unsigned maskLow = 0x0c0d0e0f;
2616     // Use a shuffle to perform the truncation
2617     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2618                                    DAG.getConstant(maskHigh, MVT::i32),
2619                                    DAG.getConstant(maskLow, MVT::i32),
2620                                    DAG.getConstant(maskHigh, MVT::i32),
2621                                    DAG.getConstant(maskLow, MVT::i32));
2622
2623     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2624                                        Op0, Op0, shufMask);
2625
2626     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2627   }
2628
2629   return SDValue();             // Leave the truncate unmolested
2630 }
2631
2632 /*!
2633  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2634  * algorithm is to duplicate the sign bit using rotmai to generate at
2635  * least one byte full of sign bits. Then propagate the "sign-byte" into
2636  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2637  *
2638  * @param Op The sext operand
2639  * @param DAG The current DAG
2640  * @return The SDValue with the entire instruction sequence
2641  */
2642 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2643 {
2644   DebugLoc dl = Op.getDebugLoc();
2645
2646   // Type to extend to
2647   MVT OpVT = Op.getValueType().getSimpleVT();
2648
2649   // Type to extend from
2650   SDValue Op0 = Op.getOperand(0);
2651   MVT Op0VT = Op0.getValueType().getSimpleVT();
2652
2653   // The type to extend to needs to be a i128 and
2654   // the type to extend from needs to be i64 or i32.
2655   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2656           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2657
2658   // Create shuffle mask
2659   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2660   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2661   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2662   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2663                                  DAG.getConstant(mask1, MVT::i32),
2664                                  DAG.getConstant(mask1, MVT::i32),
2665                                  DAG.getConstant(mask2, MVT::i32),
2666                                  DAG.getConstant(mask3, MVT::i32));
2667
2668   // Word wise arithmetic right shift to generate at least one byte
2669   // that contains sign bits.
2670   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2671   SDValue sraVal = DAG.getNode(ISD::SRA,
2672                  dl,
2673                  mvt,
2674                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2675                  DAG.getConstant(31, MVT::i32));
2676
2677   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2678   // and the input value into the lower 64 bits.
2679   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2680       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2681
2682   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2683 }
2684
2685 //! Custom (target-specific) lowering entry point
2686 /*!
2687   This is where LLVM's DAG selection process calls to do target-specific
2688   lowering of nodes.
2689  */
2690 SDValue
2691 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2692 {
2693   unsigned Opc = (unsigned) Op.getOpcode();
2694   EVT VT = Op.getValueType();
2695
2696   switch (Opc) {
2697   default: {
2698 #ifndef NDEBUG
2699     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2700     errs() << "Op.getOpcode() = " << Opc << "\n";
2701     errs() << "*Op.getNode():\n";
2702     Op.getNode()->dump();
2703 #endif
2704     llvm_unreachable(0);
2705   }
2706   case ISD::LOAD:
2707   case ISD::EXTLOAD:
2708   case ISD::SEXTLOAD:
2709   case ISD::ZEXTLOAD:
2710     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2711   case ISD::STORE:
2712     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2713   case ISD::ConstantPool:
2714     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2715   case ISD::GlobalAddress:
2716     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2717   case ISD::JumpTable:
2718     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2719   case ISD::ConstantFP:
2720     return LowerConstantFP(Op, DAG);
2721
2722   // i8, i64 math ops:
2723   case ISD::ADD:
2724   case ISD::SUB:
2725   case ISD::ROTR:
2726   case ISD::ROTL:
2727   case ISD::SRL:
2728   case ISD::SHL:
2729   case ISD::SRA: {
2730     if (VT == MVT::i8)
2731       return LowerI8Math(Op, DAG, Opc, *this);
2732     break;
2733   }
2734
2735   case ISD::FP_TO_SINT:
2736   case ISD::FP_TO_UINT:
2737     return LowerFP_TO_INT(Op, DAG, *this);
2738
2739   case ISD::SINT_TO_FP:
2740   case ISD::UINT_TO_FP:
2741     return LowerINT_TO_FP(Op, DAG, *this);
2742
2743   // Vector-related lowering.
2744   case ISD::BUILD_VECTOR:
2745     return LowerBUILD_VECTOR(Op, DAG);
2746   case ISD::SCALAR_TO_VECTOR:
2747     return LowerSCALAR_TO_VECTOR(Op, DAG);
2748   case ISD::VECTOR_SHUFFLE:
2749     return LowerVECTOR_SHUFFLE(Op, DAG);
2750   case ISD::EXTRACT_VECTOR_ELT:
2751     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2752   case ISD::INSERT_VECTOR_ELT:
2753     return LowerINSERT_VECTOR_ELT(Op, DAG);
2754
2755   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2756   case ISD::AND:
2757   case ISD::OR:
2758   case ISD::XOR:
2759     return LowerByteImmed(Op, DAG);
2760
2761   // Vector and i8 multiply:
2762   case ISD::MUL:
2763     if (VT == MVT::i8)
2764       return LowerI8Math(Op, DAG, Opc, *this);
2765
2766   case ISD::CTPOP:
2767     return LowerCTPOP(Op, DAG);
2768
2769   case ISD::SELECT_CC:
2770     return LowerSELECT_CC(Op, DAG, *this);
2771
2772   case ISD::SETCC:
2773     return LowerSETCC(Op, DAG, *this);
2774
2775   case ISD::TRUNCATE:
2776     return LowerTRUNCATE(Op, DAG);
2777
2778   case ISD::SIGN_EXTEND:
2779     return LowerSIGN_EXTEND(Op, DAG);
2780   }
2781
2782   return SDValue();
2783 }
2784
2785 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2786                                            SmallVectorImpl<SDValue>&Results,
2787                                            SelectionDAG &DAG) const
2788 {
2789 #if 0
2790   unsigned Opc = (unsigned) N->getOpcode();
2791   EVT OpVT = N->getValueType(0);
2792
2793   switch (Opc) {
2794   default: {
2795     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2796     errs() << "Op.getOpcode() = " << Opc << "\n";
2797     errs() << "*Op.getNode():\n";
2798     N->dump();
2799     abort();
2800     /*NOTREACHED*/
2801   }
2802   }
2803 #endif
2804
2805   /* Otherwise, return unchanged */
2806 }
2807
2808 //===----------------------------------------------------------------------===//
2809 // Target Optimization Hooks
2810 //===----------------------------------------------------------------------===//
2811
2812 SDValue
2813 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2814 {
2815 #if 0
2816   TargetMachine &TM = getTargetMachine();
2817 #endif
2818   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2819   SelectionDAG &DAG = DCI.DAG;
2820   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2821   EVT NodeVT = N->getValueType(0);      // The node's value type
2822   EVT Op0VT = Op0.getValueType();       // The first operand's result
2823   SDValue Result;                       // Initially, empty result
2824   DebugLoc dl = N->getDebugLoc();
2825
2826   switch (N->getOpcode()) {
2827   default: break;
2828   case ISD::ADD: {
2829     SDValue Op1 = N->getOperand(1);
2830
2831     if (Op0.getOpcode() == SPUISD::IndirectAddr
2832         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2833       // Normalize the operands to reduce repeated code
2834       SDValue IndirectArg = Op0, AddArg = Op1;
2835
2836       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2837         IndirectArg = Op1;
2838         AddArg = Op0;
2839       }
2840
2841       if (isa<ConstantSDNode>(AddArg)) {
2842         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2843         SDValue IndOp1 = IndirectArg.getOperand(1);
2844
2845         if (CN0->isNullValue()) {
2846           // (add (SPUindirect <arg>, <arg>), 0) ->
2847           // (SPUindirect <arg>, <arg>)
2848
2849 #if !defined(NDEBUG)
2850           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2851             errs() << "\n"
2852                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2853                  << "With:    (SPUindirect <arg>, <arg>)\n";
2854           }
2855 #endif
2856
2857           return IndirectArg;
2858         } else if (isa<ConstantSDNode>(IndOp1)) {
2859           // (add (SPUindirect <arg>, <const>), <const>) ->
2860           // (SPUindirect <arg>, <const + const>)
2861           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2862           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2863           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2864
2865 #if !defined(NDEBUG)
2866           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2867             errs() << "\n"
2868                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2869                  << "), " << CN0->getSExtValue() << ")\n"
2870                  << "With:    (SPUindirect <arg>, "
2871                  << combinedConst << ")\n";
2872           }
2873 #endif
2874
2875           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2876                              IndirectArg, combinedValue);
2877         }
2878       }
2879     }
2880     break;
2881   }
2882   case ISD::SIGN_EXTEND:
2883   case ISD::ZERO_EXTEND:
2884   case ISD::ANY_EXTEND: {
2885     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2886       // (any_extend (SPUextract_elt0 <arg>)) ->
2887       // (SPUextract_elt0 <arg>)
2888       // Types must match, however...
2889 #if !defined(NDEBUG)
2890       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2891         errs() << "\nReplace: ";
2892         N->dump(&DAG);
2893         errs() << "\nWith:    ";
2894         Op0.getNode()->dump(&DAG);
2895         errs() << "\n";
2896       }
2897 #endif
2898
2899       return Op0;
2900     }
2901     break;
2902   }
2903   case SPUISD::IndirectAddr: {
2904     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2905       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2906       if (CN != 0 && CN->isNullValue()) {
2907         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2908         // (SPUaform <addr>, 0)
2909
2910         DEBUG(errs() << "Replace: ");
2911         DEBUG(N->dump(&DAG));
2912         DEBUG(errs() << "\nWith:    ");
2913         DEBUG(Op0.getNode()->dump(&DAG));
2914         DEBUG(errs() << "\n");
2915
2916         return Op0;
2917       }
2918     } else if (Op0.getOpcode() == ISD::ADD) {
2919       SDValue Op1 = N->getOperand(1);
2920       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2921         // (SPUindirect (add <arg>, <arg>), 0) ->
2922         // (SPUindirect <arg>, <arg>)
2923         if (CN1->isNullValue()) {
2924
2925 #if !defined(NDEBUG)
2926           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2927             errs() << "\n"
2928                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2929                  << "With:    (SPUindirect <arg>, <arg>)\n";
2930           }
2931 #endif
2932
2933           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2934                              Op0.getOperand(0), Op0.getOperand(1));
2935         }
2936       }
2937     }
2938     break;
2939   }
2940   case SPUISD::SHLQUAD_L_BITS:
2941   case SPUISD::SHLQUAD_L_BYTES:
2942   case SPUISD::ROTBYTES_LEFT: {
2943     SDValue Op1 = N->getOperand(1);
2944
2945     // Kill degenerate vector shifts:
2946     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2947       if (CN->isNullValue()) {
2948         Result = Op0;
2949       }
2950     }
2951     break;
2952   }
2953   case SPUISD::PREFSLOT2VEC: {
2954     switch (Op0.getOpcode()) {
2955     default:
2956       break;
2957     case ISD::ANY_EXTEND:
2958     case ISD::ZERO_EXTEND:
2959     case ISD::SIGN_EXTEND: {
2960       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2961       // <arg>
2962       // but only if the SPUprefslot2vec and <arg> types match.
2963       SDValue Op00 = Op0.getOperand(0);
2964       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2965         SDValue Op000 = Op00.getOperand(0);
2966         if (Op000.getValueType() == NodeVT) {
2967           Result = Op000;
2968         }
2969       }
2970       break;
2971     }
2972     case SPUISD::VEC2PREFSLOT: {
2973       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2974       // <arg>
2975       Result = Op0.getOperand(0);
2976       break;
2977     }
2978     }
2979     break;
2980   }
2981   }
2982
2983   // Otherwise, return unchanged.
2984 #ifndef NDEBUG
2985   if (Result.getNode()) {
2986     DEBUG(errs() << "\nReplace.SPU: ");
2987     DEBUG(N->dump(&DAG));
2988     DEBUG(errs() << "\nWith:        ");
2989     DEBUG(Result.getNode()->dump(&DAG));
2990     DEBUG(errs() << "\n");
2991   }
2992 #endif
2993
2994   return Result;
2995 }
2996
2997 //===----------------------------------------------------------------------===//
2998 // Inline Assembly Support
2999 //===----------------------------------------------------------------------===//
3000
3001 /// getConstraintType - Given a constraint letter, return the type of
3002 /// constraint it is for this target.
3003 SPUTargetLowering::ConstraintType
3004 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3005   if (ConstraintLetter.size() == 1) {
3006     switch (ConstraintLetter[0]) {
3007     default: break;
3008     case 'b':
3009     case 'r':
3010     case 'f':
3011     case 'v':
3012     case 'y':
3013       return C_RegisterClass;
3014     }
3015   }
3016   return TargetLowering::getConstraintType(ConstraintLetter);
3017 }
3018
3019 std::pair<unsigned, const TargetRegisterClass*>
3020 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3021                                                 EVT VT) const
3022 {
3023   if (Constraint.size() == 1) {
3024     // GCC RS6000 Constraint Letters
3025     switch (Constraint[0]) {
3026     case 'b':   // R1-R31
3027     case 'r':   // R0-R31
3028       if (VT == MVT::i64)
3029         return std::make_pair(0U, SPU::R64CRegisterClass);
3030       return std::make_pair(0U, SPU::R32CRegisterClass);
3031     case 'f':
3032       if (VT == MVT::f32)
3033         return std::make_pair(0U, SPU::R32FPRegisterClass);
3034       else if (VT == MVT::f64)
3035         return std::make_pair(0U, SPU::R64FPRegisterClass);
3036       break;
3037     case 'v':
3038       return std::make_pair(0U, SPU::GPRCRegisterClass);
3039     }
3040   }
3041
3042   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3043 }
3044
3045 //! Compute used/known bits for a SPU operand
3046 void
3047 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3048                                                   const APInt &Mask,
3049                                                   APInt &KnownZero,
3050                                                   APInt &KnownOne,
3051                                                   const SelectionDAG &DAG,
3052                                                   unsigned Depth ) const {
3053 #if 0
3054   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3055
3056   switch (Op.getOpcode()) {
3057   default:
3058     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3059     break;
3060   case CALL:
3061   case SHUFB:
3062   case SHUFFLE_MASK:
3063   case CNTB:
3064   case SPUISD::PREFSLOT2VEC:
3065   case SPUISD::LDRESULT:
3066   case SPUISD::VEC2PREFSLOT:
3067   case SPUISD::SHLQUAD_L_BITS:
3068   case SPUISD::SHLQUAD_L_BYTES:
3069   case SPUISD::VEC_ROTL:
3070   case SPUISD::VEC_ROTR:
3071   case SPUISD::ROTBYTES_LEFT:
3072   case SPUISD::SELECT_MASK:
3073   case SPUISD::SELB:
3074   }
3075 #endif
3076 }
3077
3078 unsigned
3079 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3080                                                    unsigned Depth) const {
3081   switch (Op.getOpcode()) {
3082   default:
3083     return 1;
3084
3085   case ISD::SETCC: {
3086     EVT VT = Op.getValueType();
3087
3088     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3089       VT = MVT::i32;
3090     }
3091     return VT.getSizeInBits();
3092   }
3093   }
3094 }
3095
3096 // LowerAsmOperandForConstraint
3097 void
3098 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3099                                                 char ConstraintLetter,
3100                                                 std::vector<SDValue> &Ops,
3101                                                 SelectionDAG &DAG) const {
3102   // Default, for the time being, to the base class handler
3103   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3104 }
3105
3106 /// isLegalAddressImmediate - Return true if the integer value can be used
3107 /// as the offset of the target addressing mode.
3108 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3109                                                 const Type *Ty) const {
3110   // SPU's addresses are 256K:
3111   return (V > -(1 << 18) && V < (1 << 18) - 1);
3112 }
3113
3114 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3115   return false;
3116 }
3117
3118 bool
3119 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3120   // The SPU target isn't yet aware of offsets.
3121   return false;
3122 }