lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "SPUMachineFunction.h"
  19 #include "llvm/Constants.h"
  20 #include "llvm/Function.h"
  21 #include "llvm/Intrinsics.h"
  22 #include "llvm/CallingConv.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  30 #include "llvm/Target/TargetOptions.h"
  31 #include "llvm/ADT/VectorExtras.h"
  32 #include "llvm/Support/Debug.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include "llvm/Support/raw_ostream.h"
  36 #include <map>
  37
  38 using namespace llvm;
  39
  40 // Used in getTargetNodeName() below
  41 namespace {
  42   std::map<unsigned, const char *> node_names;
  43
  44   //! EVT mapping to useful data for Cell SPU
  45   struct valtype_map_s {
  46     EVT   valtype;
  47     int   prefslot_byte;
  48   };
  49
  50   const valtype_map_s valtype_map[] = {
  51     { MVT::i1,   3 },
  52     { MVT::i8,   3 },
  53     { MVT::i16,  2 },
  54     { MVT::i32,  0 },
  55     { MVT::f32,  0 },
  56     { MVT::i64,  0 },
  57     { MVT::f64,  0 },
  58     { MVT::i128, 0 }
  59   };
  60
  61   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  62
  63   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  64     const valtype_map_s *retval = 0;
  65
  66     for (size_t i = 0; i < n_valtype_map; ++i) {
  67       if (valtype_map[i].valtype == VT) {
  68         retval = valtype_map + i;
  69         break;
  70       }
  71     }
  72
  73 #ifndef NDEBUG
  74     if (retval == 0) {
  75       report_fatal_error("getValueTypeMapEntry returns NULL for " +
  76                          Twine(VT.getEVTString()));
  77     }
  78 #endif
  79
  80     return retval;
  81   }
  82
  83   //! Expand a library call into an actual call DAG node
  84   /*!
  85    \note
  86    This code is taken from SelectionDAGLegalize, since it is not exposed as
  87    part of the LLVM SelectionDAG API.
  88    */
  89
  90   SDValue
  91   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  92                 bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
  93     // The input chain to this libcall is the entry node of the function.
  94     // Legalizing the call will automatically add the previous call to the
  95     // dependence.
  96     SDValue InChain = DAG.getEntryNode();
  97
  98     TargetLowering::ArgListTy Args;
  99     TargetLowering::ArgListEntry Entry;
 100     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 101       EVT ArgVT = Op.getOperand(i).getValueType();
 102       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 103       Entry.Node = Op.getOperand(i);
 104       Entry.Ty = ArgTy;
 105       Entry.isSExt = isSigned;
 106       Entry.isZExt = !isSigned;
 107       Args.push_back(Entry);
 108     }
 109     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 110                                            TLI.getPointerTy());
 111
 112     // Splice the libcall in wherever FindInputOutputChains tells us to.
 113     const Type *RetTy =
 114                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 115     std::pair<SDValue, SDValue> CallInfo =
 116             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 117                             0, TLI.getLibcallCallingConv(LC), false,
 118                             /*isReturnValueUsed=*/true,
 119                             Callee, Args, DAG, Op.getDebugLoc());
 120
 121     return CallInfo.first;
 122   }
 123 }
 124
 125 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 126   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 127     SPUTM(TM) {
 128   // Fold away setcc operations if possible.
 129   setPow2DivIsCheap();
 130
 131   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 132   setUseUnderscoreSetJmp(true);
 133   setUseUnderscoreLongJmp(true);
 134
 135   // Set RTLIB libcall names as used by SPU:
 136   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 137
 138   // Set up the SPU's register classes:
 139   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 140   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 141   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 142   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 143   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 144   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 145   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 146
 147   // SPU has no sign or zero extended loads for i1, i8, i16:
 148   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 149   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 150   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 151
 152   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 153   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 154
 155   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 156   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 157   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 158   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 159
 160   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 161
 162   // SPU constant load actions are custom lowered:
 163   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 164   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 165
 166   // SPU's loads and stores have to be custom lowered:
 167   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 168        ++sctype) {
 169     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 170
 171     setOperationAction(ISD::LOAD,   VT, Custom);
 172     setOperationAction(ISD::STORE,  VT, Custom);
 173     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 174     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 175     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 176
 177     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 178       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 179       setTruncStoreAction(VT, StoreVT, Expand);
 180     }
 181   }
 182
 183   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 184        ++sctype) {
 185     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 186
 187     setOperationAction(ISD::LOAD,   VT, Custom);
 188     setOperationAction(ISD::STORE,  VT, Custom);
 189
 190     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 191       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 192       setTruncStoreAction(VT, StoreVT, Expand);
 193     }
 194   }
 195
 196   // Expand the jumptable branches
 197   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 198   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 199
 200   // Custom lower SELECT_CC for most cases, but expand by default
 201   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 202   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 203   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 204   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 205   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 206
 207   // SPU has no intrinsics for these particular operations:
 208   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 209
 210   // SPU has no division/remainder instructions
 211   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 212   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 213   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 214   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 215   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 216   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 217   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 218   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 219   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 220   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 221   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 222   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 223   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 224   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 225   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 226   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 227   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 228   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 229   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 230   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 231   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 232   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 233   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 234   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 235   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 236   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 237   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 238   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 239   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 240   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 241
 242   // We don't support sin/cos/sqrt/fmod
 243   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 244   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 245   setOperationAction(ISD::FREM , MVT::f64, Expand);
 246   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 247   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 248   setOperationAction(ISD::FREM , MVT::f32, Expand);
 249
 250   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 251   // for f32!)
 252   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 253   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 254
 255   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 256   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 257
 258   // SPU can do rotate right and left, so legalize it... but customize for i8
 259   // because instructions don't exist.
 260
 261   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 262   //        .td files.
 263   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 264   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 265   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 266
 267   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 268   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 269   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 270
 271   // SPU has no native version of shift left/right for i8
 272   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 273   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 274   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 275
 276   // Make these operations legal and handle them during instruction selection:
 277   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 278   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 279   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 280
 281   // Custom lower i8, i32 and i64 multiplications
 282   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 283   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 284   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 285
 286   // Expand double-width multiplication
 287   // FIXME: It would probably be reasonable to support some of these operations
 288   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 289   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 290   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 291   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 292   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 293   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 294   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 295   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 298   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 299   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 300   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 301   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 302   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 303   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 304
 305   // Need to custom handle (some) common i8, i64 math ops
 306   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 307   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 308   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 309   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 310
 311   // SPU does not have BSWAP. It does have i32 support CTLZ.
 312   // CTPOP has to be custom lowered.
 313   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 314   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 315
 316   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 317   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 318   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 319   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 320   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 321
 322   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 323   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 324   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 325   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 326   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 327
 328   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 329   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 330   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 331   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 332   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 333
 334   // SPU has a version of select that implements (a&~c)|(b&c), just like
 335   // select ought to work:
 336   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 337   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 338   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 339   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 340
 341   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 342   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 343   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 344   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 345   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 346
 347   // Custom lower i128 -> i64 truncates
 348   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 349
 350   // Custom lower i32/i64 -> i128 sign extend
 351   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 352
 353   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 354   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 355   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 356   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 357   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 358   // to expand to a libcall, hence the custom lowering:
 359   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 360   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 361   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 362   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 363   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 364   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 365
 366   // FDIV on SPU requires custom lowering
 367   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 368
 369   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 370   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 371   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 372   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 373   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 374   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 375   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 376   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 377   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 378
 379   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 380   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 381   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 382   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 383
 384   // We cannot sextinreg(i1).  Expand to shifts.
 385   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 386
 387   // We want to legalize GlobalAddress and ConstantPool nodes into the
 388   // appropriate instructions to materialize the address.
 389   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 390        ++sctype) {
 391     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 392
 393     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 394     setOperationAction(ISD::ConstantPool,   VT, Custom);
 395     setOperationAction(ISD::JumpTable,      VT, Custom);
 396   }
 397
 398   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 399   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 400
 401   // Use the default implementation.
 402   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 403   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 404   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 405   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 406   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 407   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 408   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 409
 410   // Cell SPU has instructions for converting between i64 and fp.
 411   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 412   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 413
 414   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 415   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 416
 417   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 418   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 419
 420   // First set operation action for all vector types to expand. Then we
 421   // will selectively turn on ones that can be effectively codegen'd.
 422   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 423   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 424   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 425   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 427   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 428
 429   // "Odd size" vector classes that we're willing to support:
 430   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 431
 432   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 433        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 434     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 435
 436     // add/sub are legal for all supported vector VT's.
 437     setOperationAction(ISD::ADD,     VT, Legal);
 438     setOperationAction(ISD::SUB,     VT, Legal);
 439     // mul has to be custom lowered.
 440     setOperationAction(ISD::MUL,     VT, Legal);
 441
 442     setOperationAction(ISD::AND,     VT, Legal);
 443     setOperationAction(ISD::OR,      VT, Legal);
 444     setOperationAction(ISD::XOR,     VT, Legal);
 445     setOperationAction(ISD::LOAD,    VT, Legal);
 446     setOperationAction(ISD::SELECT,  VT, Legal);
 447     setOperationAction(ISD::STORE,   VT, Legal);
 448
 449     // These operations need to be expanded:
 450     setOperationAction(ISD::SDIV,    VT, Expand);
 451     setOperationAction(ISD::SREM,    VT, Expand);
 452     setOperationAction(ISD::UDIV,    VT, Expand);
 453     setOperationAction(ISD::UREM,    VT, Expand);
 454
 455     // Custom lower build_vector, constant pool spills, insert and
 456     // extract vector elements:
 457     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 458     setOperationAction(ISD::ConstantPool, VT, Custom);
 459     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 460     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 461     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 462     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 463   }
 464
 465   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 466   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 467   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 468   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 469
 470   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 471
 472   setShiftAmountType(MVT::i32);
 473   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 474
 475   setStackPointerRegisterToSaveRestore(SPU::R1);
 476
 477   // We have target-specific dag combine patterns for the following nodes:
 478   setTargetDAGCombine(ISD::ADD);
 479   setTargetDAGCombine(ISD::ZERO_EXTEND);
 480   setTargetDAGCombine(ISD::SIGN_EXTEND);
 481   setTargetDAGCombine(ISD::ANY_EXTEND);
 482
 483   computeRegisterProperties();
 484
 485   // Set pre-RA register scheduler default to BURR, which produces slightly
 486   // better code than the default (could also be TDRR, but TargetLowering.h
 487   // needs a mod to support that model):
 488   setSchedulingPreference(Sched::RegPressure);
 489 }
 490
 491 const char *
 492 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 493 {
 494   if (node_names.empty()) {
 495     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 496     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 497     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 498     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 499     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 500     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 501     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 502     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 503     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 504     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 505     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 506     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 507     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 508     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 509     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 510     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 511     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 512     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 513     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 514             "SPUISD::ROTBYTES_LEFT_BITS";
 515     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 516     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 517     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 518     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 519     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 520   }
 521
 522   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 523
 524   return ((i != node_names.end()) ? i->second : 0);
 525 }
 526
 527 /// getFunctionAlignment - Return the Log2 alignment of this function.
 528 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 529   return 3;
 530 }
 531
 532 //===----------------------------------------------------------------------===//
 533 // Return the Cell SPU's SETCC result type
 534 //===----------------------------------------------------------------------===//
 535
 536 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 537   // i16 and i32 are valid SETCC result types
 538   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 539     VT.getSimpleVT().SimpleTy :
 540     MVT::i32);
 541 }
 542
 543 //===----------------------------------------------------------------------===//
 544 // Calling convention code:
 545 //===----------------------------------------------------------------------===//
 546
 547 #include "SPUGenCallingConv.inc"
 548
 549 //===----------------------------------------------------------------------===//
 550 //  LowerOperation implementation
 551 //===----------------------------------------------------------------------===//
 552
 553 /// Custom lower loads for CellSPU
 554 /*!
 555  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 556  within a 16-byte block, we have to rotate to extract the requested element.
 557
 558  For extending loads, we also want to ensure that the following sequence is
 559  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 560
 561 \verbatim
 562 %1  v16i8,ch = load
 563 %2  v16i8,ch = rotate %1
 564 %3  v4f8, ch = bitconvert %2
 565 %4  f32      = vec2perfslot %3
 566 %5  f64      = fp_extend %4
 567 \endverbatim
 568 */
 569 static SDValue
 570 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 571   LoadSDNode *LN = cast<LoadSDNode>(Op);
 572   SDValue the_chain = LN->getChain();
 573   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 574   EVT InVT = LN->getMemoryVT();
 575   EVT OutVT = Op.getValueType();
 576   ISD::LoadExtType ExtType = LN->getExtensionType();
 577   unsigned alignment = LN->getAlignment();
 578   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 579   DebugLoc dl = Op.getDebugLoc();
 580
 581   switch (LN->getAddressingMode()) {
 582   case ISD::UNINDEXED: {
 583     SDValue result;
 584     SDValue basePtr = LN->getBasePtr();
 585     SDValue rotate;
 586
 587     if (alignment == 16) {
 588       ConstantSDNode *CN;
 589
 590       // Special cases for a known aligned load to simplify the base pointer
 591       // and the rotation amount:
 592       if (basePtr.getOpcode() == ISD::ADD
 593           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 594         // Known offset into basePtr
 595         int64_t offset = CN->getSExtValue();
 596         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 597
 598         if (rotamt < 0)
 599           rotamt += 16;
 600
 601         rotate = DAG.getConstant(rotamt, MVT::i16);
 602
 603         // Simplify the base pointer for this case:
 604         basePtr = basePtr.getOperand(0);
 605         if ((offset & ~0xf) > 0) {
 606           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 607                                 basePtr,
 608                                 DAG.getConstant((offset & ~0xf), PtrVT));
 609         }
 610       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 611                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 612                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 613                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 614         // Plain aligned a-form address: rotate into preferred slot
 615         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 616         int64_t rotamt = -vtm->prefslot_byte;
 617         if (rotamt < 0)
 618           rotamt += 16;
 619         rotate = DAG.getConstant(rotamt, MVT::i16);
 620       } else {
 621         // Offset the rotate amount by the basePtr and the preferred slot
 622         // byte offset
 623         int64_t rotamt = -vtm->prefslot_byte;
 624         if (rotamt < 0)
 625           rotamt += 16;
 626         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 627                              basePtr,
 628                              DAG.getConstant(rotamt, PtrVT));
 629       }
 630     } else {
 631       // Unaligned load: must be more pessimistic about addressing modes:
 632       if (basePtr.getOpcode() == ISD::ADD) {
 633         MachineFunction &MF = DAG.getMachineFunction();
 634         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 635         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 636         SDValue Flag;
 637
 638         SDValue Op0 = basePtr.getOperand(0);
 639         SDValue Op1 = basePtr.getOperand(1);
 640
 641         if (isa<ConstantSDNode>(Op1)) {
 642           // Convert the (add <ptr>, <const>) to an indirect address contained
 643           // in a register. Note that this is done because we need to avoid
 644           // creating a 0(reg) d-form address due to the SPU's block loads.
 645           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 646           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 647           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 648         } else {
 649           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 650           // will likely be lowered as a reg(reg) x-form address.
 651           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 652         }
 653       } else {
 654         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 655                               basePtr,
 656                               DAG.getConstant(0, PtrVT));
 657       }
 658
 659       // Offset the rotate amount by the basePtr and the preferred slot
 660       // byte offset
 661       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 662                            basePtr,
 663                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 664     }
 665
 666     // Re-emit as a v16i8 vector load
 667     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 668                          LN->getSrcValue(), LN->getSrcValueOffset(),
 669                          LN->isVolatile(), LN->isNonTemporal(), 16);
 670
 671     // Update the chain
 672     the_chain = result.getValue(1);
 673
 674     // Rotate into the preferred slot:
 675     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 676                          result.getValue(0), rotate);
 677
 678     // Convert the loaded v16i8 vector to the appropriate vector type
 679     // specified by the operand:
 680     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 681                                  InVT, (128 / InVT.getSizeInBits()));
 682     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 683                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 684
 685     // Handle extending loads by extending the scalar result:
 686     if (ExtType == ISD::SEXTLOAD) {
 687       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 688     } else if (ExtType == ISD::ZEXTLOAD) {
 689       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 690     } else if (ExtType == ISD::EXTLOAD) {
 691       unsigned NewOpc = ISD::ANY_EXTEND;
 692
 693       if (OutVT.isFloatingPoint())
 694         NewOpc = ISD::FP_EXTEND;
 695
 696       result = DAG.getNode(NewOpc, dl, OutVT, result);
 697     }
 698
 699     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 700     SDValue retops[2] = {
 701       result,
 702       the_chain
 703     };
 704
 705     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 706                          retops, sizeof(retops) / sizeof(retops[0]));
 707     return result;
 708   }
 709   case ISD::PRE_INC:
 710   case ISD::PRE_DEC:
 711   case ISD::POST_INC:
 712   case ISD::POST_DEC:
 713   case ISD::LAST_INDEXED_MODE:
 714     {
 715       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 716                          "than UNINDEXED\n" +
 717                          Twine((unsigned)LN->getAddressingMode()));
 718       /*NOTREACHED*/
 719     }
 720   }
 721
 722   return SDValue();
 723 }
 724
 725 /// Custom lower stores for CellSPU
 726 /*!
 727  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 728  within a 16-byte block, we have to generate a shuffle to insert the
 729  requested element into its place, then store the resulting block.
 730  */
 731 static SDValue
 732 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 733   StoreSDNode *SN = cast<StoreSDNode>(Op);
 734   SDValue Value = SN->getValue();
 735   EVT VT = Value.getValueType();
 736   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 737   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 738   DebugLoc dl = Op.getDebugLoc();
 739   unsigned alignment = SN->getAlignment();
 740
 741   switch (SN->getAddressingMode()) {
 742   case ISD::UNINDEXED: {
 743     // The vector type we really want to load from the 16-byte chunk.
 744     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 745                                  VT, (128 / VT.getSizeInBits()));
 746
 747     SDValue alignLoadVec;
 748     SDValue basePtr = SN->getBasePtr();
 749     SDValue the_chain = SN->getChain();
 750     SDValue insertEltOffs;
 751
 752     if (alignment == 16) {
 753       ConstantSDNode *CN;
 754
 755       // Special cases for a known aligned load to simplify the base pointer
 756       // and insertion byte:
 757       if (basePtr.getOpcode() == ISD::ADD
 758           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 759         // Known offset into basePtr
 760         int64_t offset = CN->getSExtValue();
 761
 762         // Simplify the base pointer for this case:
 763         basePtr = basePtr.getOperand(0);
 764         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 765                                     basePtr,
 766                                     DAG.getConstant((offset & 0xf), PtrVT));
 767
 768         if ((offset & ~0xf) > 0) {
 769           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 770                                 basePtr,
 771                                 DAG.getConstant((offset & ~0xf), PtrVT));
 772         }
 773       } else {
 774         // Otherwise, assume it's at byte 0 of basePtr
 775         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 776                                     basePtr,
 777                                     DAG.getConstant(0, PtrVT));
 778       }
 779     } else {
 780       // Unaligned load: must be more pessimistic about addressing modes:
 781       if (basePtr.getOpcode() == ISD::ADD) {
 782         MachineFunction &MF = DAG.getMachineFunction();
 783         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 784         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 785         SDValue Flag;
 786
 787         SDValue Op0 = basePtr.getOperand(0);
 788         SDValue Op1 = basePtr.getOperand(1);
 789
 790         if (isa<ConstantSDNode>(Op1)) {
 791           // Convert the (add <ptr>, <const>) to an indirect address contained
 792           // in a register. Note that this is done because we need to avoid
 793           // creating a 0(reg) d-form address due to the SPU's block loads.
 794           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 795           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 796           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 797         } else {
 798           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 799           // will likely be lowered as a reg(reg) x-form address.
 800           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 801         }
 802       } else {
 803         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 804                               basePtr,
 805                               DAG.getConstant(0, PtrVT));
 806       }
 807
 808       // Insertion point is solely determined by basePtr's contents
 809       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 810                                   basePtr,
 811                                   DAG.getConstant(0, PtrVT));
 812     }
 813
 814     // Re-emit as a v16i8 vector load
 815     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 816                                SN->getSrcValue(), SN->getSrcValueOffset(),
 817                                SN->isVolatile(), SN->isNonTemporal(), 16);
 818
 819     // Update the chain
 820     the_chain = alignLoadVec.getValue(1);
 821
 822     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 823     SDValue theValue = SN->getValue();
 824     SDValue result;
 825
 826     if (StVT != VT
 827         && (theValue.getOpcode() == ISD::AssertZext
 828             || theValue.getOpcode() == ISD::AssertSext)) {
 829       // Drill down and get the value for zero- and sign-extended
 830       // quantities
 831       theValue = theValue.getOperand(0);
 832     }
 833
 834     // If the base pointer is already a D-form address, then just create
 835     // a new D-form address with a slot offset and the orignal base pointer.
 836     // Otherwise generate a D-form address with the slot offset relative
 837     // to the stack pointer, which is always aligned.
 838 #if !defined(NDEBUG)
 839       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 840         errs() << "CellSPU LowerSTORE: basePtr = ";
 841         basePtr.getNode()->dump(&DAG);
 842         errs() << "\n";
 843       }
 844 #endif
 845
 846     SDValue insertEltOp =
 847             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 848     SDValue vectorizeOp =
 849             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 850
 851     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 852                          vectorizeOp, alignLoadVec,
 853                          DAG.getNode(ISD::BIT_CONVERT, dl,
 854                                      MVT::v4i32, insertEltOp));
 855
 856     result = DAG.getStore(the_chain, dl, result, basePtr,
 857                           LN->getSrcValue(), LN->getSrcValueOffset(),
 858                           LN->isVolatile(), LN->isNonTemporal(),
 859                           LN->getAlignment());
 860
 861 #if 0 && !defined(NDEBUG)
 862     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 863       const SDValue &currentRoot = DAG.getRoot();
 864
 865       DAG.setRoot(result);
 866       errs() << "------- CellSPU:LowerStore result:\n";
 867       DAG.dump();
 868       errs() << "-------\n";
 869       DAG.setRoot(currentRoot);
 870     }
 871 #endif
 872
 873     return result;
 874     /*UNREACHED*/
 875   }
 876   case ISD::PRE_INC:
 877   case ISD::PRE_DEC:
 878   case ISD::POST_INC:
 879   case ISD::POST_DEC:
 880   case ISD::LAST_INDEXED_MODE:
 881     {
 882       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 883                          "than UNINDEXED\n" +
 884                          Twine((unsigned)SN->getAddressingMode()));
 885       /*NOTREACHED*/
 886     }
 887   }
 888
 889   return SDValue();
 890 }
 891
 892 //! Generate the address of a constant pool entry.
 893 static SDValue
 894 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 895   EVT PtrVT = Op.getValueType();
 896   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 897   const Constant *C = CP->getConstVal();
 898   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 899   SDValue Zero = DAG.getConstant(0, PtrVT);
 900   const TargetMachine &TM = DAG.getTarget();
 901   // FIXME there is no actual debug info here
 902   DebugLoc dl = Op.getDebugLoc();
 903
 904   if (TM.getRelocationModel() == Reloc::Static) {
 905     if (!ST->usingLargeMem()) {
 906       // Just return the SDValue with the constant pool address in it.
 907       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 908     } else {
 909       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 910       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 911       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 912     }
 913   }
 914
 915   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 916                    " not supported.");
 917   return SDValue();
 918 }
 919
 920 //! Alternate entry point for generating the address of a constant pool entry
 921 SDValue
 922 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 923   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 924 }
 925
 926 static SDValue
 927 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 928   EVT PtrVT = Op.getValueType();
 929   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 930   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 931   SDValue Zero = DAG.getConstant(0, PtrVT);
 932   const TargetMachine &TM = DAG.getTarget();
 933   // FIXME there is no actual debug info here
 934   DebugLoc dl = Op.getDebugLoc();
 935
 936   if (TM.getRelocationModel() == Reloc::Static) {
 937     if (!ST->usingLargeMem()) {
 938       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 939     } else {
 940       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 941       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 942       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 943     }
 944   }
 945
 946   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 947                    " not supported.");
 948   return SDValue();
 949 }
 950
 951 static SDValue
 952 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 953   EVT PtrVT = Op.getValueType();
 954   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 955   const GlobalValue *GV = GSDN->getGlobal();
 956   SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
 957                                           PtrVT, GSDN->getOffset());
 958   const TargetMachine &TM = DAG.getTarget();
 959   SDValue Zero = DAG.getConstant(0, PtrVT);
 960   // FIXME there is no actual debug info here
 961   DebugLoc dl = Op.getDebugLoc();
 962
 963   if (TM.getRelocationModel() == Reloc::Static) {
 964     if (!ST->usingLargeMem()) {
 965       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 966     } else {
 967       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 968       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 969       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 970     }
 971   } else {
 972     report_fatal_error("LowerGlobalAddress: Relocation model other than static"
 973                       "not supported.");
 974     /*NOTREACHED*/
 975   }
 976
 977   return SDValue();
 978 }
 979
 980 //! Custom lower double precision floating point constants
 981 static SDValue
 982 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 983   EVT VT = Op.getValueType();
 984   // FIXME there is no actual debug info here
 985   DebugLoc dl = Op.getDebugLoc();
 986
 987   if (VT == MVT::f64) {
 988     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 989
 990     assert((FP != 0) &&
 991            "LowerConstantFP: Node is not ConstantFPSDNode");
 992
 993     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 994     SDValue T = DAG.getConstant(dbits, MVT::i64);
 995     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 996     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 997                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 998   }
 999
1000   return SDValue();
1001 }
1002
1003 SDValue
1004 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1005                                         CallingConv::ID CallConv, bool isVarArg,
1006                                         const SmallVectorImpl<ISD::InputArg>
1007                                           &Ins,
1008                                         DebugLoc dl, SelectionDAG &DAG,
1009                                         SmallVectorImpl<SDValue> &InVals)
1010                                           const {
1011
1012   MachineFunction &MF = DAG.getMachineFunction();
1013   MachineFrameInfo *MFI = MF.getFrameInfo();
1014   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1015   SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1016
1017   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1018   unsigned ArgRegIdx = 0;
1019   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1020
1021   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1022
1023   SmallVector<CCValAssign, 16> ArgLocs;
1024   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1025                  *DAG.getContext());
1026   // FIXME: allow for other calling conventions
1027   CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1028
1029   // Add DAG nodes to load the arguments or copy them out of registers.
1030   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1031     EVT ObjectVT = Ins[ArgNo].VT;
1032     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1033     SDValue ArgVal;
1034     CCValAssign &VA = ArgLocs[ArgNo];
1035
1036     if (VA.isRegLoc()) {
1037       const TargetRegisterClass *ArgRegClass;
1038
1039       switch (ObjectVT.getSimpleVT().SimpleTy) {
1040       default:
1041         report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1042                            Twine(ObjectVT.getEVTString()));
1043       case MVT::i8:
1044         ArgRegClass = &SPU::R8CRegClass;
1045         break;
1046       case MVT::i16:
1047         ArgRegClass = &SPU::R16CRegClass;
1048         break;
1049       case MVT::i32:
1050         ArgRegClass = &SPU::R32CRegClass;
1051         break;
1052       case MVT::i64:
1053         ArgRegClass = &SPU::R64CRegClass;
1054         break;
1055       case MVT::i128:
1056         ArgRegClass = &SPU::GPRCRegClass;
1057         break;
1058       case MVT::f32:
1059         ArgRegClass = &SPU::R32FPRegClass;
1060         break;
1061       case MVT::f64:
1062         ArgRegClass = &SPU::R64FPRegClass;
1063         break;
1064       case MVT::v2f64:
1065       case MVT::v4f32:
1066       case MVT::v2i64:
1067       case MVT::v4i32:
1068       case MVT::v8i16:
1069       case MVT::v16i8:
1070       case MVT::v2i32:
1071         ArgRegClass = &SPU::VECREGRegClass;
1072         break;
1073       }
1074
1075       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1076       RegInfo.addLiveIn(VA.getLocReg(), VReg);
1077       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1078       ++ArgRegIdx;
1079     } else {
1080       // We need to load the argument to a virtual register if we determined
1081       // above that we ran out of physical registers of the appropriate type
1082       // or we're forced to do vararg
1083       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1084       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1085       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1086       ArgOffset += StackSlotSize;
1087     }
1088
1089     InVals.push_back(ArgVal);
1090     // Update the chain
1091     Chain = ArgVal.getOperand(0);
1092   }
1093
1094   // vararg handling:
1095   if (isVarArg) {
1096     // FIXME: we should be able to query the argument registers from
1097     //        tablegen generated code.
1098     static const unsigned ArgRegs[] = {
1099       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
1100       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1101       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1102       SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1103       SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1104       SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1105       SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1106       SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1107       SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1108       SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1109       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1110     };
1111     // size of ArgRegs array
1112     unsigned NumArgRegs = 77;
1113
1114     // We will spill (79-3)+1 registers to the stack
1115     SmallVector<SDValue, 79-3+1> MemOps;
1116
1117     // Create the frame slot
1118     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1119       FuncInfo->setVarArgsFrameIndex(
1120         MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1121       SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1122       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1123       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1124       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1125                                    false, false, 0);
1126       Chain = Store.getOperand(0);
1127       MemOps.push_back(Store);
1128
1129       // Increment address by stack slot size for the next stored argument
1130       ArgOffset += StackSlotSize;
1131     }
1132     if (!MemOps.empty())
1133       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1134                           &MemOps[0], MemOps.size());
1135   }
1136
1137   return Chain;
1138 }
1139
1140 /// isLSAAddress - Return the immediate to use if the specified
1141 /// value is representable as a LSA address.
1142 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1143   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1144   if (!C) return 0;
1145
1146   int Addr = C->getZExtValue();
1147   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1148       (Addr << 14 >> 14) != Addr)
1149     return 0;  // Top 14 bits have to be sext of immediate.
1150
1151   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1152 }
1153
1154 SDValue
1155 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1156                              CallingConv::ID CallConv, bool isVarArg,
1157                              bool &isTailCall,
1158                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1159                              const SmallVectorImpl<SDValue> &OutVals,
1160                              const SmallVectorImpl<ISD::InputArg> &Ins,
1161                              DebugLoc dl, SelectionDAG &DAG,
1162                              SmallVectorImpl<SDValue> &InVals) const {
1163   // CellSPU target does not yet support tail call optimization.
1164   isTailCall = false;
1165
1166   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1167   unsigned NumOps     = Outs.size();
1168   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1169
1170   SmallVector<CCValAssign, 16> ArgLocs;
1171   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1172                  *DAG.getContext());
1173   // FIXME: allow for other calling conventions
1174   CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
1175
1176   const unsigned NumArgRegs = ArgLocs.size();
1177
1178
1179   // Handy pointer type
1180   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1181
1182   // Set up a copy of the stack pointer for use loading and storing any
1183   // arguments that may not fit in the registers available for argument
1184   // passing.
1185   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1186
1187   // Figure out which arguments are going to go in registers, and which in
1188   // memory.
1189   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1190   unsigned ArgRegIdx = 0;
1191
1192   // Keep track of registers passing arguments
1193   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1194   // And the arguments passed on the stack
1195   SmallVector<SDValue, 8> MemOpChains;
1196
1197   for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
1198     SDValue Arg = OutVals[ArgRegIdx];
1199     CCValAssign &VA = ArgLocs[ArgRegIdx];
1200
1201     // PtrOff will be used to store the current argument to the stack if a
1202     // register cannot be found for it.
1203     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1204     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1205
1206     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1207     default: llvm_unreachable("Unexpected ValueType for argument!");
1208     case MVT::i8:
1209     case MVT::i16:
1210     case MVT::i32:
1211     case MVT::i64:
1212     case MVT::i128:
1213     case MVT::f32:
1214     case MVT::f64:
1215     case MVT::v2i64:
1216     case MVT::v2f64:
1217     case MVT::v4f32:
1218     case MVT::v4i32:
1219     case MVT::v8i16:
1220     case MVT::v16i8:
1221       if (ArgRegIdx != NumArgRegs) {
1222         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1223       } else {
1224         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1225                                            false, false, 0));
1226         ArgOffset += StackSlotSize;
1227       }
1228       break;
1229     }
1230   }
1231
1232   // Accumulate how many bytes are to be pushed on the stack, including the
1233   // linkage area, and parameter passing area.  According to the SPU ABI,
1234   // we minimally need space for [LR] and [SP].
1235   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1236
1237   // Insert a call sequence start
1238   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1239                                                             true));
1240
1241   if (!MemOpChains.empty()) {
1242     // Adjust the stack pointer for the stack arguments.
1243     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1244                         &MemOpChains[0], MemOpChains.size());
1245   }
1246
1247   // Build a sequence of copy-to-reg nodes chained together with token chain
1248   // and flag operands which copy the outgoing args into the appropriate regs.
1249   SDValue InFlag;
1250   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1251     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1252                              RegsToPass[i].second, InFlag);
1253     InFlag = Chain.getValue(1);
1254   }
1255
1256   SmallVector<SDValue, 8> Ops;
1257   unsigned CallOpc = SPUISD::CALL;
1258
1259   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1260   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1261   // node so that legalize doesn't hack it.
1262   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1263     const GlobalValue *GV = G->getGlobal();
1264     EVT CalleeVT = Callee.getValueType();
1265     SDValue Zero = DAG.getConstant(0, PtrVT);
1266     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
1267
1268     if (!ST->usingLargeMem()) {
1269       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1270       // style calls, otherwise, external symbols are BRASL calls. This assumes
1271       // that declared/defined symbols are in the same compilation unit and can
1272       // be reached through PC-relative jumps.
1273       //
1274       // NOTE:
1275       // This may be an unsafe assumption for JIT and really large compilation
1276       // units.
1277       if (GV->isDeclaration()) {
1278         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1279       } else {
1280         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1281       }
1282     } else {
1283       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1284       // address pairs:
1285       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1286     }
1287   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1288     EVT CalleeVT = Callee.getValueType();
1289     SDValue Zero = DAG.getConstant(0, PtrVT);
1290     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1291         Callee.getValueType());
1292
1293     if (!ST->usingLargeMem()) {
1294       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1295     } else {
1296       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1297     }
1298   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1299     // If this is an absolute destination address that appears to be a legal
1300     // local store address, use the munged value.
1301     Callee = SDValue(Dest, 0);
1302   }
1303
1304   Ops.push_back(Chain);
1305   Ops.push_back(Callee);
1306
1307   // Add argument registers to the end of the list so that they are known live
1308   // into the call.
1309   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1310     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1311                                   RegsToPass[i].second.getValueType()));
1312
1313   if (InFlag.getNode())
1314     Ops.push_back(InFlag);
1315   // Returns a chain and a flag for retval copy to use.
1316   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1317                       &Ops[0], Ops.size());
1318   InFlag = Chain.getValue(1);
1319
1320   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1321                              DAG.getIntPtrConstant(0, true), InFlag);
1322   if (!Ins.empty())
1323     InFlag = Chain.getValue(1);
1324
1325   // If the function returns void, just return the chain.
1326   if (Ins.empty())
1327     return Chain;
1328
1329   // If the call has results, copy the values out of the ret val registers.
1330   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1331   default: llvm_unreachable("Unexpected ret value!");
1332   case MVT::Other: break;
1333   case MVT::i32:
1334     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1335       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1336                                  MVT::i32, InFlag).getValue(1);
1337       InVals.push_back(Chain.getValue(0));
1338       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1339                                  Chain.getValue(2)).getValue(1);
1340       InVals.push_back(Chain.getValue(0));
1341     } else {
1342       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1343                                  InFlag).getValue(1);
1344       InVals.push_back(Chain.getValue(0));
1345     }
1346     break;
1347   case MVT::i8:
1348   case MVT::i16:
1349   case MVT::i64:
1350   case MVT::i128:
1351   case MVT::f32:
1352   case MVT::f64:
1353   case MVT::v2f64:
1354   case MVT::v2i64:
1355   case MVT::v4f32:
1356   case MVT::v4i32:
1357   case MVT::v8i16:
1358   case MVT::v16i8:
1359     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1360                                    InFlag).getValue(1);
1361     InVals.push_back(Chain.getValue(0));
1362     break;
1363   }
1364
1365   return Chain;
1366 }
1367
1368 SDValue
1369 SPUTargetLowering::LowerReturn(SDValue Chain,
1370                                CallingConv::ID CallConv, bool isVarArg,
1371                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1372                                const SmallVectorImpl<SDValue> &OutVals,
1373                                DebugLoc dl, SelectionDAG &DAG) const {
1374
1375   SmallVector<CCValAssign, 16> RVLocs;
1376   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1377                  RVLocs, *DAG.getContext());
1378   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1379
1380   // If this is the first return lowered for this function, add the regs to the
1381   // liveout set for the function.
1382   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1383     for (unsigned i = 0; i != RVLocs.size(); ++i)
1384       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1385   }
1386
1387   SDValue Flag;
1388
1389   // Copy the result values into the output registers.
1390   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1391     CCValAssign &VA = RVLocs[i];
1392     assert(VA.isRegLoc() && "Can only return in registers!");
1393     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1394                              OutVals[i], Flag);
1395     Flag = Chain.getValue(1);
1396   }
1397
1398   if (Flag.getNode())
1399     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1400   else
1401     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1402 }
1403
1404
1405 //===----------------------------------------------------------------------===//
1406 // Vector related lowering:
1407 //===----------------------------------------------------------------------===//
1408
1409 static ConstantSDNode *
1410 getVecImm(SDNode *N) {
1411   SDValue OpVal(0, 0);
1412
1413   // Check to see if this buildvec has a single non-undef value in its elements.
1414   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1415     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1416     if (OpVal.getNode() == 0)
1417       OpVal = N->getOperand(i);
1418     else if (OpVal != N->getOperand(i))
1419       return 0;
1420   }
1421
1422   if (OpVal.getNode() != 0) {
1423     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1424       return CN;
1425     }
1426   }
1427
1428   return 0;
1429 }
1430
1431 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1432 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1433 /// constant
1434 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1435                               EVT ValueType) {
1436   if (ConstantSDNode *CN = getVecImm(N)) {
1437     uint64_t Value = CN->getZExtValue();
1438     if (ValueType == MVT::i64) {
1439       uint64_t UValue = CN->getZExtValue();
1440       uint32_t upper = uint32_t(UValue >> 32);
1441       uint32_t lower = uint32_t(UValue);
1442       if (upper != lower)
1443         return SDValue();
1444       Value = Value >> 32;
1445     }
1446     if (Value <= 0x3ffff)
1447       return DAG.getTargetConstant(Value, ValueType);
1448   }
1449
1450   return SDValue();
1451 }
1452
1453 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1454 /// and the value fits into a signed 16-bit constant, and if so, return the
1455 /// constant
1456 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1457                               EVT ValueType) {
1458   if (ConstantSDNode *CN = getVecImm(N)) {
1459     int64_t Value = CN->getSExtValue();
1460     if (ValueType == MVT::i64) {
1461       uint64_t UValue = CN->getZExtValue();
1462       uint32_t upper = uint32_t(UValue >> 32);
1463       uint32_t lower = uint32_t(UValue);
1464       if (upper != lower)
1465         return SDValue();
1466       Value = Value >> 32;
1467     }
1468     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1469       return DAG.getTargetConstant(Value, ValueType);
1470     }
1471   }
1472
1473   return SDValue();
1474 }
1475
1476 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1477 /// and the value fits into a signed 10-bit constant, and if so, return the
1478 /// constant
1479 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1480                               EVT ValueType) {
1481   if (ConstantSDNode *CN = getVecImm(N)) {
1482     int64_t Value = CN->getSExtValue();
1483     if (ValueType == MVT::i64) {
1484       uint64_t UValue = CN->getZExtValue();
1485       uint32_t upper = uint32_t(UValue >> 32);
1486       uint32_t lower = uint32_t(UValue);
1487       if (upper != lower)
1488         return SDValue();
1489       Value = Value >> 32;
1490     }
1491     if (isInt<10>(Value))
1492       return DAG.getTargetConstant(Value, ValueType);
1493   }
1494
1495   return SDValue();
1496 }
1497
1498 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1499 /// and the value fits into a signed 8-bit constant, and if so, return the
1500 /// constant.
1501 ///
1502 /// @note: The incoming vector is v16i8 because that's the only way we can load
1503 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1504 /// same value.
1505 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1506                              EVT ValueType) {
1507   if (ConstantSDNode *CN = getVecImm(N)) {
1508     int Value = (int) CN->getZExtValue();
1509     if (ValueType == MVT::i16
1510         && Value <= 0xffff                 /* truncated from uint64_t */
1511         && ((short) Value >> 8) == ((short) Value & 0xff))
1512       return DAG.getTargetConstant(Value & 0xff, ValueType);
1513     else if (ValueType == MVT::i8
1514              && (Value & 0xff) == Value)
1515       return DAG.getTargetConstant(Value, ValueType);
1516   }
1517
1518   return SDValue();
1519 }
1520
1521 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1522 /// and the value fits into a signed 16-bit constant, and if so, return the
1523 /// constant
1524 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1525                                EVT ValueType) {
1526   if (ConstantSDNode *CN = getVecImm(N)) {
1527     uint64_t Value = CN->getZExtValue();
1528     if ((ValueType == MVT::i32
1529           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1530         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1531       return DAG.getTargetConstant(Value >> 16, ValueType);
1532   }
1533
1534   return SDValue();
1535 }
1536
1537 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1538 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1539   if (ConstantSDNode *CN = getVecImm(N)) {
1540     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1541   }
1542
1543   return SDValue();
1544 }
1545
1546 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1547 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1548   if (ConstantSDNode *CN = getVecImm(N)) {
1549     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1550   }
1551
1552   return SDValue();
1553 }
1554
1555 //! Lower a BUILD_VECTOR instruction creatively:
1556 static SDValue
1557 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1558   EVT VT = Op.getValueType();
1559   EVT EltVT = VT.getVectorElementType();
1560   DebugLoc dl = Op.getDebugLoc();
1561   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1562   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1563   unsigned minSplatBits = EltVT.getSizeInBits();
1564
1565   if (minSplatBits < 16)
1566     minSplatBits = 16;
1567
1568   APInt APSplatBits, APSplatUndef;
1569   unsigned SplatBitSize;
1570   bool HasAnyUndefs;
1571
1572   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1573                             HasAnyUndefs, minSplatBits)
1574       || minSplatBits < SplatBitSize)
1575     return SDValue();   // Wasn't a constant vector or splat exceeded min
1576
1577   uint64_t SplatBits = APSplatBits.getZExtValue();
1578
1579   switch (VT.getSimpleVT().SimpleTy) {
1580   default:
1581     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1582                        Twine(VT.getEVTString()));
1583     /*NOTREACHED*/
1584   case MVT::v4f32: {
1585     uint32_t Value32 = uint32_t(SplatBits);
1586     assert(SplatBitSize == 32
1587            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1588     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1589     SDValue T = DAG.getConstant(Value32, MVT::i32);
1590     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1591                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1592     break;
1593   }
1594   case MVT::v2f64: {
1595     uint64_t f64val = uint64_t(SplatBits);
1596     assert(SplatBitSize == 64
1597            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1598     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599     SDValue T = DAG.getConstant(f64val, MVT::i64);
1600     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1601                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1602     break;
1603   }
1604   case MVT::v16i8: {
1605    // 8-bit constants have to be expanded to 16-bits
1606    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1607    SmallVector<SDValue, 8> Ops;
1608
1609    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1610    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1611                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1612   }
1613   case MVT::v8i16: {
1614     unsigned short Value16 = SplatBits;
1615     SDValue T = DAG.getConstant(Value16, EltVT);
1616     SmallVector<SDValue, 8> Ops;
1617
1618     Ops.assign(8, T);
1619     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1620   }
1621   case MVT::v4i32: {
1622     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1623     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1624   }
1625   case MVT::v2i32: {
1626     return SDValue();
1627   }
1628   case MVT::v2i64: {
1629     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1630   }
1631   }
1632
1633   return SDValue();
1634 }
1635
1636 /*!
1637  */
1638 SDValue
1639 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1640                      DebugLoc dl) {
1641   uint32_t upper = uint32_t(SplatVal >> 32);
1642   uint32_t lower = uint32_t(SplatVal);
1643
1644   if (upper == lower) {
1645     // Magic constant that can be matched by IL, ILA, et. al.
1646     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1647     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1648                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1649                                    Val, Val, Val, Val));
1650   } else {
1651     bool upper_special, lower_special;
1652
1653     // NOTE: This code creates common-case shuffle masks that can be easily
1654     // detected as common expressions. It is not attempting to create highly
1655     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1656
1657     // Detect if the upper or lower half is a special shuffle mask pattern:
1658     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1659     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1660
1661     // Both upper and lower are special, lower to a constant pool load:
1662     if (lower_special && upper_special) {
1663       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1664       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1665                          SplatValCN, SplatValCN);
1666     }
1667
1668     SDValue LO32;
1669     SDValue HI32;
1670     SmallVector<SDValue, 16> ShufBytes;
1671     SDValue Result;
1672
1673     // Create lower vector if not a special pattern
1674     if (!lower_special) {
1675       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1676       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1677                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1678                                      LO32C, LO32C, LO32C, LO32C));
1679     }
1680
1681     // Create upper vector if not a special pattern
1682     if (!upper_special) {
1683       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1684       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686                                      HI32C, HI32C, HI32C, HI32C));
1687     }
1688
1689     // If either upper or lower are special, then the two input operands are
1690     // the same (basically, one of them is a "don't care")
1691     if (lower_special)
1692       LO32 = HI32;
1693     if (upper_special)
1694       HI32 = LO32;
1695
1696     for (int i = 0; i < 4; ++i) {
1697       uint64_t val = 0;
1698       for (int j = 0; j < 4; ++j) {
1699         SDValue V;
1700         bool process_upper, process_lower;
1701         val <<= 8;
1702         process_upper = (upper_special && (i & 1) == 0);
1703         process_lower = (lower_special && (i & 1) == 1);
1704
1705         if (process_upper || process_lower) {
1706           if ((process_upper && upper == 0)
1707                   || (process_lower && lower == 0))
1708             val |= 0x80;
1709           else if ((process_upper && upper == 0xffffffff)
1710                   || (process_lower && lower == 0xffffffff))
1711             val |= 0xc0;
1712           else if ((process_upper && upper == 0x80000000)
1713                   || (process_lower && lower == 0x80000000))
1714             val |= (j == 0 ? 0xe0 : 0x80);
1715         } else
1716           val |= i * 4 + j + ((i & 1) * 16);
1717       }
1718
1719       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1720     }
1721
1722     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1723                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1724                                    &ShufBytes[0], ShufBytes.size()));
1725   }
1726 }
1727
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1735 /// \note
1736 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1739 /// \note
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1741 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1742   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1743   SDValue V1 = Op.getOperand(0);
1744   SDValue V2 = Op.getOperand(1);
1745   DebugLoc dl = Op.getDebugLoc();
1746
1747   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1748
1749   // If we have a single element being moved from V1 to V2, this can be handled
1750   // using the C*[DX] compute mask instructions, but the vector elements have
1751   // to be monotonically increasing with one exception element.
1752   EVT VecVT = V1.getValueType();
1753   EVT EltVT = VecVT.getVectorElementType();
1754   unsigned EltsFromV2 = 0;
1755   unsigned V2Elt = 0;
1756   unsigned V2EltIdx0 = 0;
1757   unsigned CurrElt = 0;
1758   unsigned MaxElts = VecVT.getVectorNumElements();
1759   unsigned PrevElt = 0;
1760   unsigned V0Elt = 0;
1761   bool monotonic = true;
1762   bool rotate = true;
1763   EVT maskVT;             // which of the c?d instructions to use
1764
1765   if (EltVT == MVT::i8) {
1766     V2EltIdx0 = 16;
1767     maskVT = MVT::v16i8;
1768   } else if (EltVT == MVT::i16) {
1769     V2EltIdx0 = 8;
1770     maskVT = MVT::v8i16;
1771   } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) {
1772     V2EltIdx0 = 2;
1773     maskVT = MVT::v4i32;
1774   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1775     V2EltIdx0 = 4;
1776     maskVT = MVT::v4i32;
1777   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1778     V2EltIdx0 = 2;
1779     maskVT = MVT::v2i64;
1780   } else
1781     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1782
1783   for (unsigned i = 0; i != MaxElts; ++i) {
1784     if (SVN->getMaskElt(i) < 0)
1785       continue;
1786
1787     unsigned SrcElt = SVN->getMaskElt(i);
1788
1789     if (monotonic) {
1790       if (SrcElt >= V2EltIdx0) {
1791         if (1 >= (++EltsFromV2)) {
1792           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1793         }
1794       } else if (CurrElt != SrcElt) {
1795         monotonic = false;
1796       }
1797
1798       ++CurrElt;
1799     }
1800
1801     if (rotate) {
1802       if (PrevElt > 0 && SrcElt < MaxElts) {
1803         if ((PrevElt == SrcElt - 1)
1804             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805           PrevElt = SrcElt;
1806           if (SrcElt == 0)
1807             V0Elt = i;
1808         } else {
1809           rotate = false;
1810         }
1811       } else if (i == 0) {
1812         // First time through, need to keep track of previous element
1813         PrevElt = SrcElt;
1814       } else {
1815         // This isn't a rotation, takes elements from vector 2
1816         rotate = false;
1817       }
1818     }
1819   }
1820
1821   if (EltsFromV2 == 1 && monotonic) {
1822     // Compute mask and shuffle
1823     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1824
1825     // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1826     // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1827     SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1828                                 DAG.getRegister(SPU::R1, PtrVT),
1829                                 DAG.getConstant(V2Elt, MVT::i32));
1830     SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1831                                      maskVT, Pointer);
1832
1833     // Use shuffle mask in SHUFB synthetic instruction:
1834     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1835                        ShufMaskOp);
1836   } else if (rotate) {
1837     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1838
1839     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1840                        V1, DAG.getConstant(rotamt, MVT::i16));
1841   } else {
1842    // Convert the SHUFFLE_VECTOR mask's input element units to the
1843    // actual bytes.
1844     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1845
1846     SmallVector<SDValue, 16> ResultMask;
1847     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1848       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1849
1850       for (unsigned j = 0; j < BytesPerElement; ++j)
1851         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1852     }
1853     // For half vectors padd the mask with zeros for the second half.
1854     // This is needed because mask is assumed to be full vector elsewhere in
1855     // the SPU backend.
1856     if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32)
1857     for( unsigned i = 0; i < 2; ++i )
1858     {
1859       for (unsigned j = 0; j < BytesPerElement; ++j)
1860         ResultMask.push_back(DAG.getConstant(0,MVT::i8));
1861     }
1862
1863     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1864                                     &ResultMask[0], ResultMask.size());
1865     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1866   }
1867 }
1868
1869 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1870   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1871   DebugLoc dl = Op.getDebugLoc();
1872
1873   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1874     // For a constant, build the appropriate constant vector, which will
1875     // eventually simplify to a vector register load.
1876
1877     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1878     SmallVector<SDValue, 16> ConstVecValues;
1879     EVT VT;
1880     size_t n_copies;
1881
1882     // Create a constant vector:
1883     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1884     default: llvm_unreachable("Unexpected constant value type in "
1885                               "LowerSCALAR_TO_VECTOR");
1886     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1887     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1888     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1889     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1890     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1891     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1892     case MVT::v2i32: n_copies = 2; VT = MVT::i32; break;
1893     }
1894
1895     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1896     for (size_t j = 0; j < n_copies; ++j)
1897       ConstVecValues.push_back(CValue);
1898
1899     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1900                        &ConstVecValues[0], ConstVecValues.size());
1901   } else {
1902     // Otherwise, copy the value from one register to another:
1903     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1904     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1905     case MVT::i8:
1906     case MVT::i16:
1907     case MVT::i32:
1908     case MVT::i64:
1909     case MVT::f32:
1910     case MVT::f64:
1911       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1912     }
1913   }
1914
1915   return SDValue();
1916 }
1917
1918 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1919   EVT VT = Op.getValueType();
1920   SDValue N = Op.getOperand(0);
1921   SDValue Elt = Op.getOperand(1);
1922   DebugLoc dl = Op.getDebugLoc();
1923   SDValue retval;
1924
1925   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1926     // Constant argument:
1927     int EltNo = (int) C->getZExtValue();
1928
1929     // sanity checks:
1930     if (VT == MVT::i8 && EltNo >= 16)
1931       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1932     else if (VT == MVT::i16 && EltNo >= 8)
1933       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1934     else if (VT == MVT::i32 && EltNo >= 4)
1935       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1936     else if (VT == MVT::i64 && EltNo >= 2)
1937       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1938
1939     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1940       // i32 and i64: Element 0 is the preferred slot
1941       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1942     }
1943
1944     // Need to generate shuffle mask and extract:
1945     int prefslot_begin = -1, prefslot_end = -1;
1946     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1947
1948     switch (VT.getSimpleVT().SimpleTy) {
1949     default:
1950       assert(false && "Invalid value type!");
1951     case MVT::i8: {
1952       prefslot_begin = prefslot_end = 3;
1953       break;
1954     }
1955     case MVT::i16: {
1956       prefslot_begin = 2; prefslot_end = 3;
1957       break;
1958     }
1959     case MVT::i32:
1960     case MVT::f32: {
1961       prefslot_begin = 0; prefslot_end = 3;
1962       break;
1963     }
1964     case MVT::i64:
1965     case MVT::f64: {
1966       prefslot_begin = 0; prefslot_end = 7;
1967       break;
1968     }
1969     }
1970
1971     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1972            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1973
1974     unsigned int ShufBytes[16] = {
1975       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1976     };
1977     for (int i = 0; i < 16; ++i) {
1978       // zero fill uppper part of preferred slot, don't care about the
1979       // other slots:
1980       unsigned int mask_val;
1981       if (i <= prefslot_end) {
1982         mask_val =
1983           ((i < prefslot_begin)
1984            ? 0x80
1985            : elt_byte + (i - prefslot_begin));
1986
1987         ShufBytes[i] = mask_val;
1988       } else
1989         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1990     }
1991
1992     SDValue ShufMask[4];
1993     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1994       unsigned bidx = i * 4;
1995       unsigned int bits = ((ShufBytes[bidx] << 24) |
1996                            (ShufBytes[bidx+1] << 16) |
1997                            (ShufBytes[bidx+2] << 8) |
1998                            ShufBytes[bidx+3]);
1999       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2000     }
2001
2002     SDValue ShufMaskVec =
2003       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2004                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2005
2006     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2007                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2008                                      N, N, ShufMaskVec));
2009   } else {
2010     // Variable index: Rotate the requested element into slot 0, then replicate
2011     // slot 0 across the vector
2012     EVT VecVT = N.getValueType();
2013     if (!VecVT.isSimple() || !VecVT.isVector()) {
2014       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2015                         "vector type!");
2016     }
2017
2018     // Make life easier by making sure the index is zero-extended to i32
2019     if (Elt.getValueType() != MVT::i32)
2020       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2021
2022     // Scale the index to a bit/byte shift quantity
2023     APInt scaleFactor =
2024             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2025     unsigned scaleShift = scaleFactor.logBase2();
2026     SDValue vecShift;
2027
2028     if (scaleShift > 0) {
2029       // Scale the shift factor:
2030       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2031                         DAG.getConstant(scaleShift, MVT::i32));
2032     }
2033
2034     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2035
2036     // Replicate the bytes starting at byte 0 across the entire vector (for
2037     // consistency with the notion of a unified register set)
2038     SDValue replicate;
2039
2040     switch (VT.getSimpleVT().SimpleTy) {
2041     default:
2042       report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2043                         "type");
2044       /*NOTREACHED*/
2045     case MVT::i8: {
2046       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2047       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2048                               factor, factor, factor, factor);
2049       break;
2050     }
2051     case MVT::i16: {
2052       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2053       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2054                               factor, factor, factor, factor);
2055       break;
2056     }
2057     case MVT::i32:
2058     case MVT::f32: {
2059       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2060       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2061                               factor, factor, factor, factor);
2062       break;
2063     }
2064     case MVT::i64:
2065     case MVT::f64: {
2066       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2067       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2068       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2069                               loFactor, hiFactor, loFactor, hiFactor);
2070       break;
2071     }
2072     }
2073
2074     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2075                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2076                                      vecShift, vecShift, replicate));
2077   }
2078
2079   return retval;
2080 }
2081
2082 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2083   SDValue VecOp = Op.getOperand(0);
2084   SDValue ValOp = Op.getOperand(1);
2085   SDValue IdxOp = Op.getOperand(2);
2086   DebugLoc dl = Op.getDebugLoc();
2087   EVT VT = Op.getValueType();
2088
2089   // use 0 when the lane to insert to is 'undef'
2090   int64_t Idx=0;
2091   if (IdxOp.getOpcode() != ISD::UNDEF) {
2092     ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2093     assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2094     Idx = (CN->getSExtValue());
2095   }
2096
2097   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2098   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2099   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2100                                 DAG.getRegister(SPU::R1, PtrVT),
2101                                 DAG.getConstant(Idx, PtrVT));
2102   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2103
2104   SDValue result =
2105     DAG.getNode(SPUISD::SHUFB, dl, VT,
2106                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2107                 VecOp,
2108                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2109
2110   return result;
2111 }
2112
2113 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2114                            const TargetLowering &TLI)
2115 {
2116   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2117   DebugLoc dl = Op.getDebugLoc();
2118   EVT ShiftVT = TLI.getShiftAmountTy();
2119
2120   assert(Op.getValueType() == MVT::i8);
2121   switch (Opc) {
2122   default:
2123     llvm_unreachable("Unhandled i8 math operator");
2124     /*NOTREACHED*/
2125     break;
2126   case ISD::ADD: {
2127     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2128     // the result:
2129     SDValue N1 = Op.getOperand(1);
2130     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2131     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2132     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2133                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2134
2135   }
2136
2137   case ISD::SUB: {
2138     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2139     // the result:
2140     SDValue N1 = Op.getOperand(1);
2141     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2142     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2143     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2144                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2145   }
2146   case ISD::ROTR:
2147   case ISD::ROTL: {
2148     SDValue N1 = Op.getOperand(1);
2149     EVT N1VT = N1.getValueType();
2150
2151     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2152     if (!N1VT.bitsEq(ShiftVT)) {
2153       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2154                        ? ISD::ZERO_EXTEND
2155                        : ISD::TRUNCATE;
2156       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2157     }
2158
2159     // Replicate lower 8-bits into upper 8:
2160     SDValue ExpandArg =
2161       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2162                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2163                               N0, DAG.getConstant(8, MVT::i32)));
2164
2165     // Truncate back down to i8
2166     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2167                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2168   }
2169   case ISD::SRL:
2170   case ISD::SHL: {
2171     SDValue N1 = Op.getOperand(1);
2172     EVT N1VT = N1.getValueType();
2173
2174     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2175     if (!N1VT.bitsEq(ShiftVT)) {
2176       unsigned N1Opc = ISD::ZERO_EXTEND;
2177
2178       if (N1.getValueType().bitsGT(ShiftVT))
2179         N1Opc = ISD::TRUNCATE;
2180
2181       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2182     }
2183
2184     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2185                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2186   }
2187   case ISD::SRA: {
2188     SDValue N1 = Op.getOperand(1);
2189     EVT N1VT = N1.getValueType();
2190
2191     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2192     if (!N1VT.bitsEq(ShiftVT)) {
2193       unsigned N1Opc = ISD::SIGN_EXTEND;
2194
2195       if (N1VT.bitsGT(ShiftVT))
2196         N1Opc = ISD::TRUNCATE;
2197       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2198     }
2199
2200     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2201                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2202   }
2203   case ISD::MUL: {
2204     SDValue N1 = Op.getOperand(1);
2205
2206     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2207     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2208     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2209                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2210     break;
2211   }
2212   }
2213
2214   return SDValue();
2215 }
2216
2217 //! Lower byte immediate operations for v16i8 vectors:
2218 static SDValue
2219 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2220   SDValue ConstVec;
2221   SDValue Arg;
2222   EVT VT = Op.getValueType();
2223   DebugLoc dl = Op.getDebugLoc();
2224
2225   ConstVec = Op.getOperand(0);
2226   Arg = Op.getOperand(1);
2227   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2228     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2229       ConstVec = ConstVec.getOperand(0);
2230     } else {
2231       ConstVec = Op.getOperand(1);
2232       Arg = Op.getOperand(0);
2233       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2234         ConstVec = ConstVec.getOperand(0);
2235       }
2236     }
2237   }
2238
2239   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2240     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2241     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2242
2243     APInt APSplatBits, APSplatUndef;
2244     unsigned SplatBitSize;
2245     bool HasAnyUndefs;
2246     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2247
2248     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2249                               HasAnyUndefs, minSplatBits)
2250         && minSplatBits <= SplatBitSize) {
2251       uint64_t SplatBits = APSplatBits.getZExtValue();
2252       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2253
2254       SmallVector<SDValue, 16> tcVec;
2255       tcVec.assign(16, tc);
2256       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2257                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2258     }
2259   }
2260
2261   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2262   // lowered.  Return the operation, rather than a null SDValue.
2263   return Op;
2264 }
2265
2266 //! Custom lowering for CTPOP (count population)
2267 /*!
2268   Custom lowering code that counts the number ones in the input
2269   operand. SPU has such an instruction, but it counts the number of
2270   ones per byte, which then have to be accumulated.
2271 */
2272 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2273   EVT VT = Op.getValueType();
2274   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2275                                VT, (128 / VT.getSizeInBits()));
2276   DebugLoc dl = Op.getDebugLoc();
2277
2278   switch (VT.getSimpleVT().SimpleTy) {
2279   default:
2280     assert(false && "Invalid value type!");
2281   case MVT::i8: {
2282     SDValue N = Op.getOperand(0);
2283     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2284
2285     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2286     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2287
2288     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2289   }
2290
2291   case MVT::i16: {
2292     MachineFunction &MF = DAG.getMachineFunction();
2293     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2294
2295     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2296
2297     SDValue N = Op.getOperand(0);
2298     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2299     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2300     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2301
2302     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2303     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2304
2305     // CNTB_result becomes the chain to which all of the virtual registers
2306     // CNTB_reg, SUM1_reg become associated:
2307     SDValue CNTB_result =
2308       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2309
2310     SDValue CNTB_rescopy =
2311       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2312
2313     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2314
2315     return DAG.getNode(ISD::AND, dl, MVT::i16,
2316                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2317                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2318                                                Tmp1, Shift1),
2319                                    Tmp1),
2320                        Mask0);
2321   }
2322
2323   case MVT::i32: {
2324     MachineFunction &MF = DAG.getMachineFunction();
2325     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2326
2327     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2328     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2329
2330     SDValue N = Op.getOperand(0);
2331     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2332     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2333     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2334     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2335
2336     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2337     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2338
2339     // CNTB_result becomes the chain to which all of the virtual registers
2340     // CNTB_reg, SUM1_reg become associated:
2341     SDValue CNTB_result =
2342       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2343
2344     SDValue CNTB_rescopy =
2345       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2346
2347     SDValue Comp1 =
2348       DAG.getNode(ISD::SRL, dl, MVT::i32,
2349                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2350                   Shift1);
2351
2352     SDValue Sum1 =
2353       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2354                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2355
2356     SDValue Sum1_rescopy =
2357       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2358
2359     SDValue Comp2 =
2360       DAG.getNode(ISD::SRL, dl, MVT::i32,
2361                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2362                   Shift2);
2363     SDValue Sum2 =
2364       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2365                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2366
2367     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2368   }
2369
2370   case MVT::i64:
2371     break;
2372   }
2373
2374   return SDValue();
2375 }
2376
2377 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2378 /*!
2379  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2380  All conversions to i64 are expanded to a libcall.
2381  */
2382 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2383                               const SPUTargetLowering &TLI) {
2384   EVT OpVT = Op.getValueType();
2385   SDValue Op0 = Op.getOperand(0);
2386   EVT Op0VT = Op0.getValueType();
2387
2388   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2389       || OpVT == MVT::i64) {
2390     // Convert f32 / f64 to i32 / i64 via libcall.
2391     RTLIB::Libcall LC =
2392             (Op.getOpcode() == ISD::FP_TO_SINT)
2393              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2394              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2395     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2396     SDValue Dummy;
2397     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2398   }
2399
2400   return Op;
2401 }
2402
2403 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2404 /*!
2405  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2406  All conversions from i64 are expanded to a libcall.
2407  */
2408 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2409                               const SPUTargetLowering &TLI) {
2410   EVT OpVT = Op.getValueType();
2411   SDValue Op0 = Op.getOperand(0);
2412   EVT Op0VT = Op0.getValueType();
2413
2414   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2415       || Op0VT == MVT::i64) {
2416     // Convert i32, i64 to f64 via libcall:
2417     RTLIB::Libcall LC =
2418             (Op.getOpcode() == ISD::SINT_TO_FP)
2419              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2420              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2421     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2422     SDValue Dummy;
2423     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2424   }
2425
2426   return Op;
2427 }
2428
2429 //! Lower ISD::SETCC
2430 /*!
2431  This handles MVT::f64 (double floating point) condition lowering
2432  */
2433 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2434                           const TargetLowering &TLI) {
2435   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2436   DebugLoc dl = Op.getDebugLoc();
2437   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2438
2439   SDValue lhs = Op.getOperand(0);
2440   SDValue rhs = Op.getOperand(1);
2441   EVT lhsVT = lhs.getValueType();
2442   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2443
2444   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2445   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2446   EVT IntVT(MVT::i64);
2447
2448   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2449   // selected to a NOP:
2450   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2451   SDValue lhsHi32 =
2452           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2453                       DAG.getNode(ISD::SRL, dl, IntVT,
2454                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2455   SDValue lhsHi32abs =
2456           DAG.getNode(ISD::AND, dl, MVT::i32,
2457                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2458   SDValue lhsLo32 =
2459           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2460
2461   // SETO and SETUO only use the lhs operand:
2462   if (CC->get() == ISD::SETO) {
2463     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2464     // SETUO
2465     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2466     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2467                        DAG.getSetCC(dl, ccResultVT,
2468                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2469                                     ISD::SETUO),
2470                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2471   } else if (CC->get() == ISD::SETUO) {
2472     // Evaluates to true if Op0 is [SQ]NaN
2473     return DAG.getNode(ISD::AND, dl, ccResultVT,
2474                        DAG.getSetCC(dl, ccResultVT,
2475                                     lhsHi32abs,
2476                                     DAG.getConstant(0x7ff00000, MVT::i32),
2477                                     ISD::SETGE),
2478                        DAG.getSetCC(dl, ccResultVT,
2479                                     lhsLo32,
2480                                     DAG.getConstant(0, MVT::i32),
2481                                     ISD::SETGT));
2482   }
2483
2484   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2485   SDValue rhsHi32 =
2486           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2487                       DAG.getNode(ISD::SRL, dl, IntVT,
2488                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2489
2490   // If a value is negative, subtract from the sign magnitude constant:
2491   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2492
2493   // Convert the sign-magnitude representation into 2's complement:
2494   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2495                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2496   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2497   SDValue lhsSelect =
2498           DAG.getNode(ISD::SELECT, dl, IntVT,
2499                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2500
2501   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2502                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2503   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2504   SDValue rhsSelect =
2505           DAG.getNode(ISD::SELECT, dl, IntVT,
2506                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2507
2508   unsigned compareOp;
2509
2510   switch (CC->get()) {
2511   case ISD::SETOEQ:
2512   case ISD::SETUEQ:
2513     compareOp = ISD::SETEQ; break;
2514   case ISD::SETOGT:
2515   case ISD::SETUGT:
2516     compareOp = ISD::SETGT; break;
2517   case ISD::SETOGE:
2518   case ISD::SETUGE:
2519     compareOp = ISD::SETGE; break;
2520   case ISD::SETOLT:
2521   case ISD::SETULT:
2522     compareOp = ISD::SETLT; break;
2523   case ISD::SETOLE:
2524   case ISD::SETULE:
2525     compareOp = ISD::SETLE; break;
2526   case ISD::SETUNE:
2527   case ISD::SETONE:
2528     compareOp = ISD::SETNE; break;
2529   default:
2530     report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2531   }
2532
2533   SDValue result =
2534           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2535                        (ISD::CondCode) compareOp);
2536
2537   if ((CC->get() & 0x8) == 0) {
2538     // Ordered comparison:
2539     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2540                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2541                                   ISD::SETO);
2542     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2543                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2544                                   ISD::SETO);
2545     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2546
2547     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2548   }
2549
2550   return result;
2551 }
2552
2553 //! Lower ISD::SELECT_CC
2554 /*!
2555   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2556   SELB instruction.
2557
2558   \note Need to revisit this in the future: if the code path through the true
2559   and false value computations is longer than the latency of a branch (6
2560   cycles), then it would be more advantageous to branch and insert a new basic
2561   block and branch on the condition. However, this code does not make that
2562   assumption, given the simplisitc uses so far.
2563  */
2564
2565 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2566                               const TargetLowering &TLI) {
2567   EVT VT = Op.getValueType();
2568   SDValue lhs = Op.getOperand(0);
2569   SDValue rhs = Op.getOperand(1);
2570   SDValue trueval = Op.getOperand(2);
2571   SDValue falseval = Op.getOperand(3);
2572   SDValue condition = Op.getOperand(4);
2573   DebugLoc dl = Op.getDebugLoc();
2574
2575   // NOTE: SELB's arguments: $rA, $rB, $mask
2576   //
2577   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2578   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2579   // condition was true and 0s where the condition was false. Hence, the
2580   // arguments to SELB get reversed.
2581
2582   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2583   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2584   // with another "cannot select select_cc" assert:
2585
2586   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2587                                 TLI.getSetCCResultType(Op.getValueType()),
2588                                 lhs, rhs, condition);
2589   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2590 }
2591
2592 //! Custom lower ISD::TRUNCATE
2593 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2594 {
2595   // Type to truncate to
2596   EVT VT = Op.getValueType();
2597   MVT simpleVT = VT.getSimpleVT();
2598   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2599                                VT, (128 / VT.getSizeInBits()));
2600   DebugLoc dl = Op.getDebugLoc();
2601
2602   // Type to truncate from
2603   SDValue Op0 = Op.getOperand(0);
2604   EVT Op0VT = Op0.getValueType();
2605
2606   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2607     // Create shuffle mask, least significant doubleword of quadword
2608     unsigned maskHigh = 0x08090a0b;
2609     unsigned maskLow = 0x0c0d0e0f;
2610     // Use a shuffle to perform the truncation
2611     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2612                                    DAG.getConstant(maskHigh, MVT::i32),
2613                                    DAG.getConstant(maskLow, MVT::i32),
2614                                    DAG.getConstant(maskHigh, MVT::i32),
2615                                    DAG.getConstant(maskLow, MVT::i32));
2616
2617     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2618                                        Op0, Op0, shufMask);
2619
2620     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2621   }
2622
2623   return SDValue();             // Leave the truncate unmolested
2624 }
2625
2626 /*!
2627  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2628  * algorithm is to duplicate the sign bit using rotmai to generate at
2629  * least one byte full of sign bits. Then propagate the "sign-byte" into
2630  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2631  *
2632  * @param Op The sext operand
2633  * @param DAG The current DAG
2634  * @return The SDValue with the entire instruction sequence
2635  */
2636 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2637 {
2638   DebugLoc dl = Op.getDebugLoc();
2639
2640   // Type to extend to
2641   MVT OpVT = Op.getValueType().getSimpleVT();
2642
2643   // Type to extend from
2644   SDValue Op0 = Op.getOperand(0);
2645   MVT Op0VT = Op0.getValueType().getSimpleVT();
2646
2647   // The type to extend to needs to be a i128 and
2648   // the type to extend from needs to be i64 or i32.
2649   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2650           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2651
2652   // Create shuffle mask
2653   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2654   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2655   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2656   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2657                                  DAG.getConstant(mask1, MVT::i32),
2658                                  DAG.getConstant(mask1, MVT::i32),
2659                                  DAG.getConstant(mask2, MVT::i32),
2660                                  DAG.getConstant(mask3, MVT::i32));
2661
2662   // Word wise arithmetic right shift to generate at least one byte
2663   // that contains sign bits.
2664   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2665   SDValue sraVal = DAG.getNode(ISD::SRA,
2666                  dl,
2667                  mvt,
2668                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2669                  DAG.getConstant(31, MVT::i32));
2670
2671   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2672   // and the input value into the lower 64 bits.
2673   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2674       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2675
2676   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2677 }
2678
2679 //! Custom (target-specific) lowering entry point
2680 /*!
2681   This is where LLVM's DAG selection process calls to do target-specific
2682   lowering of nodes.
2683  */
2684 SDValue
2685 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2686 {
2687   unsigned Opc = (unsigned) Op.getOpcode();
2688   EVT VT = Op.getValueType();
2689
2690   switch (Opc) {
2691   default: {
2692 #ifndef NDEBUG
2693     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2694     errs() << "Op.getOpcode() = " << Opc << "\n";
2695     errs() << "*Op.getNode():\n";
2696     Op.getNode()->dump();
2697 #endif
2698     llvm_unreachable(0);
2699   }
2700   case ISD::LOAD:
2701   case ISD::EXTLOAD:
2702   case ISD::SEXTLOAD:
2703   case ISD::ZEXTLOAD:
2704     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2705   case ISD::STORE:
2706     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2707   case ISD::ConstantPool:
2708     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2709   case ISD::GlobalAddress:
2710     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2711   case ISD::JumpTable:
2712     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2713   case ISD::ConstantFP:
2714     return LowerConstantFP(Op, DAG);
2715
2716   // i8, i64 math ops:
2717   case ISD::ADD:
2718   case ISD::SUB:
2719   case ISD::ROTR:
2720   case ISD::ROTL:
2721   case ISD::SRL:
2722   case ISD::SHL:
2723   case ISD::SRA: {
2724     if (VT == MVT::i8)
2725       return LowerI8Math(Op, DAG, Opc, *this);
2726     break;
2727   }
2728
2729   case ISD::FP_TO_SINT:
2730   case ISD::FP_TO_UINT:
2731     return LowerFP_TO_INT(Op, DAG, *this);
2732
2733   case ISD::SINT_TO_FP:
2734   case ISD::UINT_TO_FP:
2735     return LowerINT_TO_FP(Op, DAG, *this);
2736
2737   // Vector-related lowering.
2738   case ISD::BUILD_VECTOR:
2739     return LowerBUILD_VECTOR(Op, DAG);
2740   case ISD::SCALAR_TO_VECTOR:
2741     return LowerSCALAR_TO_VECTOR(Op, DAG);
2742   case ISD::VECTOR_SHUFFLE:
2743     return LowerVECTOR_SHUFFLE(Op, DAG);
2744   case ISD::EXTRACT_VECTOR_ELT:
2745     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2746   case ISD::INSERT_VECTOR_ELT:
2747     return LowerINSERT_VECTOR_ELT(Op, DAG);
2748
2749   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2750   case ISD::AND:
2751   case ISD::OR:
2752   case ISD::XOR:
2753     return LowerByteImmed(Op, DAG);
2754
2755   // Vector and i8 multiply:
2756   case ISD::MUL:
2757     if (VT == MVT::i8)
2758       return LowerI8Math(Op, DAG, Opc, *this);
2759
2760   case ISD::CTPOP:
2761     return LowerCTPOP(Op, DAG);
2762
2763   case ISD::SELECT_CC:
2764     return LowerSELECT_CC(Op, DAG, *this);
2765
2766   case ISD::SETCC:
2767     return LowerSETCC(Op, DAG, *this);
2768
2769   case ISD::TRUNCATE:
2770     return LowerTRUNCATE(Op, DAG);
2771
2772   case ISD::SIGN_EXTEND:
2773     return LowerSIGN_EXTEND(Op, DAG);
2774   }
2775
2776   return SDValue();
2777 }
2778
2779 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2780                                            SmallVectorImpl<SDValue>&Results,
2781                                            SelectionDAG &DAG) const
2782 {
2783 #if 0
2784   unsigned Opc = (unsigned) N->getOpcode();
2785   EVT OpVT = N->getValueType(0);
2786
2787   switch (Opc) {
2788   default: {
2789     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2790     errs() << "Op.getOpcode() = " << Opc << "\n";
2791     errs() << "*Op.getNode():\n";
2792     N->dump();
2793     abort();
2794     /*NOTREACHED*/
2795   }
2796   }
2797 #endif
2798
2799   /* Otherwise, return unchanged */
2800 }
2801
2802 //===----------------------------------------------------------------------===//
2803 // Target Optimization Hooks
2804 //===----------------------------------------------------------------------===//
2805
2806 SDValue
2807 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2808 {
2809 #if 0
2810   TargetMachine &TM = getTargetMachine();
2811 #endif
2812   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2813   SelectionDAG &DAG = DCI.DAG;
2814   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2815   EVT NodeVT = N->getValueType(0);      // The node's value type
2816   EVT Op0VT = Op0.getValueType();       // The first operand's result
2817   SDValue Result;                       // Initially, empty result
2818   DebugLoc dl = N->getDebugLoc();
2819
2820   switch (N->getOpcode()) {
2821   default: break;
2822   case ISD::ADD: {
2823     SDValue Op1 = N->getOperand(1);
2824
2825     if (Op0.getOpcode() == SPUISD::IndirectAddr
2826         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2827       // Normalize the operands to reduce repeated code
2828       SDValue IndirectArg = Op0, AddArg = Op1;
2829
2830       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2831         IndirectArg = Op1;
2832         AddArg = Op0;
2833       }
2834
2835       if (isa<ConstantSDNode>(AddArg)) {
2836         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2837         SDValue IndOp1 = IndirectArg.getOperand(1);
2838
2839         if (CN0->isNullValue()) {
2840           // (add (SPUindirect <arg>, <arg>), 0) ->
2841           // (SPUindirect <arg>, <arg>)
2842
2843 #if !defined(NDEBUG)
2844           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2845             errs() << "\n"
2846                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2847                  << "With:    (SPUindirect <arg>, <arg>)\n";
2848           }
2849 #endif
2850
2851           return IndirectArg;
2852         } else if (isa<ConstantSDNode>(IndOp1)) {
2853           // (add (SPUindirect <arg>, <const>), <const>) ->
2854           // (SPUindirect <arg>, <const + const>)
2855           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2856           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2857           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2858
2859 #if !defined(NDEBUG)
2860           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2861             errs() << "\n"
2862                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2863                  << "), " << CN0->getSExtValue() << ")\n"
2864                  << "With:    (SPUindirect <arg>, "
2865                  << combinedConst << ")\n";
2866           }
2867 #endif
2868
2869           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2870                              IndirectArg, combinedValue);
2871         }
2872       }
2873     }
2874     break;
2875   }
2876   case ISD::SIGN_EXTEND:
2877   case ISD::ZERO_EXTEND:
2878   case ISD::ANY_EXTEND: {
2879     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2880       // (any_extend (SPUextract_elt0 <arg>)) ->
2881       // (SPUextract_elt0 <arg>)
2882       // Types must match, however...
2883 #if !defined(NDEBUG)
2884       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2885         errs() << "\nReplace: ";
2886         N->dump(&DAG);
2887         errs() << "\nWith:    ";
2888         Op0.getNode()->dump(&DAG);
2889         errs() << "\n";
2890       }
2891 #endif
2892
2893       return Op0;
2894     }
2895     break;
2896   }
2897   case SPUISD::IndirectAddr: {
2898     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2899       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2900       if (CN != 0 && CN->isNullValue()) {
2901         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2902         // (SPUaform <addr>, 0)
2903
2904         DEBUG(errs() << "Replace: ");
2905         DEBUG(N->dump(&DAG));
2906         DEBUG(errs() << "\nWith:    ");
2907         DEBUG(Op0.getNode()->dump(&DAG));
2908         DEBUG(errs() << "\n");
2909
2910         return Op0;
2911       }
2912     } else if (Op0.getOpcode() == ISD::ADD) {
2913       SDValue Op1 = N->getOperand(1);
2914       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2915         // (SPUindirect (add <arg>, <arg>), 0) ->
2916         // (SPUindirect <arg>, <arg>)
2917         if (CN1->isNullValue()) {
2918
2919 #if !defined(NDEBUG)
2920           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2921             errs() << "\n"
2922                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2923                  << "With:    (SPUindirect <arg>, <arg>)\n";
2924           }
2925 #endif
2926
2927           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2928                              Op0.getOperand(0), Op0.getOperand(1));
2929         }
2930       }
2931     }
2932     break;
2933   }
2934   case SPUISD::SHLQUAD_L_BITS:
2935   case SPUISD::SHLQUAD_L_BYTES:
2936   case SPUISD::ROTBYTES_LEFT: {
2937     SDValue Op1 = N->getOperand(1);
2938
2939     // Kill degenerate vector shifts:
2940     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2941       if (CN->isNullValue()) {
2942         Result = Op0;
2943       }
2944     }
2945     break;
2946   }
2947   case SPUISD::PREFSLOT2VEC: {
2948     switch (Op0.getOpcode()) {
2949     default:
2950       break;
2951     case ISD::ANY_EXTEND:
2952     case ISD::ZERO_EXTEND:
2953     case ISD::SIGN_EXTEND: {
2954       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2955       // <arg>
2956       // but only if the SPUprefslot2vec and <arg> types match.
2957       SDValue Op00 = Op0.getOperand(0);
2958       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2959         SDValue Op000 = Op00.getOperand(0);
2960         if (Op000.getValueType() == NodeVT) {
2961           Result = Op000;
2962         }
2963       }
2964       break;
2965     }
2966     case SPUISD::VEC2PREFSLOT: {
2967       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2968       // <arg>
2969       Result = Op0.getOperand(0);
2970       break;
2971     }
2972     }
2973     break;
2974   }
2975   }
2976
2977   // Otherwise, return unchanged.
2978 #ifndef NDEBUG
2979   if (Result.getNode()) {
2980     DEBUG(errs() << "\nReplace.SPU: ");
2981     DEBUG(N->dump(&DAG));
2982     DEBUG(errs() << "\nWith:        ");
2983     DEBUG(Result.getNode()->dump(&DAG));
2984     DEBUG(errs() << "\n");
2985   }
2986 #endif
2987
2988   return Result;
2989 }
2990
2991 //===----------------------------------------------------------------------===//
2992 // Inline Assembly Support
2993 //===----------------------------------------------------------------------===//
2994
2995 /// getConstraintType - Given a constraint letter, return the type of
2996 /// constraint it is for this target.
2997 SPUTargetLowering::ConstraintType
2998 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2999   if (ConstraintLetter.size() == 1) {
3000     switch (ConstraintLetter[0]) {
3001     default: break;
3002     case 'b':
3003     case 'r':
3004     case 'f':
3005     case 'v':
3006     case 'y':
3007       return C_RegisterClass;
3008     }
3009   }
3010   return TargetLowering::getConstraintType(ConstraintLetter);
3011 }
3012
3013 std::pair<unsigned, const TargetRegisterClass*>
3014 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3015                                                 EVT VT) const
3016 {
3017   if (Constraint.size() == 1) {
3018     // GCC RS6000 Constraint Letters
3019     switch (Constraint[0]) {
3020     case 'b':   // R1-R31
3021     case 'r':   // R0-R31
3022       if (VT == MVT::i64)
3023         return std::make_pair(0U, SPU::R64CRegisterClass);
3024       return std::make_pair(0U, SPU::R32CRegisterClass);
3025     case 'f':
3026       if (VT == MVT::f32)
3027         return std::make_pair(0U, SPU::R32FPRegisterClass);
3028       else if (VT == MVT::f64)
3029         return std::make_pair(0U, SPU::R64FPRegisterClass);
3030       break;
3031     case 'v':
3032       return std::make_pair(0U, SPU::GPRCRegisterClass);
3033     }
3034   }
3035
3036   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3037 }
3038
3039 //! Compute used/known bits for a SPU operand
3040 void
3041 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3042                                                   const APInt &Mask,
3043                                                   APInt &KnownZero,
3044                                                   APInt &KnownOne,
3045                                                   const SelectionDAG &DAG,
3046                                                   unsigned Depth ) const {
3047 #if 0
3048   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3049
3050   switch (Op.getOpcode()) {
3051   default:
3052     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3053     break;
3054   case CALL:
3055   case SHUFB:
3056   case SHUFFLE_MASK:
3057   case CNTB:
3058   case SPUISD::PREFSLOT2VEC:
3059   case SPUISD::LDRESULT:
3060   case SPUISD::VEC2PREFSLOT:
3061   case SPUISD::SHLQUAD_L_BITS:
3062   case SPUISD::SHLQUAD_L_BYTES:
3063   case SPUISD::VEC_ROTL:
3064   case SPUISD::VEC_ROTR:
3065   case SPUISD::ROTBYTES_LEFT:
3066   case SPUISD::SELECT_MASK:
3067   case SPUISD::SELB:
3068   }
3069 #endif
3070 }
3071
3072 unsigned
3073 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3074                                                    unsigned Depth) const {
3075   switch (Op.getOpcode()) {
3076   default:
3077     return 1;
3078
3079   case ISD::SETCC: {
3080     EVT VT = Op.getValueType();
3081
3082     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3083       VT = MVT::i32;
3084     }
3085     return VT.getSizeInBits();
3086   }
3087   }
3088 }
3089
3090 // LowerAsmOperandForConstraint
3091 void
3092 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3093                                                 char ConstraintLetter,
3094                                                 std::vector<SDValue> &Ops,
3095                                                 SelectionDAG &DAG) const {
3096   // Default, for the time being, to the base class handler
3097   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3098 }
3099
3100 /// isLegalAddressImmediate - Return true if the integer value can be used
3101 /// as the offset of the target addressing mode.
3102 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3103                                                 const Type *Ty) const {
3104   // SPU's addresses are 256K:
3105   return (V > -(1 << 18) && V < (1 << 18) - 1);
3106 }
3107
3108 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3109   return false;
3110 }
3111
3112 bool
3113 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3114   // The SPU target isn't yet aware of offsets.
3115   return false;
3116 }