lib/Target/CellSPU/SPUISelLowering.cpp

   1 //
   2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the SPUTargetLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SPURegisterNames.h"
  15 #include "SPUISelLowering.h"
  16 #include "SPUTargetMachine.h"
  17 #include "SPUFrameInfo.h"
  18 #include "llvm/Constants.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/Intrinsics.h"
  21 #include "llvm/CallingConv.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineRegisterInfo.h"
  27 #include "llvm/CodeGen/SelectionDAG.h"
  28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  29 #include "llvm/Target/TargetOptions.h"
  30 #include "llvm/ADT/VectorExtras.h"
  31 #include "llvm/Support/Debug.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include "llvm/Support/raw_ostream.h"
  35 #include <map>
  36
  37 using namespace llvm;
  38
  39 // Used in getTargetNodeName() below
  40 namespace {
  41   std::map<unsigned, const char *> node_names;
  42
  43   //! EVT mapping to useful data for Cell SPU
  44   struct valtype_map_s {
  45     EVT   valtype;
  46     int   prefslot_byte;
  47   };
  48
  49   const valtype_map_s valtype_map[] = {
  50     { MVT::i1,   3 },
  51     { MVT::i8,   3 },
  52     { MVT::i16,  2 },
  53     { MVT::i32,  0 },
  54     { MVT::f32,  0 },
  55     { MVT::i64,  0 },
  56     { MVT::f64,  0 },
  57     { MVT::i128, 0 }
  58   };
  59
  60   const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  61
  62   const valtype_map_s *getValueTypeMapEntry(EVT VT) {
  63     const valtype_map_s *retval = 0;
  64
  65     for (size_t i = 0; i < n_valtype_map; ++i) {
  66       if (valtype_map[i].valtype == VT) {
  67         retval = valtype_map + i;
  68         break;
  69       }
  70     }
  71
  72 #ifndef NDEBUG
  73     if (retval == 0) {
  74       report_fatal_error("getValueTypeMapEntry returns NULL for " +
  75                          Twine(VT.getEVTString()));
  76     }
  77 #endif
  78
  79     return retval;
  80   }
  81
  82   //! Expand a library call into an actual call DAG node
  83   /*!
  84    \note
  85    This code is taken from SelectionDAGLegalize, since it is not exposed as
  86    part of the LLVM SelectionDAG API.
  87    */
  88
  89   SDValue
  90   ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
  91                 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
  92     // The input chain to this libcall is the entry node of the function.
  93     // Legalizing the call will automatically add the previous call to the
  94     // dependence.
  95     SDValue InChain = DAG.getEntryNode();
  96
  97     TargetLowering::ArgListTy Args;
  98     TargetLowering::ArgListEntry Entry;
  99     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
 100       EVT ArgVT = Op.getOperand(i).getValueType();
 101       const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 102       Entry.Node = Op.getOperand(i);
 103       Entry.Ty = ArgTy;
 104       Entry.isSExt = isSigned;
 105       Entry.isZExt = !isSigned;
 106       Args.push_back(Entry);
 107     }
 108     SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
 109                                            TLI.getPointerTy());
 110
 111     // Splice the libcall in wherever FindInputOutputChains tells us to.
 112     const Type *RetTy =
 113                 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
 114     std::pair<SDValue, SDValue> CallInfo =
 115             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
 116                             0, TLI.getLibcallCallingConv(LC), false,
 117                             /*isReturnValueUsed=*/true,
 118                             Callee, Args, DAG, Op.getDebugLoc());
 119
 120     return CallInfo.first;
 121   }
 122 }
 123
 124 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
 125   : TargetLowering(TM, new TargetLoweringObjectFileELF()),
 126     SPUTM(TM) {
 127   // Fold away setcc operations if possible.
 128   setPow2DivIsCheap();
 129
 130   // Use _setjmp/_longjmp instead of setjmp/longjmp.
 131   setUseUnderscoreSetJmp(true);
 132   setUseUnderscoreLongJmp(true);
 133
 134   // Set RTLIB libcall names as used by SPU:
 135   setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
 136
 137   // Set up the SPU's register classes:
 138   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
 139   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
 140   addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
 141   addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
 142   addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
 143   addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
 144   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
 145
 146   // SPU has no sign or zero extended loads for i1, i8, i16:
 147   setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
 148   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 149   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 150
 151   setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
 152   setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
 153
 154   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
 155   setTruncStoreAction(MVT::i128, MVT::i32, Expand);
 156   setTruncStoreAction(MVT::i128, MVT::i16, Expand);
 157   setTruncStoreAction(MVT::i128, MVT::i8, Expand);
 158
 159   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 160
 161   // SPU constant load actions are custom lowered:
 162   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 163   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 164
 165   // SPU's loads and stores have to be custom lowered:
 166   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
 167        ++sctype) {
 168     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 169
 170     setOperationAction(ISD::LOAD,   VT, Custom);
 171     setOperationAction(ISD::STORE,  VT, Custom);
 172     setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
 173     setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
 174     setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
 175
 176     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
 177       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 178       setTruncStoreAction(VT, StoreVT, Expand);
 179     }
 180   }
 181
 182   for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
 183        ++sctype) {
 184     MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
 185
 186     setOperationAction(ISD::LOAD,   VT, Custom);
 187     setOperationAction(ISD::STORE,  VT, Custom);
 188
 189     for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
 190       MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
 191       setTruncStoreAction(VT, StoreVT, Expand);
 192     }
 193   }
 194
 195   // Expand the jumptable branches
 196   setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
 197   setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
 198
 199   // Custom lower SELECT_CC for most cases, but expand by default
 200   setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
 201   setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
 202   setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
 203   setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
 204   setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
 205
 206   // SPU has no intrinsics for these particular operations:
 207   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
 208
 209   // SPU has no division/remainder instructions
 210   setOperationAction(ISD::SREM,    MVT::i8,   Expand);
 211   setOperationAction(ISD::UREM,    MVT::i8,   Expand);
 212   setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
 213   setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
 214   setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
 215   setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
 216   setOperationAction(ISD::SREM,    MVT::i16,  Expand);
 217   setOperationAction(ISD::UREM,    MVT::i16,  Expand);
 218   setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
 219   setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
 220   setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
 221   setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
 222   setOperationAction(ISD::SREM,    MVT::i32,  Expand);
 223   setOperationAction(ISD::UREM,    MVT::i32,  Expand);
 224   setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
 225   setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
 226   setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
 227   setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
 228   setOperationAction(ISD::SREM,    MVT::i64,  Expand);
 229   setOperationAction(ISD::UREM,    MVT::i64,  Expand);
 230   setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
 231   setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
 232   setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
 233   setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
 234   setOperationAction(ISD::SREM,    MVT::i128, Expand);
 235   setOperationAction(ISD::UREM,    MVT::i128, Expand);
 236   setOperationAction(ISD::SDIV,    MVT::i128, Expand);
 237   setOperationAction(ISD::UDIV,    MVT::i128, Expand);
 238   setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
 239   setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
 240
 241   // We don't support sin/cos/sqrt/fmod
 242   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 243   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 244   setOperationAction(ISD::FREM , MVT::f64, Expand);
 245   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 246   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 247   setOperationAction(ISD::FREM , MVT::f32, Expand);
 248
 249   // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
 250   // for f32!)
 251   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 252   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 253
 254   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 255   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 256
 257   // SPU can do rotate right and left, so legalize it... but customize for i8
 258   // because instructions don't exist.
 259
 260   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
 261   //        .td files.
 262   setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
 263   setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
 264   setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
 265
 266   setOperationAction(ISD::ROTL, MVT::i32,    Legal);
 267   setOperationAction(ISD::ROTL, MVT::i16,    Legal);
 268   setOperationAction(ISD::ROTL, MVT::i8,     Custom);
 269
 270   // SPU has no native version of shift left/right for i8
 271   setOperationAction(ISD::SHL,  MVT::i8,     Custom);
 272   setOperationAction(ISD::SRL,  MVT::i8,     Custom);
 273   setOperationAction(ISD::SRA,  MVT::i8,     Custom);
 274
 275   // Make these operations legal and handle them during instruction selection:
 276   setOperationAction(ISD::SHL,  MVT::i64,    Legal);
 277   setOperationAction(ISD::SRL,  MVT::i64,    Legal);
 278   setOperationAction(ISD::SRA,  MVT::i64,    Legal);
 279
 280   // Custom lower i8, i32 and i64 multiplications
 281   setOperationAction(ISD::MUL,  MVT::i8,     Custom);
 282   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
 283   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 284
 285   // Expand double-width multiplication
 286   // FIXME: It would probably be reasonable to support some of these operations
 287   setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
 288   setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
 289   setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
 290   setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
 291   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 292   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
 293   setOperationAction(ISD::MULHU,     MVT::i16, Expand);
 294   setOperationAction(ISD::MULHS,     MVT::i16, Expand);
 295   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 296   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 297   setOperationAction(ISD::MULHU,     MVT::i32, Expand);
 298   setOperationAction(ISD::MULHS,     MVT::i32, Expand);
 299   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 300   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 301   setOperationAction(ISD::MULHU,     MVT::i64, Expand);
 302   setOperationAction(ISD::MULHS,     MVT::i64, Expand);
 303
 304   // Need to custom handle (some) common i8, i64 math ops
 305   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
 306   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
 307   setOperationAction(ISD::SUB,  MVT::i8,     Custom);
 308   setOperationAction(ISD::SUB,  MVT::i64,    Legal);
 309
 310   // SPU does not have BSWAP. It does have i32 support CTLZ.
 311   // CTPOP has to be custom lowered.
 312   setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
 313   setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
 314
 315   setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
 316   setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
 317   setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
 318   setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
 319   setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
 320
 321   setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
 322   setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
 323   setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
 324   setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
 325   setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
 326
 327   setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
 328   setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
 329   setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
 330   setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
 331   setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
 332
 333   // SPU has a version of select that implements (a&~c)|(b&c), just like
 334   // select ought to work:
 335   setOperationAction(ISD::SELECT, MVT::i8,   Legal);
 336   setOperationAction(ISD::SELECT, MVT::i16,  Legal);
 337   setOperationAction(ISD::SELECT, MVT::i32,  Legal);
 338   setOperationAction(ISD::SELECT, MVT::i64,  Legal);
 339
 340   setOperationAction(ISD::SETCC, MVT::i8,    Legal);
 341   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
 342   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
 343   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
 344   setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 345
 346   // Custom lower i128 -> i64 truncates
 347   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
 348
 349   // Custom lower i32/i64 -> i128 sign extend
 350   setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
 351
 352   setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
 353   setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
 354   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
 355   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
 356   // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
 357   // to expand to a libcall, hence the custom lowering:
 358   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 359   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 360   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
 361   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 362   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
 363   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
 364
 365   // FDIV on SPU requires custom lowering
 366   setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
 367
 368   // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
 369   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 370   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
 371   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
 372   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 373   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
 374   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
 375   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 376   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 377
 378   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
 379   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
 380   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
 381   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
 382
 383   // We cannot sextinreg(i1).  Expand to shifts.
 384   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 385
 386   // We want to legalize GlobalAddress and ConstantPool nodes into the
 387   // appropriate instructions to materialize the address.
 388   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
 389        ++sctype) {
 390     MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
 391
 392     setOperationAction(ISD::GlobalAddress,  VT, Custom);
 393     setOperationAction(ISD::ConstantPool,   VT, Custom);
 394     setOperationAction(ISD::JumpTable,      VT, Custom);
 395   }
 396
 397   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 398   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 399
 400   // Use the default implementation.
 401   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 402   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 403   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 404   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 405   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 406   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 407   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
 408
 409   // Cell SPU has instructions for converting between i64 and fp.
 410   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 411   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 412
 413   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 414   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 415
 416   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 417   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 418
 419   // First set operation action for all vector types to expand. Then we
 420   // will selectively turn on ones that can be effectively codegen'd.
 421   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
 422   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
 423   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
 424   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
 425   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
 426   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
 427
 428   // "Odd size" vector classes that we're willing to support:
 429   addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
 430
 431   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 432        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 433     MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 434
 435     // add/sub are legal for all supported vector VT's.
 436     setOperationAction(ISD::ADD,     VT, Legal);
 437     setOperationAction(ISD::SUB,     VT, Legal);
 438     // mul has to be custom lowered.
 439     setOperationAction(ISD::MUL,     VT, Legal);
 440
 441     setOperationAction(ISD::AND,     VT, Legal);
 442     setOperationAction(ISD::OR,      VT, Legal);
 443     setOperationAction(ISD::XOR,     VT, Legal);
 444     setOperationAction(ISD::LOAD,    VT, Legal);
 445     setOperationAction(ISD::SELECT,  VT, Legal);
 446     setOperationAction(ISD::STORE,   VT, Legal);
 447
 448     // These operations need to be expanded:
 449     setOperationAction(ISD::SDIV,    VT, Expand);
 450     setOperationAction(ISD::SREM,    VT, Expand);
 451     setOperationAction(ISD::UDIV,    VT, Expand);
 452     setOperationAction(ISD::UREM,    VT, Expand);
 453
 454     // Custom lower build_vector, constant pool spills, insert and
 455     // extract vector elements:
 456     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 457     setOperationAction(ISD::ConstantPool, VT, Custom);
 458     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
 459     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 460     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
 461     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
 462   }
 463
 464   setOperationAction(ISD::AND, MVT::v16i8, Custom);
 465   setOperationAction(ISD::OR,  MVT::v16i8, Custom);
 466   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
 467   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 468
 469   setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 470
 471   setShiftAmountType(MVT::i32);
 472   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 473
 474   setStackPointerRegisterToSaveRestore(SPU::R1);
 475
 476   // We have target-specific dag combine patterns for the following nodes:
 477   setTargetDAGCombine(ISD::ADD);
 478   setTargetDAGCombine(ISD::ZERO_EXTEND);
 479   setTargetDAGCombine(ISD::SIGN_EXTEND);
 480   setTargetDAGCombine(ISD::ANY_EXTEND);
 481
 482   computeRegisterProperties();
 483
 484   // Set pre-RA register scheduler default to BURR, which produces slightly
 485   // better code than the default (could also be TDRR, but TargetLowering.h
 486   // needs a mod to support that model):
 487   setSchedulingPreference(SchedulingForRegPressure);
 488 }
 489
 490 const char *
 491 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
 492 {
 493   if (node_names.empty()) {
 494     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
 495     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
 496     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
 497     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
 498     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
 499     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
 500     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
 501     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
 502     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
 503     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
 504     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
 505     node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
 506     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
 507     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
 508     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
 509     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
 510     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
 511     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
 512     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
 513             "SPUISD::ROTBYTES_LEFT_BITS";
 514     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
 515     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
 516     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
 517     node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
 518     node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
 519   }
 520
 521   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
 522
 523   return ((i != node_names.end()) ? i->second : 0);
 524 }
 525
 526 /// getFunctionAlignment - Return the Log2 alignment of this function.
 527 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
 528   return 3;
 529 }
 530
 531 //===----------------------------------------------------------------------===//
 532 // Return the Cell SPU's SETCC result type
 533 //===----------------------------------------------------------------------===//
 534
 535 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
 536   // i16 and i32 are valid SETCC result types
 537   return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
 538     VT.getSimpleVT().SimpleTy :
 539     MVT::i32);
 540 }
 541
 542 //===----------------------------------------------------------------------===//
 543 // Calling convention code:
 544 //===----------------------------------------------------------------------===//
 545
 546 #include "SPUGenCallingConv.inc"
 547
 548 //===----------------------------------------------------------------------===//
 549 //  LowerOperation implementation
 550 //===----------------------------------------------------------------------===//
 551
 552 /// Custom lower loads for CellSPU
 553 /*!
 554  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 555  within a 16-byte block, we have to rotate to extract the requested element.
 556
 557  For extending loads, we also want to ensure that the following sequence is
 558  emitted, e.g. for MVT::f32 extending load to MVT::f64:
 559
 560 \verbatim
 561 %1  v16i8,ch = load
 562 %2  v16i8,ch = rotate %1
 563 %3  v4f8, ch = bitconvert %2
 564 %4  f32      = vec2perfslot %3
 565 %5  f64      = fp_extend %4
 566 \endverbatim
 567 */
 568 static SDValue
 569 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 570   LoadSDNode *LN = cast<LoadSDNode>(Op);
 571   SDValue the_chain = LN->getChain();
 572   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 573   EVT InVT = LN->getMemoryVT();
 574   EVT OutVT = Op.getValueType();
 575   ISD::LoadExtType ExtType = LN->getExtensionType();
 576   unsigned alignment = LN->getAlignment();
 577   const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
 578   DebugLoc dl = Op.getDebugLoc();
 579
 580   switch (LN->getAddressingMode()) {
 581   case ISD::UNINDEXED: {
 582     SDValue result;
 583     SDValue basePtr = LN->getBasePtr();
 584     SDValue rotate;
 585
 586     if (alignment == 16) {
 587       ConstantSDNode *CN;
 588
 589       // Special cases for a known aligned load to simplify the base pointer
 590       // and the rotation amount:
 591       if (basePtr.getOpcode() == ISD::ADD
 592           && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
 593         // Known offset into basePtr
 594         int64_t offset = CN->getSExtValue();
 595         int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
 596
 597         if (rotamt < 0)
 598           rotamt += 16;
 599
 600         rotate = DAG.getConstant(rotamt, MVT::i16);
 601
 602         // Simplify the base pointer for this case:
 603         basePtr = basePtr.getOperand(0);
 604         if ((offset & ~0xf) > 0) {
 605           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 606                                 basePtr,
 607                                 DAG.getConstant((offset & ~0xf), PtrVT));
 608         }
 609       } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
 610                  || (basePtr.getOpcode() == SPUISD::IndirectAddr
 611                      && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
 612                      && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
 613         // Plain aligned a-form address: rotate into preferred slot
 614         // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
 615         int64_t rotamt = -vtm->prefslot_byte;
 616         if (rotamt < 0)
 617           rotamt += 16;
 618         rotate = DAG.getConstant(rotamt, MVT::i16);
 619       } else {
 620         // Offset the rotate amount by the basePtr and the preferred slot
 621         // byte offset
 622         int64_t rotamt = -vtm->prefslot_byte;
 623         if (rotamt < 0)
 624           rotamt += 16;
 625         rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 626                              basePtr,
 627                              DAG.getConstant(rotamt, PtrVT));
 628       }
 629     } else {
 630       // Unaligned load: must be more pessimistic about addressing modes:
 631       if (basePtr.getOpcode() == ISD::ADD) {
 632         MachineFunction &MF = DAG.getMachineFunction();
 633         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 634         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 635         SDValue Flag;
 636
 637         SDValue Op0 = basePtr.getOperand(0);
 638         SDValue Op1 = basePtr.getOperand(1);
 639
 640         if (isa<ConstantSDNode>(Op1)) {
 641           // Convert the (add <ptr>, <const>) to an indirect address contained
 642           // in a register. Note that this is done because we need to avoid
 643           // creating a 0(reg) d-form address due to the SPU's block loads.
 644           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 645           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 646           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 647         } else {
 648           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 649           // will likely be lowered as a reg(reg) x-form address.
 650           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 651         }
 652       } else {
 653         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 654                               basePtr,
 655                               DAG.getConstant(0, PtrVT));
 656       }
 657
 658       // Offset the rotate amount by the basePtr and the preferred slot
 659       // byte offset
 660       rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
 661                            basePtr,
 662                            DAG.getConstant(-vtm->prefslot_byte, PtrVT));
 663     }
 664
 665     // Re-emit as a v16i8 vector load
 666     result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 667                          LN->getSrcValue(), LN->getSrcValueOffset(),
 668                          LN->isVolatile(), LN->isNonTemporal(), 16);
 669
 670     // Update the chain
 671     the_chain = result.getValue(1);
 672
 673     // Rotate into the preferred slot:
 674     result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
 675                          result.getValue(0), rotate);
 676
 677     // Convert the loaded v16i8 vector to the appropriate vector type
 678     // specified by the operand:
 679     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 680                                  InVT, (128 / InVT.getSizeInBits()));
 681     result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
 682                          DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
 683
 684     // Handle extending loads by extending the scalar result:
 685     if (ExtType == ISD::SEXTLOAD) {
 686       result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
 687     } else if (ExtType == ISD::ZEXTLOAD) {
 688       result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
 689     } else if (ExtType == ISD::EXTLOAD) {
 690       unsigned NewOpc = ISD::ANY_EXTEND;
 691
 692       if (OutVT.isFloatingPoint())
 693         NewOpc = ISD::FP_EXTEND;
 694
 695       result = DAG.getNode(NewOpc, dl, OutVT, result);
 696     }
 697
 698     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
 699     SDValue retops[2] = {
 700       result,
 701       the_chain
 702     };
 703
 704     result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
 705                          retops, sizeof(retops) / sizeof(retops[0]));
 706     return result;
 707   }
 708   case ISD::PRE_INC:
 709   case ISD::PRE_DEC:
 710   case ISD::POST_INC:
 711   case ISD::POST_DEC:
 712   case ISD::LAST_INDEXED_MODE:
 713     {
 714       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 715                          "than UNINDEXED\n" +
 716                          Twine((unsigned)LN->getAddressingMode()));
 717       /*NOTREACHED*/
 718     }
 719   }
 720
 721   return SDValue();
 722 }
 723
 724 /// Custom lower stores for CellSPU
 725 /*!
 726  All CellSPU stores are aligned to 16-byte boundaries, so for elements
 727  within a 16-byte block, we have to generate a shuffle to insert the
 728  requested element into its place, then store the resulting block.
 729  */
 730 static SDValue
 731 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 732   StoreSDNode *SN = cast<StoreSDNode>(Op);
 733   SDValue Value = SN->getValue();
 734   EVT VT = Value.getValueType();
 735   EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
 736   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 737   DebugLoc dl = Op.getDebugLoc();
 738   unsigned alignment = SN->getAlignment();
 739
 740   switch (SN->getAddressingMode()) {
 741   case ISD::UNINDEXED: {
 742     // The vector type we really want to load from the 16-byte chunk.
 743     EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
 744                                  VT, (128 / VT.getSizeInBits()));
 745
 746     SDValue alignLoadVec;
 747     SDValue basePtr = SN->getBasePtr();
 748     SDValue the_chain = SN->getChain();
 749     SDValue insertEltOffs;
 750
 751     if (alignment == 16) {
 752       ConstantSDNode *CN;
 753
 754       // Special cases for a known aligned load to simplify the base pointer
 755       // and insertion byte:
 756       if (basePtr.getOpcode() == ISD::ADD
 757           && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
 758         // Known offset into basePtr
 759         int64_t offset = CN->getSExtValue();
 760
 761         // Simplify the base pointer for this case:
 762         basePtr = basePtr.getOperand(0);
 763         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 764                                     basePtr,
 765                                     DAG.getConstant((offset & 0xf), PtrVT));
 766
 767         if ((offset & ~0xf) > 0) {
 768           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 769                                 basePtr,
 770                                 DAG.getConstant((offset & ~0xf), PtrVT));
 771         }
 772       } else {
 773         // Otherwise, assume it's at byte 0 of basePtr
 774         insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 775                                     basePtr,
 776                                     DAG.getConstant(0, PtrVT));
 777       }
 778     } else {
 779       // Unaligned load: must be more pessimistic about addressing modes:
 780       if (basePtr.getOpcode() == ISD::ADD) {
 781         MachineFunction &MF = DAG.getMachineFunction();
 782         MachineRegisterInfo &RegInfo = MF.getRegInfo();
 783         unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
 784         SDValue Flag;
 785
 786         SDValue Op0 = basePtr.getOperand(0);
 787         SDValue Op1 = basePtr.getOperand(1);
 788
 789         if (isa<ConstantSDNode>(Op1)) {
 790           // Convert the (add <ptr>, <const>) to an indirect address contained
 791           // in a register. Note that this is done because we need to avoid
 792           // creating a 0(reg) d-form address due to the SPU's block loads.
 793           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 794           the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
 795           basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
 796         } else {
 797           // Convert the (add <arg1>, <arg2>) to an indirect address, which
 798           // will likely be lowered as a reg(reg) x-form address.
 799           basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
 800         }
 801       } else {
 802         basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
 803                               basePtr,
 804                               DAG.getConstant(0, PtrVT));
 805       }
 806
 807       // Insertion point is solely determined by basePtr's contents
 808       insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
 809                                   basePtr,
 810                                   DAG.getConstant(0, PtrVT));
 811     }
 812
 813     // Re-emit as a v16i8 vector load
 814     alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
 815                                SN->getSrcValue(), SN->getSrcValueOffset(),
 816                                SN->isVolatile(), SN->isNonTemporal(), 16);
 817
 818     // Update the chain
 819     the_chain = alignLoadVec.getValue(1);
 820
 821     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
 822     SDValue theValue = SN->getValue();
 823     SDValue result;
 824
 825     if (StVT != VT
 826         && (theValue.getOpcode() == ISD::AssertZext
 827             || theValue.getOpcode() == ISD::AssertSext)) {
 828       // Drill down and get the value for zero- and sign-extended
 829       // quantities
 830       theValue = theValue.getOperand(0);
 831     }
 832
 833     // If the base pointer is already a D-form address, then just create
 834     // a new D-form address with a slot offset and the orignal base pointer.
 835     // Otherwise generate a D-form address with the slot offset relative
 836     // to the stack pointer, which is always aligned.
 837 #if !defined(NDEBUG)
 838       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 839         errs() << "CellSPU LowerSTORE: basePtr = ";
 840         basePtr.getNode()->dump(&DAG);
 841         errs() << "\n";
 842       }
 843 #endif
 844
 845     SDValue insertEltOp =
 846             DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
 847     SDValue vectorizeOp =
 848             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
 849
 850     result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
 851                          vectorizeOp, alignLoadVec,
 852                          DAG.getNode(ISD::BIT_CONVERT, dl,
 853                                      MVT::v4i32, insertEltOp));
 854
 855     result = DAG.getStore(the_chain, dl, result, basePtr,
 856                           LN->getSrcValue(), LN->getSrcValueOffset(),
 857                           LN->isVolatile(), LN->isNonTemporal(),
 858                           LN->getAlignment());
 859
 860 #if 0 && !defined(NDEBUG)
 861     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
 862       const SDValue &currentRoot = DAG.getRoot();
 863
 864       DAG.setRoot(result);
 865       errs() << "------- CellSPU:LowerStore result:\n";
 866       DAG.dump();
 867       errs() << "-------\n";
 868       DAG.setRoot(currentRoot);
 869     }
 870 #endif
 871
 872     return result;
 873     /*UNREACHED*/
 874   }
 875   case ISD::PRE_INC:
 876   case ISD::PRE_DEC:
 877   case ISD::POST_INC:
 878   case ISD::POST_DEC:
 879   case ISD::LAST_INDEXED_MODE:
 880     {
 881       report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
 882                          "than UNINDEXED\n" +
 883                          Twine((unsigned)SN->getAddressingMode()));
 884       /*NOTREACHED*/
 885     }
 886   }
 887
 888   return SDValue();
 889 }
 890
 891 //! Generate the address of a constant pool entry.
 892 static SDValue
 893 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 894   EVT PtrVT = Op.getValueType();
 895   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 896   const Constant *C = CP->getConstVal();
 897   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
 898   SDValue Zero = DAG.getConstant(0, PtrVT);
 899   const TargetMachine &TM = DAG.getTarget();
 900   // FIXME there is no actual debug info here
 901   DebugLoc dl = Op.getDebugLoc();
 902
 903   if (TM.getRelocationModel() == Reloc::Static) {
 904     if (!ST->usingLargeMem()) {
 905       // Just return the SDValue with the constant pool address in it.
 906       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
 907     } else {
 908       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
 909       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
 910       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 911     }
 912   }
 913
 914   llvm_unreachable("LowerConstantPool: Relocation model other than static"
 915                    " not supported.");
 916   return SDValue();
 917 }
 918
 919 //! Alternate entry point for generating the address of a constant pool entry
 920 SDValue
 921 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
 922   return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
 923 }
 924
 925 static SDValue
 926 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 927   EVT PtrVT = Op.getValueType();
 928   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 929   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
 930   SDValue Zero = DAG.getConstant(0, PtrVT);
 931   const TargetMachine &TM = DAG.getTarget();
 932   // FIXME there is no actual debug info here
 933   DebugLoc dl = Op.getDebugLoc();
 934
 935   if (TM.getRelocationModel() == Reloc::Static) {
 936     if (!ST->usingLargeMem()) {
 937       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
 938     } else {
 939       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
 940       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
 941       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 942     }
 943   }
 944
 945   llvm_unreachable("LowerJumpTable: Relocation model other than static"
 946                    " not supported.");
 947   return SDValue();
 948 }
 949
 950 static SDValue
 951 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
 952   EVT PtrVT = Op.getValueType();
 953   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 954   const GlobalValue *GV = GSDN->getGlobal();
 955   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
 956   const TargetMachine &TM = DAG.getTarget();
 957   SDValue Zero = DAG.getConstant(0, PtrVT);
 958   // FIXME there is no actual debug info here
 959   DebugLoc dl = Op.getDebugLoc();
 960
 961   if (TM.getRelocationModel() == Reloc::Static) {
 962     if (!ST->usingLargeMem()) {
 963       return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
 964     } else {
 965       SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
 966       SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
 967       return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
 968     }
 969   } else {
 970     report_fatal_error("LowerGlobalAddress: Relocation model other than static"
 971                       "not supported.");
 972     /*NOTREACHED*/
 973   }
 974
 975   return SDValue();
 976 }
 977
 978 //! Custom lower double precision floating point constants
 979 static SDValue
 980 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
 981   EVT VT = Op.getValueType();
 982   // FIXME there is no actual debug info here
 983   DebugLoc dl = Op.getDebugLoc();
 984
 985   if (VT == MVT::f64) {
 986     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
 987
 988     assert((FP != 0) &&
 989            "LowerConstantFP: Node is not ConstantFPSDNode");
 990
 991     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
 992     SDValue T = DAG.getConstant(dbits, MVT::i64);
 993     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
 994     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
 995                        DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
 996   }
 997
 998   return SDValue();
 999 }
1000
1001 SDValue
1002 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1003                                         CallingConv::ID CallConv, bool isVarArg,
1004                                         const SmallVectorImpl<ISD::InputArg>
1005                                           &Ins,
1006                                         DebugLoc dl, SelectionDAG &DAG,
1007                                         SmallVectorImpl<SDValue> &InVals) {
1008
1009   MachineFunction &MF = DAG.getMachineFunction();
1010   MachineFrameInfo *MFI = MF.getFrameInfo();
1011   MachineRegisterInfo &RegInfo = MF.getRegInfo();
1012
1013   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1014   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1015
1016   unsigned ArgOffset = SPUFrameInfo::minStackSize();
1017   unsigned ArgRegIdx = 0;
1018   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1019
1020   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1021
1022   // Add DAG nodes to load the arguments or copy them out of registers.
1023   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1024     EVT ObjectVT = Ins[ArgNo].VT;
1025     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1026     SDValue ArgVal;
1027
1028     if (ArgRegIdx < NumArgRegs) {
1029       const TargetRegisterClass *ArgRegClass;
1030
1031       switch (ObjectVT.getSimpleVT().SimpleTy) {
1032       default:
1033         report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1034                            Twine(ObjectVT.getEVTString()));
1035       case MVT::i8:
1036         ArgRegClass = &SPU::R8CRegClass;
1037         break;
1038       case MVT::i16:
1039         ArgRegClass = &SPU::R16CRegClass;
1040         break;
1041       case MVT::i32:
1042         ArgRegClass = &SPU::R32CRegClass;
1043         break;
1044       case MVT::i64:
1045         ArgRegClass = &SPU::R64CRegClass;
1046         break;
1047       case MVT::i128:
1048         ArgRegClass = &SPU::GPRCRegClass;
1049         break;
1050       case MVT::f32:
1051         ArgRegClass = &SPU::R32FPRegClass;
1052         break;
1053       case MVT::f64:
1054         ArgRegClass = &SPU::R64FPRegClass;
1055         break;
1056       case MVT::v2f64:
1057       case MVT::v4f32:
1058       case MVT::v2i64:
1059       case MVT::v4i32:
1060       case MVT::v8i16:
1061       case MVT::v16i8:
1062         ArgRegClass = &SPU::VECREGRegClass;
1063         break;
1064       }
1065
1066       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1067       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1068       ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1069       ++ArgRegIdx;
1070     } else {
1071       // We need to load the argument to a virtual register if we determined
1072       // above that we ran out of physical registers of the appropriate type
1073       // or we're forced to do vararg
1074       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1075       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1076       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1077       ArgOffset += StackSlotSize;
1078     }
1079
1080     InVals.push_back(ArgVal);
1081     // Update the chain
1082     Chain = ArgVal.getOperand(0);
1083   }
1084
1085   // vararg handling:
1086   if (isVarArg) {
1087     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1088     // We will spill (79-3)+1 registers to the stack
1089     SmallVector<SDValue, 79-3+1> MemOps;
1090
1091     // Create the frame slot
1092
1093     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1094       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1095                                                  true, false);
1096       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1097       unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1098       SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1099       SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1100                                    false, false, 0);
1101       Chain = Store.getOperand(0);
1102       MemOps.push_back(Store);
1103
1104       // Increment address by stack slot size for the next stored argument
1105       ArgOffset += StackSlotSize;
1106     }
1107     if (!MemOps.empty())
1108       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1109                           &MemOps[0], MemOps.size());
1110   }
1111
1112   return Chain;
1113 }
1114
1115 /// isLSAAddress - Return the immediate to use if the specified
1116 /// value is representable as a LSA address.
1117 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1118   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1119   if (!C) return 0;
1120
1121   int Addr = C->getZExtValue();
1122   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1123       (Addr << 14 >> 14) != Addr)
1124     return 0;  // Top 14 bits have to be sext of immediate.
1125
1126   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1127 }
1128
1129 SDValue
1130 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1131                              CallingConv::ID CallConv, bool isVarArg,
1132                              bool &isTailCall,
1133                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1134                              const SmallVectorImpl<ISD::InputArg> &Ins,
1135                              DebugLoc dl, SelectionDAG &DAG,
1136                              SmallVectorImpl<SDValue> &InVals) {
1137   // CellSPU target does not yet support tail call optimization.
1138   isTailCall = false;
1139
1140   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1141   unsigned NumOps     = Outs.size();
1142   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1143   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1144   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1145
1146   // Handy pointer type
1147   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1148
1149   // Set up a copy of the stack pointer for use loading and storing any
1150   // arguments that may not fit in the registers available for argument
1151   // passing.
1152   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1153
1154   // Figure out which arguments are going to go in registers, and which in
1155   // memory.
1156   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1157   unsigned ArgRegIdx = 0;
1158
1159   // Keep track of registers passing arguments
1160   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1161   // And the arguments passed on the stack
1162   SmallVector<SDValue, 8> MemOpChains;
1163
1164   for (unsigned i = 0; i != NumOps; ++i) {
1165     SDValue Arg = Outs[i].Val;
1166
1167     // PtrOff will be used to store the current argument to the stack if a
1168     // register cannot be found for it.
1169     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1170     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1171
1172     switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1173     default: llvm_unreachable("Unexpected ValueType for argument!");
1174     case MVT::i8:
1175     case MVT::i16:
1176     case MVT::i32:
1177     case MVT::i64:
1178     case MVT::i128:
1179       if (ArgRegIdx != NumArgRegs) {
1180         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1181       } else {
1182         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1183                                            false, false, 0));
1184         ArgOffset += StackSlotSize;
1185       }
1186       break;
1187     case MVT::f32:
1188     case MVT::f64:
1189       if (ArgRegIdx != NumArgRegs) {
1190         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1191       } else {
1192         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1193                                            false, false, 0));
1194         ArgOffset += StackSlotSize;
1195       }
1196       break;
1197     case MVT::v2i64:
1198     case MVT::v2f64:
1199     case MVT::v4f32:
1200     case MVT::v4i32:
1201     case MVT::v8i16:
1202     case MVT::v16i8:
1203       if (ArgRegIdx != NumArgRegs) {
1204         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1205       } else {
1206         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1207                                            false, false, 0));
1208         ArgOffset += StackSlotSize;
1209       }
1210       break;
1211     }
1212   }
1213
1214   // Accumulate how many bytes are to be pushed on the stack, including the
1215   // linkage area, and parameter passing area.  According to the SPU ABI,
1216   // we minimally need space for [LR] and [SP].
1217   unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1218
1219   // Insert a call sequence start
1220   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1221                                                             true));
1222
1223   if (!MemOpChains.empty()) {
1224     // Adjust the stack pointer for the stack arguments.
1225     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1226                         &MemOpChains[0], MemOpChains.size());
1227   }
1228
1229   // Build a sequence of copy-to-reg nodes chained together with token chain
1230   // and flag operands which copy the outgoing args into the appropriate regs.
1231   SDValue InFlag;
1232   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1233     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1234                              RegsToPass[i].second, InFlag);
1235     InFlag = Chain.getValue(1);
1236   }
1237
1238   SmallVector<SDValue, 8> Ops;
1239   unsigned CallOpc = SPUISD::CALL;
1240
1241   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1242   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1243   // node so that legalize doesn't hack it.
1244   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1245     const GlobalValue *GV = G->getGlobal();
1246     EVT CalleeVT = Callee.getValueType();
1247     SDValue Zero = DAG.getConstant(0, PtrVT);
1248     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1249
1250     if (!ST->usingLargeMem()) {
1251       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1252       // style calls, otherwise, external symbols are BRASL calls. This assumes
1253       // that declared/defined symbols are in the same compilation unit and can
1254       // be reached through PC-relative jumps.
1255       //
1256       // NOTE:
1257       // This may be an unsafe assumption for JIT and really large compilation
1258       // units.
1259       if (GV->isDeclaration()) {
1260         Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1261       } else {
1262         Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1263       }
1264     } else {
1265       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1266       // address pairs:
1267       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1268     }
1269   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1270     EVT CalleeVT = Callee.getValueType();
1271     SDValue Zero = DAG.getConstant(0, PtrVT);
1272     SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1273         Callee.getValueType());
1274
1275     if (!ST->usingLargeMem()) {
1276       Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1277     } else {
1278       Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1279     }
1280   } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1281     // If this is an absolute destination address that appears to be a legal
1282     // local store address, use the munged value.
1283     Callee = SDValue(Dest, 0);
1284   }
1285
1286   Ops.push_back(Chain);
1287   Ops.push_back(Callee);
1288
1289   // Add argument registers to the end of the list so that they are known live
1290   // into the call.
1291   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1292     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1293                                   RegsToPass[i].second.getValueType()));
1294
1295   if (InFlag.getNode())
1296     Ops.push_back(InFlag);
1297   // Returns a chain and a flag for retval copy to use.
1298   Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1299                       &Ops[0], Ops.size());
1300   InFlag = Chain.getValue(1);
1301
1302   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1303                              DAG.getIntPtrConstant(0, true), InFlag);
1304   if (!Ins.empty())
1305     InFlag = Chain.getValue(1);
1306
1307   // If the function returns void, just return the chain.
1308   if (Ins.empty())
1309     return Chain;
1310
1311   // If the call has results, copy the values out of the ret val registers.
1312   switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1313   default: llvm_unreachable("Unexpected ret value!");
1314   case MVT::Other: break;
1315   case MVT::i32:
1316     if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1317       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1318                                  MVT::i32, InFlag).getValue(1);
1319       InVals.push_back(Chain.getValue(0));
1320       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1321                                  Chain.getValue(2)).getValue(1);
1322       InVals.push_back(Chain.getValue(0));
1323     } else {
1324       Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1325                                  InFlag).getValue(1);
1326       InVals.push_back(Chain.getValue(0));
1327     }
1328     break;
1329   case MVT::i64:
1330     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1331                                InFlag).getValue(1);
1332     InVals.push_back(Chain.getValue(0));
1333     break;
1334   case MVT::i128:
1335     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1336                                InFlag).getValue(1);
1337     InVals.push_back(Chain.getValue(0));
1338     break;
1339   case MVT::f32:
1340   case MVT::f64:
1341     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1342                                InFlag).getValue(1);
1343     InVals.push_back(Chain.getValue(0));
1344     break;
1345   case MVT::v2f64:
1346   case MVT::v2i64:
1347   case MVT::v4f32:
1348   case MVT::v4i32:
1349   case MVT::v8i16:
1350   case MVT::v16i8:
1351     Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1352                                    InFlag).getValue(1);
1353     InVals.push_back(Chain.getValue(0));
1354     break;
1355   }
1356
1357   return Chain;
1358 }
1359
1360 SDValue
1361 SPUTargetLowering::LowerReturn(SDValue Chain,
1362                                CallingConv::ID CallConv, bool isVarArg,
1363                                const SmallVectorImpl<ISD::OutputArg> &Outs,
1364                                DebugLoc dl, SelectionDAG &DAG) {
1365
1366   SmallVector<CCValAssign, 16> RVLocs;
1367   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1368                  RVLocs, *DAG.getContext());
1369   CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1370
1371   // If this is the first return lowered for this function, add the regs to the
1372   // liveout set for the function.
1373   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1374     for (unsigned i = 0; i != RVLocs.size(); ++i)
1375       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1376   }
1377
1378   SDValue Flag;
1379
1380   // Copy the result values into the output registers.
1381   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1382     CCValAssign &VA = RVLocs[i];
1383     assert(VA.isRegLoc() && "Can only return in registers!");
1384     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1385                              Outs[i].Val, Flag);
1386     Flag = Chain.getValue(1);
1387   }
1388
1389   if (Flag.getNode())
1390     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1391   else
1392     return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1393 }
1394
1395
1396 //===----------------------------------------------------------------------===//
1397 // Vector related lowering:
1398 //===----------------------------------------------------------------------===//
1399
1400 static ConstantSDNode *
1401 getVecImm(SDNode *N) {
1402   SDValue OpVal(0, 0);
1403
1404   // Check to see if this buildvec has a single non-undef value in its elements.
1405   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1406     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1407     if (OpVal.getNode() == 0)
1408       OpVal = N->getOperand(i);
1409     else if (OpVal != N->getOperand(i))
1410       return 0;
1411   }
1412
1413   if (OpVal.getNode() != 0) {
1414     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1415       return CN;
1416     }
1417   }
1418
1419   return 0;
1420 }
1421
1422 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1423 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1424 /// constant
1425 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1426                               EVT ValueType) {
1427   if (ConstantSDNode *CN = getVecImm(N)) {
1428     uint64_t Value = CN->getZExtValue();
1429     if (ValueType == MVT::i64) {
1430       uint64_t UValue = CN->getZExtValue();
1431       uint32_t upper = uint32_t(UValue >> 32);
1432       uint32_t lower = uint32_t(UValue);
1433       if (upper != lower)
1434         return SDValue();
1435       Value = Value >> 32;
1436     }
1437     if (Value <= 0x3ffff)
1438       return DAG.getTargetConstant(Value, ValueType);
1439   }
1440
1441   return SDValue();
1442 }
1443
1444 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1445 /// and the value fits into a signed 16-bit constant, and if so, return the
1446 /// constant
1447 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1448                               EVT ValueType) {
1449   if (ConstantSDNode *CN = getVecImm(N)) {
1450     int64_t Value = CN->getSExtValue();
1451     if (ValueType == MVT::i64) {
1452       uint64_t UValue = CN->getZExtValue();
1453       uint32_t upper = uint32_t(UValue >> 32);
1454       uint32_t lower = uint32_t(UValue);
1455       if (upper != lower)
1456         return SDValue();
1457       Value = Value >> 32;
1458     }
1459     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1460       return DAG.getTargetConstant(Value, ValueType);
1461     }
1462   }
1463
1464   return SDValue();
1465 }
1466
1467 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1468 /// and the value fits into a signed 10-bit constant, and if so, return the
1469 /// constant
1470 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1471                               EVT ValueType) {
1472   if (ConstantSDNode *CN = getVecImm(N)) {
1473     int64_t Value = CN->getSExtValue();
1474     if (ValueType == MVT::i64) {
1475       uint64_t UValue = CN->getZExtValue();
1476       uint32_t upper = uint32_t(UValue >> 32);
1477       uint32_t lower = uint32_t(UValue);
1478       if (upper != lower)
1479         return SDValue();
1480       Value = Value >> 32;
1481     }
1482     if (isInt<10>(Value))
1483       return DAG.getTargetConstant(Value, ValueType);
1484   }
1485
1486   return SDValue();
1487 }
1488
1489 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1490 /// and the value fits into a signed 8-bit constant, and if so, return the
1491 /// constant.
1492 ///
1493 /// @note: The incoming vector is v16i8 because that's the only way we can load
1494 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1495 /// same value.
1496 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1497                              EVT ValueType) {
1498   if (ConstantSDNode *CN = getVecImm(N)) {
1499     int Value = (int) CN->getZExtValue();
1500     if (ValueType == MVT::i16
1501         && Value <= 0xffff                 /* truncated from uint64_t */
1502         && ((short) Value >> 8) == ((short) Value & 0xff))
1503       return DAG.getTargetConstant(Value & 0xff, ValueType);
1504     else if (ValueType == MVT::i8
1505              && (Value & 0xff) == Value)
1506       return DAG.getTargetConstant(Value, ValueType);
1507   }
1508
1509   return SDValue();
1510 }
1511
1512 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1513 /// and the value fits into a signed 16-bit constant, and if so, return the
1514 /// constant
1515 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1516                                EVT ValueType) {
1517   if (ConstantSDNode *CN = getVecImm(N)) {
1518     uint64_t Value = CN->getZExtValue();
1519     if ((ValueType == MVT::i32
1520           && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1521         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1522       return DAG.getTargetConstant(Value >> 16, ValueType);
1523   }
1524
1525   return SDValue();
1526 }
1527
1528 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1529 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1530   if (ConstantSDNode *CN = getVecImm(N)) {
1531     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1532   }
1533
1534   return SDValue();
1535 }
1536
1537 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1538 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1539   if (ConstantSDNode *CN = getVecImm(N)) {
1540     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1541   }
1542
1543   return SDValue();
1544 }
1545
1546 //! Lower a BUILD_VECTOR instruction creatively:
1547 static SDValue
1548 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1549   EVT VT = Op.getValueType();
1550   EVT EltVT = VT.getVectorElementType();
1551   DebugLoc dl = Op.getDebugLoc();
1552   BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1553   assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1554   unsigned minSplatBits = EltVT.getSizeInBits();
1555
1556   if (minSplatBits < 16)
1557     minSplatBits = 16;
1558
1559   APInt APSplatBits, APSplatUndef;
1560   unsigned SplatBitSize;
1561   bool HasAnyUndefs;
1562
1563   if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1564                             HasAnyUndefs, minSplatBits)
1565       || minSplatBits < SplatBitSize)
1566     return SDValue();   // Wasn't a constant vector or splat exceeded min
1567
1568   uint64_t SplatBits = APSplatBits.getZExtValue();
1569
1570   switch (VT.getSimpleVT().SimpleTy) {
1571   default:
1572     report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1573                        Twine(VT.getEVTString()));
1574     /*NOTREACHED*/
1575   case MVT::v4f32: {
1576     uint32_t Value32 = uint32_t(SplatBits);
1577     assert(SplatBitSize == 32
1578            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1579     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1580     SDValue T = DAG.getConstant(Value32, MVT::i32);
1581     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1582                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1583     break;
1584   }
1585   case MVT::v2f64: {
1586     uint64_t f64val = uint64_t(SplatBits);
1587     assert(SplatBitSize == 64
1588            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1589     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1590     SDValue T = DAG.getConstant(f64val, MVT::i64);
1591     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1592                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1593     break;
1594   }
1595   case MVT::v16i8: {
1596    // 8-bit constants have to be expanded to 16-bits
1597    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1598    SmallVector<SDValue, 8> Ops;
1599
1600    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1601    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1602                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1603   }
1604   case MVT::v8i16: {
1605     unsigned short Value16 = SplatBits;
1606     SDValue T = DAG.getConstant(Value16, EltVT);
1607     SmallVector<SDValue, 8> Ops;
1608
1609     Ops.assign(8, T);
1610     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1611   }
1612   case MVT::v4i32: {
1613     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1614     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1615   }
1616   case MVT::v2i32: {
1617     SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1618     return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1619   }
1620   case MVT::v2i64: {
1621     return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1622   }
1623   }
1624
1625   return SDValue();
1626 }
1627
1628 /*!
1629  */
1630 SDValue
1631 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1632                      DebugLoc dl) {
1633   uint32_t upper = uint32_t(SplatVal >> 32);
1634   uint32_t lower = uint32_t(SplatVal);
1635
1636   if (upper == lower) {
1637     // Magic constant that can be matched by IL, ILA, et. al.
1638     SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1639     return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1640                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1641                                    Val, Val, Val, Val));
1642   } else {
1643     bool upper_special, lower_special;
1644
1645     // NOTE: This code creates common-case shuffle masks that can be easily
1646     // detected as common expressions. It is not attempting to create highly
1647     // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1648
1649     // Detect if the upper or lower half is a special shuffle mask pattern:
1650     upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1651     lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1652
1653     // Both upper and lower are special, lower to a constant pool load:
1654     if (lower_special && upper_special) {
1655       SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1656       return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1657                          SplatValCN, SplatValCN);
1658     }
1659
1660     SDValue LO32;
1661     SDValue HI32;
1662     SmallVector<SDValue, 16> ShufBytes;
1663     SDValue Result;
1664
1665     // Create lower vector if not a special pattern
1666     if (!lower_special) {
1667       SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1668       LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1669                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1670                                      LO32C, LO32C, LO32C, LO32C));
1671     }
1672
1673     // Create upper vector if not a special pattern
1674     if (!upper_special) {
1675       SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1676       HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1677                          DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1678                                      HI32C, HI32C, HI32C, HI32C));
1679     }
1680
1681     // If either upper or lower are special, then the two input operands are
1682     // the same (basically, one of them is a "don't care")
1683     if (lower_special)
1684       LO32 = HI32;
1685     if (upper_special)
1686       HI32 = LO32;
1687
1688     for (int i = 0; i < 4; ++i) {
1689       uint64_t val = 0;
1690       for (int j = 0; j < 4; ++j) {
1691         SDValue V;
1692         bool process_upper, process_lower;
1693         val <<= 8;
1694         process_upper = (upper_special && (i & 1) == 0);
1695         process_lower = (lower_special && (i & 1) == 1);
1696
1697         if (process_upper || process_lower) {
1698           if ((process_upper && upper == 0)
1699                   || (process_lower && lower == 0))
1700             val |= 0x80;
1701           else if ((process_upper && upper == 0xffffffff)
1702                   || (process_lower && lower == 0xffffffff))
1703             val |= 0xc0;
1704           else if ((process_upper && upper == 0x80000000)
1705                   || (process_lower && lower == 0x80000000))
1706             val |= (j == 0 ? 0xe0 : 0x80);
1707         } else
1708           val |= i * 4 + j + ((i & 1) * 16);
1709       }
1710
1711       ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1712     }
1713
1714     return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1715                        DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1716                                    &ShufBytes[0], ShufBytes.size()));
1717   }
1718 }
1719
1720 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1721 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1722 /// permutation vector, V3, is monotonically increasing with one "exception"
1723 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1724 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1725 /// In either case, the net result is going to eventually invoke SHUFB to
1726 /// permute/shuffle the bytes from V1 and V2.
1727 /// \note
1728 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1729 /// control word for byte/halfword/word insertion. This takes care of a single
1730 /// element move from V2 into V1.
1731 /// \note
1732 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1733 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1734   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1735   SDValue V1 = Op.getOperand(0);
1736   SDValue V2 = Op.getOperand(1);
1737   DebugLoc dl = Op.getDebugLoc();
1738
1739   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1740
1741   // If we have a single element being moved from V1 to V2, this can be handled
1742   // using the C*[DX] compute mask instructions, but the vector elements have
1743   // to be monotonically increasing with one exception element.
1744   EVT VecVT = V1.getValueType();
1745   EVT EltVT = VecVT.getVectorElementType();
1746   unsigned EltsFromV2 = 0;
1747   unsigned V2Elt = 0;
1748   unsigned V2EltIdx0 = 0;
1749   unsigned CurrElt = 0;
1750   unsigned MaxElts = VecVT.getVectorNumElements();
1751   unsigned PrevElt = 0;
1752   unsigned V0Elt = 0;
1753   bool monotonic = true;
1754   bool rotate = true;
1755
1756   if (EltVT == MVT::i8) {
1757     V2EltIdx0 = 16;
1758   } else if (EltVT == MVT::i16) {
1759     V2EltIdx0 = 8;
1760   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1761     V2EltIdx0 = 4;
1762   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1763     V2EltIdx0 = 2;
1764   } else
1765     llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1766
1767   for (unsigned i = 0; i != MaxElts; ++i) {
1768     if (SVN->getMaskElt(i) < 0)
1769       continue;
1770
1771     unsigned SrcElt = SVN->getMaskElt(i);
1772
1773     if (monotonic) {
1774       if (SrcElt >= V2EltIdx0) {
1775         if (1 >= (++EltsFromV2)) {
1776           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1777         }
1778       } else if (CurrElt != SrcElt) {
1779         monotonic = false;
1780       }
1781
1782       ++CurrElt;
1783     }
1784
1785     if (rotate) {
1786       if (PrevElt > 0 && SrcElt < MaxElts) {
1787         if ((PrevElt == SrcElt - 1)
1788             || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1789           PrevElt = SrcElt;
1790           if (SrcElt == 0)
1791             V0Elt = i;
1792         } else {
1793           rotate = false;
1794         }
1795       } else if (PrevElt == 0) {
1796         // First time through, need to keep track of previous element
1797         PrevElt = SrcElt;
1798       } else {
1799         // This isn't a rotation, takes elements from vector 2
1800         rotate = false;
1801       }
1802     }
1803   }
1804
1805   if (EltsFromV2 == 1 && monotonic) {
1806     // Compute mask and shuffle
1807     MachineFunction &MF = DAG.getMachineFunction();
1808     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1809     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1810     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1811     // Initialize temporary register to 0
1812     SDValue InitTempReg =
1813       DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1814     // Copy register's contents as index in SHUFFLE_MASK:
1815     SDValue ShufMaskOp =
1816       DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1817                   DAG.getTargetConstant(V2Elt, MVT::i32),
1818                   DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1819     // Use shuffle mask in SHUFB synthetic instruction:
1820     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1821                        ShufMaskOp);
1822   } else if (rotate) {
1823     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1824
1825     return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1826                        V1, DAG.getConstant(rotamt, MVT::i16));
1827   } else {
1828    // Convert the SHUFFLE_VECTOR mask's input element units to the
1829    // actual bytes.
1830     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1831
1832     SmallVector<SDValue, 16> ResultMask;
1833     for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1834       unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1835
1836       for (unsigned j = 0; j < BytesPerElement; ++j)
1837         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1838     }
1839
1840     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1841                                     &ResultMask[0], ResultMask.size());
1842     return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1843   }
1844 }
1845
1846 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1847   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1848   DebugLoc dl = Op.getDebugLoc();
1849
1850   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1851     // For a constant, build the appropriate constant vector, which will
1852     // eventually simplify to a vector register load.
1853
1854     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1855     SmallVector<SDValue, 16> ConstVecValues;
1856     EVT VT;
1857     size_t n_copies;
1858
1859     // Create a constant vector:
1860     switch (Op.getValueType().getSimpleVT().SimpleTy) {
1861     default: llvm_unreachable("Unexpected constant value type in "
1862                               "LowerSCALAR_TO_VECTOR");
1863     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1864     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1865     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1866     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1867     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1868     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1869     }
1870
1871     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1872     for (size_t j = 0; j < n_copies; ++j)
1873       ConstVecValues.push_back(CValue);
1874
1875     return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1876                        &ConstVecValues[0], ConstVecValues.size());
1877   } else {
1878     // Otherwise, copy the value from one register to another:
1879     switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1880     default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1881     case MVT::i8:
1882     case MVT::i16:
1883     case MVT::i32:
1884     case MVT::i64:
1885     case MVT::f32:
1886     case MVT::f64:
1887       return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1888     }
1889   }
1890
1891   return SDValue();
1892 }
1893
1894 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1895   EVT VT = Op.getValueType();
1896   SDValue N = Op.getOperand(0);
1897   SDValue Elt = Op.getOperand(1);
1898   DebugLoc dl = Op.getDebugLoc();
1899   SDValue retval;
1900
1901   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1902     // Constant argument:
1903     int EltNo = (int) C->getZExtValue();
1904
1905     // sanity checks:
1906     if (VT == MVT::i8 && EltNo >= 16)
1907       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1908     else if (VT == MVT::i16 && EltNo >= 8)
1909       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1910     else if (VT == MVT::i32 && EltNo >= 4)
1911       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1912     else if (VT == MVT::i64 && EltNo >= 2)
1913       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1914
1915     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1916       // i32 and i64: Element 0 is the preferred slot
1917       return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1918     }
1919
1920     // Need to generate shuffle mask and extract:
1921     int prefslot_begin = -1, prefslot_end = -1;
1922     int elt_byte = EltNo * VT.getSizeInBits() / 8;
1923
1924     switch (VT.getSimpleVT().SimpleTy) {
1925     default:
1926       assert(false && "Invalid value type!");
1927     case MVT::i8: {
1928       prefslot_begin = prefslot_end = 3;
1929       break;
1930     }
1931     case MVT::i16: {
1932       prefslot_begin = 2; prefslot_end = 3;
1933       break;
1934     }
1935     case MVT::i32:
1936     case MVT::f32: {
1937       prefslot_begin = 0; prefslot_end = 3;
1938       break;
1939     }
1940     case MVT::i64:
1941     case MVT::f64: {
1942       prefslot_begin = 0; prefslot_end = 7;
1943       break;
1944     }
1945     }
1946
1947     assert(prefslot_begin != -1 && prefslot_end != -1 &&
1948            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1949
1950     unsigned int ShufBytes[16] = {
1951       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1952     };
1953     for (int i = 0; i < 16; ++i) {
1954       // zero fill uppper part of preferred slot, don't care about the
1955       // other slots:
1956       unsigned int mask_val;
1957       if (i <= prefslot_end) {
1958         mask_val =
1959           ((i < prefslot_begin)
1960            ? 0x80
1961            : elt_byte + (i - prefslot_begin));
1962
1963         ShufBytes[i] = mask_val;
1964       } else
1965         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1966     }
1967
1968     SDValue ShufMask[4];
1969     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1970       unsigned bidx = i * 4;
1971       unsigned int bits = ((ShufBytes[bidx] << 24) |
1972                            (ShufBytes[bidx+1] << 16) |
1973                            (ShufBytes[bidx+2] << 8) |
1974                            ShufBytes[bidx+3]);
1975       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1976     }
1977
1978     SDValue ShufMaskVec =
1979       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1980                   &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1981
1982     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1983                          DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1984                                      N, N, ShufMaskVec));
1985   } else {
1986     // Variable index: Rotate the requested element into slot 0, then replicate
1987     // slot 0 across the vector
1988     EVT VecVT = N.getValueType();
1989     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1990       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
1991                         "vector type!");
1992     }
1993
1994     // Make life easier by making sure the index is zero-extended to i32
1995     if (Elt.getValueType() != MVT::i32)
1996       Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1997
1998     // Scale the index to a bit/byte shift quantity
1999     APInt scaleFactor =
2000             APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2001     unsigned scaleShift = scaleFactor.logBase2();
2002     SDValue vecShift;
2003
2004     if (scaleShift > 0) {
2005       // Scale the shift factor:
2006       Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2007                         DAG.getConstant(scaleShift, MVT::i32));
2008     }
2009
2010     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2011
2012     // Replicate the bytes starting at byte 0 across the entire vector (for
2013     // consistency with the notion of a unified register set)
2014     SDValue replicate;
2015
2016     switch (VT.getSimpleVT().SimpleTy) {
2017     default:
2018       report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2019                         "type");
2020       /*NOTREACHED*/
2021     case MVT::i8: {
2022       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2023       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2024                               factor, factor, factor, factor);
2025       break;
2026     }
2027     case MVT::i16: {
2028       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2029       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2030                               factor, factor, factor, factor);
2031       break;
2032     }
2033     case MVT::i32:
2034     case MVT::f32: {
2035       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2036       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2037                               factor, factor, factor, factor);
2038       break;
2039     }
2040     case MVT::i64:
2041     case MVT::f64: {
2042       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2043       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2044       replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2045                               loFactor, hiFactor, loFactor, hiFactor);
2046       break;
2047     }
2048     }
2049
2050     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2051                          DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2052                                      vecShift, vecShift, replicate));
2053   }
2054
2055   return retval;
2056 }
2057
2058 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2059   SDValue VecOp = Op.getOperand(0);
2060   SDValue ValOp = Op.getOperand(1);
2061   SDValue IdxOp = Op.getOperand(2);
2062   DebugLoc dl = Op.getDebugLoc();
2063   EVT VT = Op.getValueType();
2064
2065   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2066   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2067
2068   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2069   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2070   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2071                                 DAG.getRegister(SPU::R1, PtrVT),
2072                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2073   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2074
2075   SDValue result =
2076     DAG.getNode(SPUISD::SHUFB, dl, VT,
2077                 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2078                 VecOp,
2079                 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2080
2081   return result;
2082 }
2083
2084 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2085                            const TargetLowering &TLI)
2086 {
2087   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2088   DebugLoc dl = Op.getDebugLoc();
2089   EVT ShiftVT = TLI.getShiftAmountTy();
2090
2091   assert(Op.getValueType() == MVT::i8);
2092   switch (Opc) {
2093   default:
2094     llvm_unreachable("Unhandled i8 math operator");
2095     /*NOTREACHED*/
2096     break;
2097   case ISD::ADD: {
2098     // 8-bit addition: Promote the arguments up to 16-bits and truncate
2099     // the result:
2100     SDValue N1 = Op.getOperand(1);
2101     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2102     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2103     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2104                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2105
2106   }
2107
2108   case ISD::SUB: {
2109     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2110     // the result:
2111     SDValue N1 = Op.getOperand(1);
2112     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2113     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2114     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2115                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2116   }
2117   case ISD::ROTR:
2118   case ISD::ROTL: {
2119     SDValue N1 = Op.getOperand(1);
2120     EVT N1VT = N1.getValueType();
2121
2122     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2123     if (!N1VT.bitsEq(ShiftVT)) {
2124       unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2125                        ? ISD::ZERO_EXTEND
2126                        : ISD::TRUNCATE;
2127       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2128     }
2129
2130     // Replicate lower 8-bits into upper 8:
2131     SDValue ExpandArg =
2132       DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2133                   DAG.getNode(ISD::SHL, dl, MVT::i16,
2134                               N0, DAG.getConstant(8, MVT::i32)));
2135
2136     // Truncate back down to i8
2137     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2138                        DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2139   }
2140   case ISD::SRL:
2141   case ISD::SHL: {
2142     SDValue N1 = Op.getOperand(1);
2143     EVT N1VT = N1.getValueType();
2144
2145     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2146     if (!N1VT.bitsEq(ShiftVT)) {
2147       unsigned N1Opc = ISD::ZERO_EXTEND;
2148
2149       if (N1.getValueType().bitsGT(ShiftVT))
2150         N1Opc = ISD::TRUNCATE;
2151
2152       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2153     }
2154
2155     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2156                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2157   }
2158   case ISD::SRA: {
2159     SDValue N1 = Op.getOperand(1);
2160     EVT N1VT = N1.getValueType();
2161
2162     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2163     if (!N1VT.bitsEq(ShiftVT)) {
2164       unsigned N1Opc = ISD::SIGN_EXTEND;
2165
2166       if (N1VT.bitsGT(ShiftVT))
2167         N1Opc = ISD::TRUNCATE;
2168       N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2169     }
2170
2171     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2173   }
2174   case ISD::MUL: {
2175     SDValue N1 = Op.getOperand(1);
2176
2177     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2178     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2179     return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2180                        DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2181     break;
2182   }
2183   }
2184
2185   return SDValue();
2186 }
2187
2188 //! Lower byte immediate operations for v16i8 vectors:
2189 static SDValue
2190 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2191   SDValue ConstVec;
2192   SDValue Arg;
2193   EVT VT = Op.getValueType();
2194   DebugLoc dl = Op.getDebugLoc();
2195
2196   ConstVec = Op.getOperand(0);
2197   Arg = Op.getOperand(1);
2198   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2199     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2200       ConstVec = ConstVec.getOperand(0);
2201     } else {
2202       ConstVec = Op.getOperand(1);
2203       Arg = Op.getOperand(0);
2204       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2205         ConstVec = ConstVec.getOperand(0);
2206       }
2207     }
2208   }
2209
2210   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2211     BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2212     assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2213
2214     APInt APSplatBits, APSplatUndef;
2215     unsigned SplatBitSize;
2216     bool HasAnyUndefs;
2217     unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2218
2219     if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2220                               HasAnyUndefs, minSplatBits)
2221         && minSplatBits <= SplatBitSize) {
2222       uint64_t SplatBits = APSplatBits.getZExtValue();
2223       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2224
2225       SmallVector<SDValue, 16> tcVec;
2226       tcVec.assign(16, tc);
2227       return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2228                          DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2229     }
2230   }
2231
2232   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2233   // lowered.  Return the operation, rather than a null SDValue.
2234   return Op;
2235 }
2236
2237 //! Custom lowering for CTPOP (count population)
2238 /*!
2239   Custom lowering code that counts the number ones in the input
2240   operand. SPU has such an instruction, but it counts the number of
2241   ones per byte, which then have to be accumulated.
2242 */
2243 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2244   EVT VT = Op.getValueType();
2245   EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2246                                VT, (128 / VT.getSizeInBits()));
2247   DebugLoc dl = Op.getDebugLoc();
2248
2249   switch (VT.getSimpleVT().SimpleTy) {
2250   default:
2251     assert(false && "Invalid value type!");
2252   case MVT::i8: {
2253     SDValue N = Op.getOperand(0);
2254     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2255
2256     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2257     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2258
2259     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2260   }
2261
2262   case MVT::i16: {
2263     MachineFunction &MF = DAG.getMachineFunction();
2264     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2265
2266     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2267
2268     SDValue N = Op.getOperand(0);
2269     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2270     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2271     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2272
2273     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2274     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2275
2276     // CNTB_result becomes the chain to which all of the virtual registers
2277     // CNTB_reg, SUM1_reg become associated:
2278     SDValue CNTB_result =
2279       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2280
2281     SDValue CNTB_rescopy =
2282       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2283
2284     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2285
2286     return DAG.getNode(ISD::AND, dl, MVT::i16,
2287                        DAG.getNode(ISD::ADD, dl, MVT::i16,
2288                                    DAG.getNode(ISD::SRL, dl, MVT::i16,
2289                                                Tmp1, Shift1),
2290                                    Tmp1),
2291                        Mask0);
2292   }
2293
2294   case MVT::i32: {
2295     MachineFunction &MF = DAG.getMachineFunction();
2296     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2297
2298     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2299     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2300
2301     SDValue N = Op.getOperand(0);
2302     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2303     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2304     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2305     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2306
2307     SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2308     SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2309
2310     // CNTB_result becomes the chain to which all of the virtual registers
2311     // CNTB_reg, SUM1_reg become associated:
2312     SDValue CNTB_result =
2313       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2314
2315     SDValue CNTB_rescopy =
2316       DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2317
2318     SDValue Comp1 =
2319       DAG.getNode(ISD::SRL, dl, MVT::i32,
2320                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2321                   Shift1);
2322
2323     SDValue Sum1 =
2324       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2325                   DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2326
2327     SDValue Sum1_rescopy =
2328       DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2329
2330     SDValue Comp2 =
2331       DAG.getNode(ISD::SRL, dl, MVT::i32,
2332                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2333                   Shift2);
2334     SDValue Sum2 =
2335       DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2336                   DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2337
2338     return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2339   }
2340
2341   case MVT::i64:
2342     break;
2343   }
2344
2345   return SDValue();
2346 }
2347
2348 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2349 /*!
2350  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2351  All conversions to i64 are expanded to a libcall.
2352  */
2353 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2354                               SPUTargetLowering &TLI) {
2355   EVT OpVT = Op.getValueType();
2356   SDValue Op0 = Op.getOperand(0);
2357   EVT Op0VT = Op0.getValueType();
2358
2359   if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2360       || OpVT == MVT::i64) {
2361     // Convert f32 / f64 to i32 / i64 via libcall.
2362     RTLIB::Libcall LC =
2363             (Op.getOpcode() == ISD::FP_TO_SINT)
2364              ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2365              : RTLIB::getFPTOUINT(Op0VT, OpVT);
2366     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2367     SDValue Dummy;
2368     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2369   }
2370
2371   return Op;
2372 }
2373
2374 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2375 /*!
2376  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2377  All conversions from i64 are expanded to a libcall.
2378  */
2379 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2380                               SPUTargetLowering &TLI) {
2381   EVT OpVT = Op.getValueType();
2382   SDValue Op0 = Op.getOperand(0);
2383   EVT Op0VT = Op0.getValueType();
2384
2385   if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2386       || Op0VT == MVT::i64) {
2387     // Convert i32, i64 to f64 via libcall:
2388     RTLIB::Libcall LC =
2389             (Op.getOpcode() == ISD::SINT_TO_FP)
2390              ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2391              : RTLIB::getUINTTOFP(Op0VT, OpVT);
2392     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2393     SDValue Dummy;
2394     return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2395   }
2396
2397   return Op;
2398 }
2399
2400 //! Lower ISD::SETCC
2401 /*!
2402  This handles MVT::f64 (double floating point) condition lowering
2403  */
2404 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2405                           const TargetLowering &TLI) {
2406   CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2407   DebugLoc dl = Op.getDebugLoc();
2408   assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2409
2410   SDValue lhs = Op.getOperand(0);
2411   SDValue rhs = Op.getOperand(1);
2412   EVT lhsVT = lhs.getValueType();
2413   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2414
2415   EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2416   APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2417   EVT IntVT(MVT::i64);
2418
2419   // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2420   // selected to a NOP:
2421   SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2422   SDValue lhsHi32 =
2423           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2424                       DAG.getNode(ISD::SRL, dl, IntVT,
2425                                   i64lhs, DAG.getConstant(32, MVT::i32)));
2426   SDValue lhsHi32abs =
2427           DAG.getNode(ISD::AND, dl, MVT::i32,
2428                       lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2429   SDValue lhsLo32 =
2430           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2431
2432   // SETO and SETUO only use the lhs operand:
2433   if (CC->get() == ISD::SETO) {
2434     // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2435     // SETUO
2436     APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2437     return DAG.getNode(ISD::XOR, dl, ccResultVT,
2438                        DAG.getSetCC(dl, ccResultVT,
2439                                     lhs, DAG.getConstantFP(0.0, lhsVT),
2440                                     ISD::SETUO),
2441                        DAG.getConstant(ccResultAllOnes, ccResultVT));
2442   } else if (CC->get() == ISD::SETUO) {
2443     // Evaluates to true if Op0 is [SQ]NaN
2444     return DAG.getNode(ISD::AND, dl, ccResultVT,
2445                        DAG.getSetCC(dl, ccResultVT,
2446                                     lhsHi32abs,
2447                                     DAG.getConstant(0x7ff00000, MVT::i32),
2448                                     ISD::SETGE),
2449                        DAG.getSetCC(dl, ccResultVT,
2450                                     lhsLo32,
2451                                     DAG.getConstant(0, MVT::i32),
2452                                     ISD::SETGT));
2453   }
2454
2455   SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2456   SDValue rhsHi32 =
2457           DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2458                       DAG.getNode(ISD::SRL, dl, IntVT,
2459                                   i64rhs, DAG.getConstant(32, MVT::i32)));
2460
2461   // If a value is negative, subtract from the sign magnitude constant:
2462   SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2463
2464   // Convert the sign-magnitude representation into 2's complement:
2465   SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2466                                       lhsHi32, DAG.getConstant(31, MVT::i32));
2467   SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2468   SDValue lhsSelect =
2469           DAG.getNode(ISD::SELECT, dl, IntVT,
2470                       lhsSelectMask, lhsSignMag2TC, i64lhs);
2471
2472   SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2473                                       rhsHi32, DAG.getConstant(31, MVT::i32));
2474   SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2475   SDValue rhsSelect =
2476           DAG.getNode(ISD::SELECT, dl, IntVT,
2477                       rhsSelectMask, rhsSignMag2TC, i64rhs);
2478
2479   unsigned compareOp;
2480
2481   switch (CC->get()) {
2482   case ISD::SETOEQ:
2483   case ISD::SETUEQ:
2484     compareOp = ISD::SETEQ; break;
2485   case ISD::SETOGT:
2486   case ISD::SETUGT:
2487     compareOp = ISD::SETGT; break;
2488   case ISD::SETOGE:
2489   case ISD::SETUGE:
2490     compareOp = ISD::SETGE; break;
2491   case ISD::SETOLT:
2492   case ISD::SETULT:
2493     compareOp = ISD::SETLT; break;
2494   case ISD::SETOLE:
2495   case ISD::SETULE:
2496     compareOp = ISD::SETLE; break;
2497   case ISD::SETUNE:
2498   case ISD::SETONE:
2499     compareOp = ISD::SETNE; break;
2500   default:
2501     report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2502   }
2503
2504   SDValue result =
2505           DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2506                        (ISD::CondCode) compareOp);
2507
2508   if ((CC->get() & 0x8) == 0) {
2509     // Ordered comparison:
2510     SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2511                                   lhs, DAG.getConstantFP(0.0, MVT::f64),
2512                                   ISD::SETO);
2513     SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2514                                   rhs, DAG.getConstantFP(0.0, MVT::f64),
2515                                   ISD::SETO);
2516     SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2517
2518     result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2519   }
2520
2521   return result;
2522 }
2523
2524 //! Lower ISD::SELECT_CC
2525 /*!
2526   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2527   SELB instruction.
2528
2529   \note Need to revisit this in the future: if the code path through the true
2530   and false value computations is longer than the latency of a branch (6
2531   cycles), then it would be more advantageous to branch and insert a new basic
2532   block and branch on the condition. However, this code does not make that
2533   assumption, given the simplisitc uses so far.
2534  */
2535
2536 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2537                               const TargetLowering &TLI) {
2538   EVT VT = Op.getValueType();
2539   SDValue lhs = Op.getOperand(0);
2540   SDValue rhs = Op.getOperand(1);
2541   SDValue trueval = Op.getOperand(2);
2542   SDValue falseval = Op.getOperand(3);
2543   SDValue condition = Op.getOperand(4);
2544   DebugLoc dl = Op.getDebugLoc();
2545
2546   // NOTE: SELB's arguments: $rA, $rB, $mask
2547   //
2548   // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2549   // where bits in $mask are 1. CCond will be inverted, having 1s where the
2550   // condition was true and 0s where the condition was false. Hence, the
2551   // arguments to SELB get reversed.
2552
2553   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2554   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2555   // with another "cannot select select_cc" assert:
2556
2557   SDValue compare = DAG.getNode(ISD::SETCC, dl,
2558                                 TLI.getSetCCResultType(Op.getValueType()),
2559                                 lhs, rhs, condition);
2560   return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2561 }
2562
2563 //! Custom lower ISD::TRUNCATE
2564 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2565 {
2566   // Type to truncate to
2567   EVT VT = Op.getValueType();
2568   MVT simpleVT = VT.getSimpleVT();
2569   EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2570                                VT, (128 / VT.getSizeInBits()));
2571   DebugLoc dl = Op.getDebugLoc();
2572
2573   // Type to truncate from
2574   SDValue Op0 = Op.getOperand(0);
2575   EVT Op0VT = Op0.getValueType();
2576
2577   if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2578     // Create shuffle mask, least significant doubleword of quadword
2579     unsigned maskHigh = 0x08090a0b;
2580     unsigned maskLow = 0x0c0d0e0f;
2581     // Use a shuffle to perform the truncation
2582     SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2583                                    DAG.getConstant(maskHigh, MVT::i32),
2584                                    DAG.getConstant(maskLow, MVT::i32),
2585                                    DAG.getConstant(maskHigh, MVT::i32),
2586                                    DAG.getConstant(maskLow, MVT::i32));
2587
2588     SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2589                                        Op0, Op0, shufMask);
2590
2591     return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2592   }
2593
2594   return SDValue();             // Leave the truncate unmolested
2595 }
2596
2597 /*!
2598  * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2599  * algorithm is to duplicate the sign bit using rotmai to generate at
2600  * least one byte full of sign bits. Then propagate the "sign-byte" into
2601  * the leftmost words and the i64/i32 into the rightmost words using shufb.
2602  *
2603  * @param Op The sext operand
2604  * @param DAG The current DAG
2605  * @return The SDValue with the entire instruction sequence
2606  */
2607 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2608 {
2609   DebugLoc dl = Op.getDebugLoc();
2610
2611   // Type to extend to
2612   MVT OpVT = Op.getValueType().getSimpleVT();
2613
2614   // Type to extend from
2615   SDValue Op0 = Op.getOperand(0);
2616   MVT Op0VT = Op0.getValueType().getSimpleVT();
2617
2618   // The type to extend to needs to be a i128 and
2619   // the type to extend from needs to be i64 or i32.
2620   assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2621           "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2622
2623   // Create shuffle mask
2624   unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2625   unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2626   unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2627   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2628                                  DAG.getConstant(mask1, MVT::i32),
2629                                  DAG.getConstant(mask1, MVT::i32),
2630                                  DAG.getConstant(mask2, MVT::i32),
2631                                  DAG.getConstant(mask3, MVT::i32));
2632
2633   // Word wise arithmetic right shift to generate at least one byte
2634   // that contains sign bits.
2635   MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2636   SDValue sraVal = DAG.getNode(ISD::SRA,
2637                  dl,
2638                  mvt,
2639                  DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2640                  DAG.getConstant(31, MVT::i32));
2641
2642   // Shuffle bytes - Copy the sign bits into the upper 64 bits
2643   // and the input value into the lower 64 bits.
2644   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2645       DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2646
2647   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2648 }
2649
2650 //! Custom (target-specific) lowering entry point
2651 /*!
2652   This is where LLVM's DAG selection process calls to do target-specific
2653   lowering of nodes.
2654  */
2655 SDValue
2656 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2657 {
2658   unsigned Opc = (unsigned) Op.getOpcode();
2659   EVT VT = Op.getValueType();
2660
2661   switch (Opc) {
2662   default: {
2663 #ifndef NDEBUG
2664     errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2665     errs() << "Op.getOpcode() = " << Opc << "\n";
2666     errs() << "*Op.getNode():\n";
2667     Op.getNode()->dump();
2668 #endif
2669     llvm_unreachable(0);
2670   }
2671   case ISD::LOAD:
2672   case ISD::EXTLOAD:
2673   case ISD::SEXTLOAD:
2674   case ISD::ZEXTLOAD:
2675     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2676   case ISD::STORE:
2677     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2678   case ISD::ConstantPool:
2679     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2680   case ISD::GlobalAddress:
2681     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2682   case ISD::JumpTable:
2683     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2684   case ISD::ConstantFP:
2685     return LowerConstantFP(Op, DAG);
2686
2687   // i8, i64 math ops:
2688   case ISD::ADD:
2689   case ISD::SUB:
2690   case ISD::ROTR:
2691   case ISD::ROTL:
2692   case ISD::SRL:
2693   case ISD::SHL:
2694   case ISD::SRA: {
2695     if (VT == MVT::i8)
2696       return LowerI8Math(Op, DAG, Opc, *this);
2697     break;
2698   }
2699
2700   case ISD::FP_TO_SINT:
2701   case ISD::FP_TO_UINT:
2702     return LowerFP_TO_INT(Op, DAG, *this);
2703
2704   case ISD::SINT_TO_FP:
2705   case ISD::UINT_TO_FP:
2706     return LowerINT_TO_FP(Op, DAG, *this);
2707
2708   // Vector-related lowering.
2709   case ISD::BUILD_VECTOR:
2710     return LowerBUILD_VECTOR(Op, DAG);
2711   case ISD::SCALAR_TO_VECTOR:
2712     return LowerSCALAR_TO_VECTOR(Op, DAG);
2713   case ISD::VECTOR_SHUFFLE:
2714     return LowerVECTOR_SHUFFLE(Op, DAG);
2715   case ISD::EXTRACT_VECTOR_ELT:
2716     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2717   case ISD::INSERT_VECTOR_ELT:
2718     return LowerINSERT_VECTOR_ELT(Op, DAG);
2719
2720   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2721   case ISD::AND:
2722   case ISD::OR:
2723   case ISD::XOR:
2724     return LowerByteImmed(Op, DAG);
2725
2726   // Vector and i8 multiply:
2727   case ISD::MUL:
2728     if (VT == MVT::i8)
2729       return LowerI8Math(Op, DAG, Opc, *this);
2730
2731   case ISD::CTPOP:
2732     return LowerCTPOP(Op, DAG);
2733
2734   case ISD::SELECT_CC:
2735     return LowerSELECT_CC(Op, DAG, *this);
2736
2737   case ISD::SETCC:
2738     return LowerSETCC(Op, DAG, *this);
2739
2740   case ISD::TRUNCATE:
2741     return LowerTRUNCATE(Op, DAG);
2742
2743   case ISD::SIGN_EXTEND:
2744     return LowerSIGN_EXTEND(Op, DAG);
2745   }
2746
2747   return SDValue();
2748 }
2749
2750 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2751                                            SmallVectorImpl<SDValue>&Results,
2752                                            SelectionDAG &DAG)
2753 {
2754 #if 0
2755   unsigned Opc = (unsigned) N->getOpcode();
2756   EVT OpVT = N->getValueType(0);
2757
2758   switch (Opc) {
2759   default: {
2760     errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2761     errs() << "Op.getOpcode() = " << Opc << "\n";
2762     errs() << "*Op.getNode():\n";
2763     N->dump();
2764     abort();
2765     /*NOTREACHED*/
2766   }
2767   }
2768 #endif
2769
2770   /* Otherwise, return unchanged */
2771 }
2772
2773 //===----------------------------------------------------------------------===//
2774 // Target Optimization Hooks
2775 //===----------------------------------------------------------------------===//
2776
2777 SDValue
2778 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2779 {
2780 #if 0
2781   TargetMachine &TM = getTargetMachine();
2782 #endif
2783   const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2784   SelectionDAG &DAG = DCI.DAG;
2785   SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2786   EVT NodeVT = N->getValueType(0);      // The node's value type
2787   EVT Op0VT = Op0.getValueType();       // The first operand's result
2788   SDValue Result;                       // Initially, empty result
2789   DebugLoc dl = N->getDebugLoc();
2790
2791   switch (N->getOpcode()) {
2792   default: break;
2793   case ISD::ADD: {
2794     SDValue Op1 = N->getOperand(1);
2795
2796     if (Op0.getOpcode() == SPUISD::IndirectAddr
2797         || Op1.getOpcode() == SPUISD::IndirectAddr) {
2798       // Normalize the operands to reduce repeated code
2799       SDValue IndirectArg = Op0, AddArg = Op1;
2800
2801       if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2802         IndirectArg = Op1;
2803         AddArg = Op0;
2804       }
2805
2806       if (isa<ConstantSDNode>(AddArg)) {
2807         ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2808         SDValue IndOp1 = IndirectArg.getOperand(1);
2809
2810         if (CN0->isNullValue()) {
2811           // (add (SPUindirect <arg>, <arg>), 0) ->
2812           // (SPUindirect <arg>, <arg>)
2813
2814 #if !defined(NDEBUG)
2815           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2816             errs() << "\n"
2817                  << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2818                  << "With:    (SPUindirect <arg>, <arg>)\n";
2819           }
2820 #endif
2821
2822           return IndirectArg;
2823         } else if (isa<ConstantSDNode>(IndOp1)) {
2824           // (add (SPUindirect <arg>, <const>), <const>) ->
2825           // (SPUindirect <arg>, <const + const>)
2826           ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2827           int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2828           SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2829
2830 #if !defined(NDEBUG)
2831           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2832             errs() << "\n"
2833                  << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2834                  << "), " << CN0->getSExtValue() << ")\n"
2835                  << "With:    (SPUindirect <arg>, "
2836                  << combinedConst << ")\n";
2837           }
2838 #endif
2839
2840           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2841                              IndirectArg, combinedValue);
2842         }
2843       }
2844     }
2845     break;
2846   }
2847   case ISD::SIGN_EXTEND:
2848   case ISD::ZERO_EXTEND:
2849   case ISD::ANY_EXTEND: {
2850     if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2851       // (any_extend (SPUextract_elt0 <arg>)) ->
2852       // (SPUextract_elt0 <arg>)
2853       // Types must match, however...
2854 #if !defined(NDEBUG)
2855       if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2856         errs() << "\nReplace: ";
2857         N->dump(&DAG);
2858         errs() << "\nWith:    ";
2859         Op0.getNode()->dump(&DAG);
2860         errs() << "\n";
2861       }
2862 #endif
2863
2864       return Op0;
2865     }
2866     break;
2867   }
2868   case SPUISD::IndirectAddr: {
2869     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2870       ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2871       if (CN != 0 && CN->getZExtValue() == 0) {
2872         // (SPUindirect (SPUaform <addr>, 0), 0) ->
2873         // (SPUaform <addr>, 0)
2874
2875         DEBUG(errs() << "Replace: ");
2876         DEBUG(N->dump(&DAG));
2877         DEBUG(errs() << "\nWith:    ");
2878         DEBUG(Op0.getNode()->dump(&DAG));
2879         DEBUG(errs() << "\n");
2880
2881         return Op0;
2882       }
2883     } else if (Op0.getOpcode() == ISD::ADD) {
2884       SDValue Op1 = N->getOperand(1);
2885       if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2886         // (SPUindirect (add <arg>, <arg>), 0) ->
2887         // (SPUindirect <arg>, <arg>)
2888         if (CN1->isNullValue()) {
2889
2890 #if !defined(NDEBUG)
2891           if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2892             errs() << "\n"
2893                  << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2894                  << "With:    (SPUindirect <arg>, <arg>)\n";
2895           }
2896 #endif
2897
2898           return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2899                              Op0.getOperand(0), Op0.getOperand(1));
2900         }
2901       }
2902     }
2903     break;
2904   }
2905   case SPUISD::SHLQUAD_L_BITS:
2906   case SPUISD::SHLQUAD_L_BYTES:
2907   case SPUISD::ROTBYTES_LEFT: {
2908     SDValue Op1 = N->getOperand(1);
2909
2910     // Kill degenerate vector shifts:
2911     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2912       if (CN->isNullValue()) {
2913         Result = Op0;
2914       }
2915     }
2916     break;
2917   }
2918   case SPUISD::PREFSLOT2VEC: {
2919     switch (Op0.getOpcode()) {
2920     default:
2921       break;
2922     case ISD::ANY_EXTEND:
2923     case ISD::ZERO_EXTEND:
2924     case ISD::SIGN_EXTEND: {
2925       // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2926       // <arg>
2927       // but only if the SPUprefslot2vec and <arg> types match.
2928       SDValue Op00 = Op0.getOperand(0);
2929       if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2930         SDValue Op000 = Op00.getOperand(0);
2931         if (Op000.getValueType() == NodeVT) {
2932           Result = Op000;
2933         }
2934       }
2935       break;
2936     }
2937     case SPUISD::VEC2PREFSLOT: {
2938       // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2939       // <arg>
2940       Result = Op0.getOperand(0);
2941       break;
2942     }
2943     }
2944     break;
2945   }
2946   }
2947
2948   // Otherwise, return unchanged.
2949 #ifndef NDEBUG
2950   if (Result.getNode()) {
2951     DEBUG(errs() << "\nReplace.SPU: ");
2952     DEBUG(N->dump(&DAG));
2953     DEBUG(errs() << "\nWith:        ");
2954     DEBUG(Result.getNode()->dump(&DAG));
2955     DEBUG(errs() << "\n");
2956   }
2957 #endif
2958
2959   return Result;
2960 }
2961
2962 //===----------------------------------------------------------------------===//
2963 // Inline Assembly Support
2964 //===----------------------------------------------------------------------===//
2965
2966 /// getConstraintType - Given a constraint letter, return the type of
2967 /// constraint it is for this target.
2968 SPUTargetLowering::ConstraintType
2969 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2970   if (ConstraintLetter.size() == 1) {
2971     switch (ConstraintLetter[0]) {
2972     default: break;
2973     case 'b':
2974     case 'r':
2975     case 'f':
2976     case 'v':
2977     case 'y':
2978       return C_RegisterClass;
2979     }
2980   }
2981   return TargetLowering::getConstraintType(ConstraintLetter);
2982 }
2983
2984 std::pair<unsigned, const TargetRegisterClass*>
2985 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2986                                                 EVT VT) const
2987 {
2988   if (Constraint.size() == 1) {
2989     // GCC RS6000 Constraint Letters
2990     switch (Constraint[0]) {
2991     case 'b':   // R1-R31
2992     case 'r':   // R0-R31
2993       if (VT == MVT::i64)
2994         return std::make_pair(0U, SPU::R64CRegisterClass);
2995       return std::make_pair(0U, SPU::R32CRegisterClass);
2996     case 'f':
2997       if (VT == MVT::f32)
2998         return std::make_pair(0U, SPU::R32FPRegisterClass);
2999       else if (VT == MVT::f64)
3000         return std::make_pair(0U, SPU::R64FPRegisterClass);
3001       break;
3002     case 'v':
3003       return std::make_pair(0U, SPU::GPRCRegisterClass);
3004     }
3005   }
3006
3007   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3008 }
3009
3010 //! Compute used/known bits for a SPU operand
3011 void
3012 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3013                                                   const APInt &Mask,
3014                                                   APInt &KnownZero,
3015                                                   APInt &KnownOne,
3016                                                   const SelectionDAG &DAG,
3017                                                   unsigned Depth ) const {
3018 #if 0
3019   const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3020
3021   switch (Op.getOpcode()) {
3022   default:
3023     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3024     break;
3025   case CALL:
3026   case SHUFB:
3027   case SHUFFLE_MASK:
3028   case CNTB:
3029   case SPUISD::PREFSLOT2VEC:
3030   case SPUISD::LDRESULT:
3031   case SPUISD::VEC2PREFSLOT:
3032   case SPUISD::SHLQUAD_L_BITS:
3033   case SPUISD::SHLQUAD_L_BYTES:
3034   case SPUISD::VEC_ROTL:
3035   case SPUISD::VEC_ROTR:
3036   case SPUISD::ROTBYTES_LEFT:
3037   case SPUISD::SELECT_MASK:
3038   case SPUISD::SELB:
3039   }
3040 #endif
3041 }
3042
3043 unsigned
3044 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3045                                                    unsigned Depth) const {
3046   switch (Op.getOpcode()) {
3047   default:
3048     return 1;
3049
3050   case ISD::SETCC: {
3051     EVT VT = Op.getValueType();
3052
3053     if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3054       VT = MVT::i32;
3055     }
3056     return VT.getSizeInBits();
3057   }
3058   }
3059 }
3060
3061 // LowerAsmOperandForConstraint
3062 void
3063 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3064                                                 char ConstraintLetter,
3065                                                 bool hasMemory,
3066                                                 std::vector<SDValue> &Ops,
3067                                                 SelectionDAG &DAG) const {
3068   // Default, for the time being, to the base class handler
3069   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3070                                                Ops, DAG);
3071 }
3072
3073 /// isLegalAddressImmediate - Return true if the integer value can be used
3074 /// as the offset of the target addressing mode.
3075 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3076                                                 const Type *Ty) const {
3077   // SPU's addresses are 256K:
3078   return (V > -(1 << 18) && V < (1 << 18) - 1);
3079 }
3080
3081 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3082   return false;
3083 }
3084
3085 bool
3086 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3087   // The SPU target isn't yet aware of offsets.
3088   return false;
3089 }