lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Chris Lattner and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PPCISelLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCISelLowering.h"
  15 #include "PPCTargetMachine.h"
  16 #include "llvm/ADT/VectorExtras.h"
  17 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  18 #include "llvm/CodeGen/MachineFrameInfo.h"
  19 #include "llvm/CodeGen/MachineFunction.h"
  20 #include "llvm/CodeGen/MachineInstrBuilder.h"
  21 #include "llvm/CodeGen/SelectionDAG.h"
  22 #include "llvm/CodeGen/SSARegMap.h"
  23 #include "llvm/Constants.h"
  24 #include "llvm/Function.h"
  25 #include "llvm/Intrinsics.h"
  26 #include "llvm/Support/MathExtras.h"
  27 #include "llvm/Target/TargetOptions.h"
  28 using namespace llvm;
  29
  30 PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  31   : TargetLowering(TM) {
  32
  33   // Fold away setcc operations if possible.
  34   setSetCCIsExpensive();
  35   setPow2DivIsCheap();
  36
  37   // Use _setjmp/_longjmp instead of setjmp/longjmp.
  38   setUseUnderscoreSetJmpLongJmp(true);
  39
  40   // Set up the register classes.
  41   addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  42   addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  43   addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
  44
  45   setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  46   setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
  47
  48   // PowerPC has no intrinsics for these particular operations
  49   setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  50   setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  51   setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  52
  53   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  54   setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  55   setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
  56
  57   // PowerPC has no SREM/UREM instructions
  58   setOperationAction(ISD::SREM, MVT::i32, Expand);
  59   setOperationAction(ISD::UREM, MVT::i32, Expand);
  60
  61   // We don't support sin/cos/sqrt/fmod
  62   setOperationAction(ISD::FSIN , MVT::f64, Expand);
  63   setOperationAction(ISD::FCOS , MVT::f64, Expand);
  64   setOperationAction(ISD::FREM , MVT::f64, Expand);
  65   setOperationAction(ISD::FSIN , MVT::f32, Expand);
  66   setOperationAction(ISD::FCOS , MVT::f32, Expand);
  67   setOperationAction(ISD::FREM , MVT::f32, Expand);
  68
  69   // If we're enabling GP optimizations, use hardware square root
  70   if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
  71     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  72     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  73   }
  74
  75   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  76   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  77
  78   // PowerPC does not have BSWAP, CTPOP or CTTZ
  79   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  80   setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  81   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  82
  83   // PowerPC does not have ROTR
  84   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  85
  86   // PowerPC does not have Select
  87   setOperationAction(ISD::SELECT, MVT::i32, Expand);
  88   setOperationAction(ISD::SELECT, MVT::f32, Expand);
  89   setOperationAction(ISD::SELECT, MVT::f64, Expand);
  90
  91   // PowerPC wants to turn select_cc of FP into fsel when possible.
  92   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  93   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
  94
  95   // PowerPC wants to optimize integer setcc a bit
  96   setOperationAction(ISD::SETCC, MVT::i32, Custom);
  97
  98   // PowerPC does not have BRCOND which requires SetCC
  99   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 100
 101   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 102   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 103
 104   // PowerPC does not have [U|S]INT_TO_FP
 105   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 106   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 107
 108   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
 109   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
 110
 111   // PowerPC does not have truncstore for i1.
 112   setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
 113
 114   // Support label based line numbers.
 115   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 116   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 117   // FIXME - use subtarget debug flags
 118   if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
 119     setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
 120
 121   // We want to legalize GlobalAddress and ConstantPool nodes into the
 122   // appropriate instructions to materialize the address.
 123   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 124   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 125
 126   // RET must be custom lowered, to meet ABI requirements
 127   setOperationAction(ISD::RET               , MVT::Other, Custom);
 128
 129   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 130   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 131
 132   // Use the default implementation.
 133   setOperationAction(ISD::VAARG             , MVT::Other, Expand);
 134   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 135   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 136   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 137   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
 138   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 139
 140   // We want to custom lower some of our intrinsics.
 141   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 142
 143   if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
 144     // They also have instructions for converting between i64 and fp.
 145     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 146     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 147
 148     // FIXME: disable this lowered code.  This generates 64-bit register values,
 149     // and we don't model the fact that the top part is clobbered by calls.  We
 150     // need to flag these together so that the value isn't live across a call.
 151     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 152
 153     // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
 154     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
 155   } else {
 156     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 157     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 158   }
 159
 160   if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
 161     // 64 bit PowerPC implementations can support i64 types directly
 162     addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
 163     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 164     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 165   } else {
 166     // 32 bit PowerPC wants to expand i64 shifts itself.
 167     setOperationAction(ISD::SHL, MVT::i64, Custom);
 168     setOperationAction(ISD::SRL, MVT::i64, Custom);
 169     setOperationAction(ISD::SRA, MVT::i64, Custom);
 170   }
 171
 172   if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
 173     // First set operation action for all vector types to expand. Then we
 174     // will selectively turn on ones that can be effectively codegen'd.
 175     for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 176          VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 177       // add/sub/and/or/xor are legal for all supported vector VT's.
 178       setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 179       setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
 180       setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
 181       setOperationAction(ISD::OR  , (MVT::ValueType)VT, Legal);
 182       setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
 183
 184       // We can custom expand all VECTOR_SHUFFLEs to VPERM.
 185       setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
 186
 187       setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
 188       setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
 189       setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
 190       setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 191       setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 192       setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
 193       setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
 194       setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
 195     }
 196
 197     addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
 198     addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
 199     addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
 200     addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
 201
 202     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 203
 204     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 205     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 206
 207     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 208     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 209     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 210     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 211   }
 212
 213   setSetCCResultContents(ZeroOrOneSetCCResult);
 214   setStackPointerRegisterToSaveRestore(PPC::R1);
 215
 216   // We have target-specific dag combine patterns for the following nodes:
 217   setTargetDAGCombine(ISD::SINT_TO_FP);
 218   setTargetDAGCombine(ISD::STORE);
 219
 220   computeRegisterProperties();
 221 }
 222
 223 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 224   switch (Opcode) {
 225   default: return 0;
 226   case PPCISD::FSEL:          return "PPCISD::FSEL";
 227   case PPCISD::FCFID:         return "PPCISD::FCFID";
 228   case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
 229   case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
 230   case PPCISD::STFIWX:        return "PPCISD::STFIWX";
 231   case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
 232   case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
 233   case PPCISD::VPERM:         return "PPCISD::VPERM";
 234   case PPCISD::Hi:            return "PPCISD::Hi";
 235   case PPCISD::Lo:            return "PPCISD::Lo";
 236   case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
 237   case PPCISD::SRL:           return "PPCISD::SRL";
 238   case PPCISD::SRA:           return "PPCISD::SRA";
 239   case PPCISD::SHL:           return "PPCISD::SHL";
 240   case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
 241   case PPCISD::STD_32:        return "PPCISD::STD_32";
 242   case PPCISD::CALL:          return "PPCISD::CALL";
 243   case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
 244   case PPCISD::MFCR:          return "PPCISD::MFCR";
 245   case PPCISD::VCMP:          return "PPCISD::VCMP";
 246   case PPCISD::VCMPo:         return "PPCISD::VCMPo";
 247   }
 248 }
 249
 250 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
 251 static bool isFloatingPointZero(SDOperand Op) {
 252   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
 253     return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
 254   else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
 255     // Maybe this has already been legalized into the constant pool?
 256     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
 257       if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
 258         return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
 259   }
 260   return false;
 261 }
 262
 263
 264 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
 265 /// specifies a splat of a single element that is suitable for input to
 266 /// VSPLTB/VSPLTH/VSPLTW.
 267 bool PPC::isSplatShuffleMask(SDNode *N) {
 268   assert(N->getOpcode() == ISD::BUILD_VECTOR);
 269
 270   // We can only splat 8-bit, 16-bit, and 32-bit quantities.
 271   if (N->getNumOperands() != 4 && N->getNumOperands() != 8 &&
 272       N->getNumOperands() != 16)
 273     return false;
 274
 275   // This is a splat operation if each element of the permute is the same, and
 276   // if the value doesn't reference the second vector.
 277   SDOperand Elt = N->getOperand(0);
 278   assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
 279   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
 280     assert(isa<ConstantSDNode>(N->getOperand(i)) &&
 281            "Invalid VECTOR_SHUFFLE mask!");
 282     if (N->getOperand(i) != Elt) return false;
 283   }
 284
 285   // Make sure it is a splat of the first vector operand.
 286   return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
 287 }
 288
 289 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 290 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 291 unsigned PPC::getVSPLTImmediate(SDNode *N) {
 292   assert(isSplatShuffleMask(N));
 293   return cast<ConstantSDNode>(N->getOperand(0))->getValue();
 294 }
 295
 296 /// isVecSplatImm - Return true if this is a build_vector of constants which
 297 /// can be formed by using a vspltis[bhw] instruction.  The ByteSize field
 298 /// indicates the number of bytes of each element [124] -> [bhw].
 299 bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
 300   SDOperand OpVal(0, 0);
 301   // Check to see if this buildvec has a single non-undef value in its elements.
 302   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
 303     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
 304     if (OpVal.Val == 0)
 305       OpVal = N->getOperand(i);
 306     else if (OpVal != N->getOperand(i))
 307       return false;
 308   }
 309
 310   if (OpVal.Val == 0) return false;  // All UNDEF: use implicit def.
 311
 312   unsigned ValSizeInBytes = 0;
 313   uint64_t Value = 0;
 314   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
 315     Value = CN->getValue();
 316     ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
 317   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
 318     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
 319     Value = FloatToBits(CN->getValue());
 320     ValSizeInBytes = 4;
 321   }
 322
 323   // If the splat value is larger than the element value, then we can never do
 324   // this splat.  The only case that we could fit the replicated bits into our
 325   // immediate field for would be zero, and we prefer to use vxor for it.
 326   if (ValSizeInBytes < ByteSize) return false;
 327
 328   // If the element value is larger than the splat value, cut it in half and
 329   // check to see if the two halves are equal.  Continue doing this until we
 330   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
 331   while (ValSizeInBytes > ByteSize) {
 332     ValSizeInBytes >>= 1;
 333
 334     // If the top half equals the bottom half, we're still ok.
 335     if (((Value >> (ValSizeInBytes*8)) & ((8 << ValSizeInBytes)-1)) !=
 336          (Value                        & ((8 << ValSizeInBytes)-1)))
 337       return false;
 338   }
 339
 340   // Properly sign extend the value.
 341   int ShAmt = (4-ByteSize)*8;
 342   int MaskVal = ((int)Value << ShAmt) >> ShAmt;
 343
 344   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
 345   if (MaskVal == 0) return false;
 346
 347   if (Val) *Val = MaskVal;
 348
 349   // Finally, if this value fits in a 5 bit sext field, return true.
 350   return ((MaskVal << (32-5)) >> (32-5)) == MaskVal;
 351 }
 352
 353
 354 /// LowerOperation - Provide custom lowering hooks for some operations.
 355 ///
 356 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
 357   switch (Op.getOpcode()) {
 358   default: assert(0 && "Wasn't expecting to be able to lower this!");
 359   case ISD::FP_TO_SINT: {
 360     assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
 361     SDOperand Src = Op.getOperand(0);
 362     if (Src.getValueType() == MVT::f32)
 363       Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
 364
 365     SDOperand Tmp;
 366     switch (Op.getValueType()) {
 367     default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
 368     case MVT::i32:
 369       Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
 370       break;
 371     case MVT::i64:
 372       Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
 373       break;
 374     }
 375
 376     // Convert the FP value to an int value through memory.
 377     SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
 378     if (Op.getValueType() == MVT::i32)
 379       Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
 380     return Bits;
 381   }
 382   case ISD::SINT_TO_FP:
 383     if (Op.getOperand(0).getValueType() == MVT::i64) {
 384       SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
 385       SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
 386       if (Op.getValueType() == MVT::f32)
 387         FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
 388       return FP;
 389     } else {
 390       assert(Op.getOperand(0).getValueType() == MVT::i32 &&
 391              "Unhandled SINT_TO_FP type in custom expander!");
 392       // Since we only generate this in 64-bit mode, we can take advantage of
 393       // 64-bit registers.  In particular, sign extend the input value into the
 394       // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
 395       // then lfd it and fcfid it.
 396       MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
 397       int FrameIdx = FrameInfo->CreateStackObject(8, 8);
 398       SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
 399
 400       SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
 401                                     Op.getOperand(0));
 402
 403       // STD the extended value into the stack slot.
 404       SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
 405                                     DAG.getEntryNode(), Ext64, FIdx,
 406                                     DAG.getSrcValue(NULL));
 407       // Load the value as a double.
 408       SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
 409
 410       // FCFID it and return it.
 411       SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
 412       if (Op.getValueType() == MVT::f32)
 413         FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
 414       return FP;
 415     }
 416     break;
 417
 418   case ISD::SELECT_CC: {
 419     // Turn FP only select_cc's into fsel instructions.
 420     if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
 421         !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
 422       break;
 423
 424     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 425
 426     // Cannot handle SETEQ/SETNE.
 427     if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
 428
 429     MVT::ValueType ResVT = Op.getValueType();
 430     MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
 431     SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
 432     SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
 433
 434     // If the RHS of the comparison is a 0.0, we don't need to do the
 435     // subtraction at all.
 436     if (isFloatingPointZero(RHS))
 437       switch (CC) {
 438       default: break;       // SETUO etc aren't handled by fsel.
 439       case ISD::SETULT:
 440       case ISD::SETLT:
 441         std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
 442       case ISD::SETUGE:
 443       case ISD::SETGE:
 444         if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
 445           LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
 446         return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
 447       case ISD::SETUGT:
 448       case ISD::SETGT:
 449         std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
 450       case ISD::SETULE:
 451       case ISD::SETLE:
 452         if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
 453           LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
 454         return DAG.getNode(PPCISD::FSEL, ResVT,
 455                            DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
 456       }
 457
 458     SDOperand Cmp;
 459     switch (CC) {
 460     default: break;       // SETUO etc aren't handled by fsel.
 461     case ISD::SETULT:
 462     case ISD::SETLT:
 463       Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
 464       if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
 465         Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
 466       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
 467     case ISD::SETUGE:
 468     case ISD::SETGE:
 469       Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
 470       if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
 471         Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
 472       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
 473     case ISD::SETUGT:
 474     case ISD::SETGT:
 475       Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
 476       if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
 477         Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
 478       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
 479     case ISD::SETULE:
 480     case ISD::SETLE:
 481       Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
 482       if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
 483         Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
 484       return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
 485     }
 486     break;
 487   }
 488   case ISD::SHL: {
 489     assert(Op.getValueType() == MVT::i64 &&
 490            Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
 491     // The generic code does a fine job expanding shift by a constant.
 492     if (isa<ConstantSDNode>(Op.getOperand(1))) break;
 493
 494     // Otherwise, expand into a bunch of logical ops.  Note that these ops
 495     // depend on the PPC behavior for oversized shift amounts.
 496     SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 497                                DAG.getConstant(0, MVT::i32));
 498     SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 499                                DAG.getConstant(1, MVT::i32));
 500     SDOperand Amt = Op.getOperand(1);
 501
 502     SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
 503                                  DAG.getConstant(32, MVT::i32), Amt);
 504     SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
 505     SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
 506     SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
 507     SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
 508                                  DAG.getConstant(-32U, MVT::i32));
 509     SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
 510     SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
 511     SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
 512     return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
 513   }
 514   case ISD::SRL: {
 515     assert(Op.getValueType() == MVT::i64 &&
 516            Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
 517     // The generic code does a fine job expanding shift by a constant.
 518     if (isa<ConstantSDNode>(Op.getOperand(1))) break;
 519
 520     // Otherwise, expand into a bunch of logical ops.  Note that these ops
 521     // depend on the PPC behavior for oversized shift amounts.
 522     SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 523                                DAG.getConstant(0, MVT::i32));
 524     SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 525                                DAG.getConstant(1, MVT::i32));
 526     SDOperand Amt = Op.getOperand(1);
 527
 528     SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
 529                                  DAG.getConstant(32, MVT::i32), Amt);
 530     SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
 531     SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
 532     SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
 533     SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
 534                                  DAG.getConstant(-32U, MVT::i32));
 535     SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
 536     SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
 537     SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
 538     return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
 539   }
 540   case ISD::SRA: {
 541     assert(Op.getValueType() == MVT::i64 &&
 542            Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
 543     // The generic code does a fine job expanding shift by a constant.
 544     if (isa<ConstantSDNode>(Op.getOperand(1))) break;
 545
 546     // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
 547     SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 548                                DAG.getConstant(0, MVT::i32));
 549     SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
 550                                DAG.getConstant(1, MVT::i32));
 551     SDOperand Amt = Op.getOperand(1);
 552
 553     SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
 554                                  DAG.getConstant(32, MVT::i32), Amt);
 555     SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
 556     SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
 557     SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
 558     SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
 559                                  DAG.getConstant(-32U, MVT::i32));
 560     SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
 561     SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
 562     SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
 563                                       Tmp4, Tmp6, ISD::SETLE);
 564     return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
 565   }
 566   case ISD::ConstantPool: {
 567     ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 568     Constant *C = CP->get();
 569     SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
 570     SDOperand Zero = DAG.getConstant(0, MVT::i32);
 571
 572     if (getTargetMachine().getRelocationModel() == Reloc::Static) {
 573       // Generate non-pic code that has direct accesses to the constant pool.
 574       // The address of the global is just (hi(&g)+lo(&g)).
 575       SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
 576       SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
 577       return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
 578     }
 579
 580     // Only lower ConstantPool on Darwin.
 581     if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
 582     SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
 583     if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
 584       // With PIC, the first instruction is actually "GR+hi(&G)".
 585       Hi = DAG.getNode(ISD::ADD, MVT::i32,
 586                        DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
 587     }
 588
 589     SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
 590     Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
 591     return Lo;
 592   }
 593   case ISD::GlobalAddress: {
 594     GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 595     GlobalValue *GV = GSDN->getGlobal();
 596     SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
 597     SDOperand Zero = DAG.getConstant(0, MVT::i32);
 598
 599     if (getTargetMachine().getRelocationModel() == Reloc::Static) {
 600       // Generate non-pic code that has direct accesses to globals.
 601       // The address of the global is just (hi(&g)+lo(&g)).
 602       SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
 603       SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
 604       return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
 605     }
 606
 607     // Only lower GlobalAddress on Darwin.
 608     if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
 609
 610     SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
 611     if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
 612       // With PIC, the first instruction is actually "GR+hi(&G)".
 613       Hi = DAG.getNode(ISD::ADD, MVT::i32,
 614                        DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
 615     }
 616
 617     SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
 618     Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
 619
 620     if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
 621         (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
 622       return Lo;
 623
 624     // If the global is weak or external, we have to go through the lazy
 625     // resolution stub.
 626     return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
 627   }
 628   case ISD::SETCC: {
 629     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 630
 631     // If we're comparing for equality to zero, expose the fact that this is
 632     // implented as a ctlz/srl pair on ppc, so that the dag combiner can
 633     // fold the new nodes.
 634     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
 635       if (C->isNullValue() && CC == ISD::SETEQ) {
 636         MVT::ValueType VT = Op.getOperand(0).getValueType();
 637         SDOperand Zext = Op.getOperand(0);
 638         if (VT < MVT::i32) {
 639           VT = MVT::i32;
 640           Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
 641         }
 642         unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
 643         SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
 644         SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
 645                                     DAG.getConstant(Log2b, getShiftAmountTy()));
 646         return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
 647       }
 648       // Leave comparisons against 0 and -1 alone for now, since they're usually
 649       // optimized.  FIXME: revisit this when we can custom lower all setcc
 650       // optimizations.
 651       if (C->isAllOnesValue() || C->isNullValue())
 652         break;
 653     }
 654
 655     // If we have an integer seteq/setne, turn it into a compare against zero
 656     // by subtracting the rhs from the lhs, which is faster than setting a
 657     // condition register, reading it back out, and masking the correct bit.
 658     MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
 659     if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
 660       MVT::ValueType VT = Op.getValueType();
 661       SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
 662                                   Op.getOperand(1));
 663       return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
 664     }
 665     break;
 666   }
 667   case ISD::VASTART: {
 668     // vastart just stores the address of the VarArgsFrameIndex slot into the
 669     // memory location argument.
 670     // FIXME: Replace MVT::i32 with PointerTy
 671     SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
 672     return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
 673                        Op.getOperand(1), Op.getOperand(2));
 674   }
 675   case ISD::RET: {
 676     SDOperand Copy;
 677
 678     switch(Op.getNumOperands()) {
 679     default:
 680       assert(0 && "Do not know how to return this many arguments!");
 681       abort();
 682     case 1:
 683       return SDOperand(); // ret void is legal
 684     case 2: {
 685       MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
 686       unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
 687       Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
 688                               SDOperand());
 689       break;
 690     }
 691     case 3:
 692       Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
 693                               SDOperand());
 694       Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
 695       break;
 696     }
 697     return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
 698   }
 699   case ISD::SCALAR_TO_VECTOR: {
 700     // Create a stack slot that is 16-byte aligned.
 701     MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
 702     int FrameIdx = FrameInfo->CreateStackObject(16, 16);
 703     SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
 704
 705     // Store the input value into Value#0 of the stack slot.
 706     SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
 707                                   Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
 708     // Load it out.
 709     return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
 710   }
 711   case ISD::BUILD_VECTOR:
 712     // If this is a case we can't handle, return null and let the default
 713     // expansion code take care of it.  If we CAN select this case, return Op.
 714
 715     // See if this is all zeros.
 716     // FIXME: We should handle splat(-0.0), and other cases here.
 717     if (ISD::isBuildVectorAllZeros(Op.Val))
 718       return Op;
 719
 720     if (PPC::isVecSplatImm(Op.Val, 1) ||    // vspltisb
 721         PPC::isVecSplatImm(Op.Val, 2) ||    // vspltish
 722         PPC::isVecSplatImm(Op.Val, 4))      // vspltisw
 723       return Op;
 724
 725     return SDOperand();
 726
 727   case ISD::VECTOR_SHUFFLE: {
 728     SDOperand V1 = Op.getOperand(0);
 729     SDOperand V2 = Op.getOperand(1);
 730     SDOperand PermMask = Op.getOperand(2);
 731
 732     // Cases that are handled by instructions that take permute immediates
 733     // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
 734     // selected by the instruction selector.
 735     if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF)
 736       break;
 737
 738     // TODO: Handle more cases, and also handle cases that are cheaper to do as
 739     // multiple such instructions than as a constant pool load/vperm pair.
 740
 741     // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
 742     // vector that will get spilled to the constant pool.
 743     if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
 744
 745     // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
 746     // that it is in input element units, not in bytes.  Convert now.
 747     MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
 748     unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
 749
 750     std::vector<SDOperand> ResultMask;
 751     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
 752       unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
 753
 754       for (unsigned j = 0; j != BytesPerElement; ++j)
 755         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
 756                                              MVT::i8));
 757     }
 758
 759     SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
 760     return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
 761   }
 762   case ISD::INTRINSIC_WO_CHAIN: {
 763     unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
 764
 765     // If this is a lowered altivec predicate compare, CompareOpc is set to the
 766     // opcode number of the comparison.
 767     int CompareOpc = -1;
 768     bool isDot = false;
 769     switch (IntNo) {
 770     default: return SDOperand();    // Don't custom lower most intrinsics.
 771     // Comparison predicates.
 772     case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
 773     case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
 774     case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
 775     case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
 776     case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
 777     case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
 778     case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
 779     case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
 780     case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
 781     case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
 782     case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
 783     case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
 784     case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
 785
 786     // Normal Comparisons.
 787     case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
 788     case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
 789     case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
 790     case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
 791     case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
 792     case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
 793     case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
 794     case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
 795     case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
 796     case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
 797     case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
 798     case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
 799     case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
 800     }
 801
 802     assert(CompareOpc>0 && "We only lower altivec predicate compares so far!");
 803
 804     // If this is a non-dot comparison, make the VCMP node.
 805     if (!isDot)
 806       return DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
 807                          Op.getOperand(1), Op.getOperand(2),
 808                          DAG.getConstant(CompareOpc, MVT::i32));
 809
 810     // Create the PPCISD altivec 'dot' comparison node.
 811     std::vector<SDOperand> Ops;
 812     std::vector<MVT::ValueType> VTs;
 813     Ops.push_back(Op.getOperand(2));  // LHS
 814     Ops.push_back(Op.getOperand(3));  // RHS
 815     Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
 816     VTs.push_back(Op.getOperand(2).getValueType());
 817     VTs.push_back(MVT::Flag);
 818     SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
 819
 820     // Now that we have the comparison, emit a copy from the CR to a GPR.
 821     // This is flagged to the above dot comparison.
 822     SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
 823                                   DAG.getRegister(PPC::CR6, MVT::i32),
 824                                   CompNode.getValue(1));
 825
 826     // Unpack the result based on how the target uses it.
 827     unsigned BitNo;   // Bit # of CR6.
 828     bool InvertBit;   // Invert result?
 829     switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
 830     default:  // Can't happen, don't crash on invalid number though.
 831     case 0:   // Return the value of the EQ bit of CR6.
 832       BitNo = 0; InvertBit = false;
 833       break;
 834     case 1:   // Return the inverted value of the EQ bit of CR6.
 835       BitNo = 0; InvertBit = true;
 836       break;
 837     case 2:   // Return the value of the LT bit of CR6.
 838       BitNo = 2; InvertBit = false;
 839       break;
 840     case 3:   // Return the inverted value of the LT bit of CR6.
 841       BitNo = 2; InvertBit = true;
 842       break;
 843     }
 844
 845     // Shift the bit into the low position.
 846     Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
 847                         DAG.getConstant(8-(3-BitNo), MVT::i32));
 848     // Isolate the bit.
 849     Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
 850                         DAG.getConstant(1, MVT::i32));
 851
 852     // If we are supposed to, toggle the bit.
 853     if (InvertBit)
 854       Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
 855                           DAG.getConstant(1, MVT::i32));
 856     return Flags;
 857   }
 858   }
 859   return SDOperand();
 860 }
 861
 862 std::vector<SDOperand>
 863 PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
 864   //
 865   // add beautiful description of PPC stack frame format, or at least some docs
 866   //
 867   MachineFunction &MF = DAG.getMachineFunction();
 868   MachineFrameInfo *MFI = MF.getFrameInfo();
 869   MachineBasicBlock& BB = MF.front();
 870   SSARegMap *RegMap = MF.getSSARegMap();
 871   std::vector<SDOperand> ArgValues;
 872
 873   unsigned ArgOffset = 24;
 874   unsigned GPR_remaining = 8;
 875   unsigned FPR_remaining = 13;
 876   unsigned GPR_idx = 0, FPR_idx = 0;
 877   static const unsigned GPR[] = {
 878     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
 879     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
 880   };
 881   static const unsigned FPR[] = {
 882     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
 883     PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
 884   };
 885
 886   // Add DAG nodes to load the arguments...  On entry to a function on PPC,
 887   // the arguments start at offset 24, although they are likely to be passed
 888   // in registers.
 889   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
 890     SDOperand newroot, argt;
 891     unsigned ObjSize;
 892     bool needsLoad = false;
 893     bool ArgLive = !I->use_empty();
 894     MVT::ValueType ObjectVT = getValueType(I->getType());
 895
 896     switch (ObjectVT) {
 897     default: assert(0 && "Unhandled argument type!");
 898     case MVT::i1:
 899     case MVT::i8:
 900     case MVT::i16:
 901     case MVT::i32:
 902       ObjSize = 4;
 903       if (!ArgLive) break;
 904       if (GPR_remaining > 0) {
 905         unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
 906         MF.addLiveIn(GPR[GPR_idx], VReg);
 907         argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
 908         if (ObjectVT != MVT::i32) {
 909           unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
 910                                                        : ISD::AssertZext;
 911           argt = DAG.getNode(AssertOp, MVT::i32, argt,
 912                              DAG.getValueType(ObjectVT));
 913           argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
 914         }
 915       } else {
 916         needsLoad = true;
 917       }
 918       break;
 919     case MVT::i64:
 920       ObjSize = 8;
 921       if (!ArgLive) break;
 922       if (GPR_remaining > 0) {
 923         SDOperand argHi, argLo;
 924         unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
 925         MF.addLiveIn(GPR[GPR_idx], VReg);
 926         argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
 927         // If we have two or more remaining argument registers, then both halves
 928         // of the i64 can be sourced from there.  Otherwise, the lower half will
 929         // have to come off the stack.  This can happen when an i64 is preceded
 930         // by 28 bytes of arguments.
 931         if (GPR_remaining > 1) {
 932           unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
 933           MF.addLiveIn(GPR[GPR_idx+1], VReg);
 934           argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
 935         } else {
 936           int FI = MFI->CreateFixedObject(4, ArgOffset+4);
 937           SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 938           argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
 939                               DAG.getSrcValue(NULL));
 940         }
 941         // Build the outgoing arg thingy
 942         argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
 943         newroot = argLo;
 944       } else {
 945         needsLoad = true;
 946       }
 947       break;
 948     case MVT::f32:
 949     case MVT::f64:
 950       ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
 951       if (!ArgLive) {
 952         if (FPR_remaining > 0) {
 953           --FPR_remaining;
 954           ++FPR_idx;
 955         }
 956         break;
 957       }
 958       if (FPR_remaining > 0) {
 959         unsigned VReg;
 960         if (ObjectVT == MVT::f32)
 961           VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
 962         else
 963           VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
 964         MF.addLiveIn(FPR[FPR_idx], VReg);
 965         argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
 966         --FPR_remaining;
 967         ++FPR_idx;
 968       } else {
 969         needsLoad = true;
 970       }
 971       break;
 972     }
 973
 974     // We need to load the argument to a virtual register if we determined above
 975     // that we ran out of physical registers of the appropriate type
 976     if (needsLoad) {
 977       unsigned SubregOffset = 0;
 978       if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
 979       if (ObjectVT == MVT::i16) SubregOffset = 2;
 980       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
 981       SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
 982       FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
 983                         DAG.getConstant(SubregOffset, MVT::i32));
 984       argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
 985                                    DAG.getSrcValue(NULL));
 986     }
 987
 988     // Every 4 bytes of argument space consumes one of the GPRs available for
 989     // argument passing.
 990     if (GPR_remaining > 0) {
 991       unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
 992       GPR_remaining -= delta;
 993       GPR_idx += delta;
 994     }
 995     ArgOffset += ObjSize;
 996     if (newroot.Val)
 997       DAG.setRoot(newroot.getValue(1));
 998
 999     ArgValues.push_back(argt);
1000   }
1001
1002   // If the function takes variable number of arguments, make a frame index for
1003   // the start of the first vararg value... for expansion of llvm.va_start.
1004   if (F.isVarArg()) {
1005     VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
1006     SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
1007     // If this function is vararg, store any remaining integer argument regs
1008     // to their spots on the stack so that they may be loaded by deferencing the
1009     // result of va_next.
1010     std::vector<SDOperand> MemOps;
1011     for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
1012       unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1013       MF.addLiveIn(GPR[GPR_idx], VReg);
1014       SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
1015       SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
1016                                     Val, FIN, DAG.getSrcValue(NULL));
1017       MemOps.push_back(Store);
1018       // Increment the address by four for the next argument to store
1019       SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
1020       FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
1021     }
1022     if (!MemOps.empty()) {
1023       MemOps.push_back(DAG.getRoot());
1024       DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
1025     }
1026   }
1027
1028   // Finally, inform the code generator which regs we return values in.
1029   switch (getValueType(F.getReturnType())) {
1030     default: assert(0 && "Unknown type!");
1031     case MVT::isVoid: break;
1032     case MVT::i1:
1033     case MVT::i8:
1034     case MVT::i16:
1035     case MVT::i32:
1036       MF.addLiveOut(PPC::R3);
1037       break;
1038     case MVT::i64:
1039       MF.addLiveOut(PPC::R3);
1040       MF.addLiveOut(PPC::R4);
1041       break;
1042     case MVT::f32:
1043     case MVT::f64:
1044       MF.addLiveOut(PPC::F1);
1045       break;
1046   }
1047
1048   return ArgValues;
1049 }
1050
1051 std::pair<SDOperand, SDOperand>
1052 PPCTargetLowering::LowerCallTo(SDOperand Chain,
1053                                const Type *RetTy, bool isVarArg,
1054                                unsigned CallingConv, bool isTailCall,
1055                                SDOperand Callee, ArgListTy &Args,
1056                                SelectionDAG &DAG) {
1057   // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
1058   // SelectExpr to use to put the arguments in the appropriate registers.
1059   std::vector<SDOperand> args_to_use;
1060
1061   // Count how many bytes are to be pushed on the stack, including the linkage
1062   // area, and parameter passing area.
1063   unsigned NumBytes = 24;
1064
1065   if (Args.empty()) {
1066     Chain = DAG.getCALLSEQ_START(Chain,
1067                                  DAG.getConstant(NumBytes, getPointerTy()));
1068   } else {
1069     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1070       switch (getValueType(Args[i].second)) {
1071       default: assert(0 && "Unknown value type!");
1072       case MVT::i1:
1073       case MVT::i8:
1074       case MVT::i16:
1075       case MVT::i32:
1076       case MVT::f32:
1077         NumBytes += 4;
1078         break;
1079       case MVT::i64:
1080       case MVT::f64:
1081         NumBytes += 8;
1082         break;
1083       }
1084     }
1085
1086     // Just to be safe, we'll always reserve the full 24 bytes of linkage area
1087     // plus 32 bytes of argument space in case any called code gets funky on us.
1088     // (Required by ABI to support var arg)
1089     if (NumBytes < 56) NumBytes = 56;
1090
1091     // Adjust the stack pointer for the new arguments...
1092     // These operations are automatically eliminated by the prolog/epilog pass
1093     Chain = DAG.getCALLSEQ_START(Chain,
1094                                  DAG.getConstant(NumBytes, getPointerTy()));
1095
1096     // Set up a copy of the stack pointer for use loading and storing any
1097     // arguments that may not fit in the registers available for argument
1098     // passing.
1099     SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
1100
1101     // Figure out which arguments are going to go in registers, and which in
1102     // memory.  Also, if this is a vararg function, floating point operations
1103     // must be stored to our stack, and loaded into integer regs as well, if
1104     // any integer regs are available for argument passing.
1105     unsigned ArgOffset = 24;
1106     unsigned GPR_remaining = 8;
1107     unsigned FPR_remaining = 13;
1108
1109     std::vector<SDOperand> MemOps;
1110     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1111       // PtrOff will be used to store the current argument to the stack if a
1112       // register cannot be found for it.
1113       SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1114       PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1115       MVT::ValueType ArgVT = getValueType(Args[i].second);
1116
1117       switch (ArgVT) {
1118       default: assert(0 && "Unexpected ValueType for argument!");
1119       case MVT::i1:
1120       case MVT::i8:
1121       case MVT::i16:
1122         // Promote the integer to 32 bits.  If the input type is signed use a
1123         // sign extend, otherwise use a zero extend.
1124         if (Args[i].second->isSigned())
1125           Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
1126         else
1127           Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
1128         // FALL THROUGH
1129       case MVT::i32:
1130         if (GPR_remaining > 0) {
1131           args_to_use.push_back(Args[i].first);
1132           --GPR_remaining;
1133         } else {
1134           MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1135                                        Args[i].first, PtrOff,
1136                                        DAG.getSrcValue(NULL)));
1137         }
1138         ArgOffset += 4;
1139         break;
1140       case MVT::i64:
1141         // If we have one free GPR left, we can place the upper half of the i64
1142         // in it, and store the other half to the stack.  If we have two or more
1143         // free GPRs, then we can pass both halves of the i64 in registers.
1144         if (GPR_remaining > 0) {
1145           SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1146                                      Args[i].first, DAG.getConstant(1, MVT::i32));
1147           SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1148                                      Args[i].first, DAG.getConstant(0, MVT::i32));
1149           args_to_use.push_back(Hi);
1150           --GPR_remaining;
1151           if (GPR_remaining > 0) {
1152             args_to_use.push_back(Lo);
1153             --GPR_remaining;
1154           } else {
1155             SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1156             PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1157             MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1158                                          Lo, PtrOff, DAG.getSrcValue(NULL)));
1159           }
1160         } else {
1161           MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1162                                        Args[i].first, PtrOff,
1163                                        DAG.getSrcValue(NULL)));
1164         }
1165         ArgOffset += 8;
1166         break;
1167       case MVT::f32:
1168       case MVT::f64:
1169         if (FPR_remaining > 0) {
1170           args_to_use.push_back(Args[i].first);
1171           --FPR_remaining;
1172           if (isVarArg) {
1173             SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
1174                                           Args[i].first, PtrOff,
1175                                           DAG.getSrcValue(NULL));
1176             MemOps.push_back(Store);
1177             // Float varargs are always shadowed in available integer registers
1178             if (GPR_remaining > 0) {
1179               SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1180                                            DAG.getSrcValue(NULL));
1181               MemOps.push_back(Load.getValue(1));
1182               args_to_use.push_back(Load);
1183               --GPR_remaining;
1184             }
1185             if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1186               SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1187               PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1188               SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1189                                            DAG.getSrcValue(NULL));
1190               MemOps.push_back(Load.getValue(1));
1191               args_to_use.push_back(Load);
1192               --GPR_remaining;
1193             }
1194           } else {
1195             // If we have any FPRs remaining, we may also have GPRs remaining.
1196             // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
1197             // GPRs.
1198             if (GPR_remaining > 0) {
1199               args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
1200               --GPR_remaining;
1201             }
1202             if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1203               args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
1204               --GPR_remaining;
1205             }
1206           }
1207         } else {
1208           MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1209                                        Args[i].first, PtrOff,
1210                                        DAG.getSrcValue(NULL)));
1211         }
1212         ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
1213         break;
1214       }
1215     }
1216     if (!MemOps.empty())
1217       Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
1218   }
1219
1220   std::vector<MVT::ValueType> RetVals;
1221   MVT::ValueType RetTyVT = getValueType(RetTy);
1222   MVT::ValueType ActualRetTyVT = RetTyVT;
1223   if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
1224     ActualRetTyVT = MVT::i32;   // Promote result to i32.
1225
1226   if (RetTyVT == MVT::i64) {
1227     RetVals.push_back(MVT::i32);
1228     RetVals.push_back(MVT::i32);
1229   } else if (RetTyVT != MVT::isVoid) {
1230     RetVals.push_back(ActualRetTyVT);
1231   }
1232   RetVals.push_back(MVT::Other);
1233
1234   // If the callee is a GlobalAddress node (quite common, every direct call is)
1235   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1236   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1237     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
1238
1239   std::vector<SDOperand> Ops;
1240   Ops.push_back(Chain);
1241   Ops.push_back(Callee);
1242   Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
1243   SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
1244   Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
1245   Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1246                       DAG.getConstant(NumBytes, getPointerTy()));
1247   SDOperand RetVal = TheCall;
1248
1249   // If the result is a small value, add a note so that we keep track of the
1250   // information about whether it is sign or zero extended.
1251   if (RetTyVT != ActualRetTyVT) {
1252     RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
1253                          MVT::i32, RetVal, DAG.getValueType(RetTyVT));
1254     RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
1255   } else if (RetTyVT == MVT::i64) {
1256     RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
1257   }
1258
1259   return std::make_pair(RetVal, Chain);
1260 }
1261
1262 MachineBasicBlock *
1263 PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1264                                            MachineBasicBlock *BB) {
1265   assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
1266           MI->getOpcode() == PPC::SELECT_CC_F4 ||
1267           MI->getOpcode() == PPC::SELECT_CC_F8) &&
1268          "Unexpected instr type to insert");
1269
1270   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
1271   // control-flow pattern.  The incoming instruction knows the destination vreg
1272   // to set, the condition code register to branch on, the true/false values to
1273   // select between, and a branch opcode to use.
1274   const BasicBlock *LLVM_BB = BB->getBasicBlock();
1275   ilist<MachineBasicBlock>::iterator It = BB;
1276   ++It;
1277
1278   //  thisMBB:
1279   //  ...
1280   //   TrueVal = ...
1281   //   cmpTY ccX, r1, r2
1282   //   bCC copy1MBB
1283   //   fallthrough --> copy0MBB
1284   MachineBasicBlock *thisMBB = BB;
1285   MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1286   MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
1287   BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
1288     .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
1289   MachineFunction *F = BB->getParent();
1290   F->getBasicBlockList().insert(It, copy0MBB);
1291   F->getBasicBlockList().insert(It, sinkMBB);
1292   // Update machine-CFG edges by first adding all successors of the current
1293   // block to the new block which will contain the Phi node for the select.
1294   for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
1295       e = BB->succ_end(); i != e; ++i)
1296     sinkMBB->addSuccessor(*i);
1297   // Next, remove all successors of the current block, and add the true
1298   // and fallthrough blocks as its successors.
1299   while(!BB->succ_empty())
1300     BB->removeSuccessor(BB->succ_begin());
1301   BB->addSuccessor(copy0MBB);
1302   BB->addSuccessor(sinkMBB);
1303
1304   //  copy0MBB:
1305   //   %FalseValue = ...
1306   //   # fallthrough to sinkMBB
1307   BB = copy0MBB;
1308
1309   // Update machine-CFG edges
1310   BB->addSuccessor(sinkMBB);
1311
1312   //  sinkMBB:
1313   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1314   //  ...
1315   BB = sinkMBB;
1316   BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
1317     .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
1318     .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1319
1320   delete MI;   // The pseudo instruction is gone now.
1321   return BB;
1322 }
1323
1324 SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
1325                                                DAGCombinerInfo &DCI) const {
1326   TargetMachine &TM = getTargetMachine();
1327   SelectionDAG &DAG = DCI.DAG;
1328   switch (N->getOpcode()) {
1329   default: break;
1330   case ISD::SINT_TO_FP:
1331     if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
1332       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
1333         // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
1334         // We allow the src/dst to be either f32/f64, but the intermediate
1335         // type must be i64.
1336         if (N->getOperand(0).getValueType() == MVT::i64) {
1337           SDOperand Val = N->getOperand(0).getOperand(0);
1338           if (Val.getValueType() == MVT::f32) {
1339             Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1340             DCI.AddToWorklist(Val.Val);
1341           }
1342
1343           Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
1344           DCI.AddToWorklist(Val.Val);
1345           Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
1346           DCI.AddToWorklist(Val.Val);
1347           if (N->getValueType(0) == MVT::f32) {
1348             Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
1349             DCI.AddToWorklist(Val.Val);
1350           }
1351           return Val;
1352         } else if (N->getOperand(0).getValueType() == MVT::i32) {
1353           // If the intermediate type is i32, we can avoid the load/store here
1354           // too.
1355         }
1356       }
1357     }
1358     break;
1359   case ISD::STORE:
1360     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
1361     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
1362         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
1363         N->getOperand(1).getValueType() == MVT::i32) {
1364       SDOperand Val = N->getOperand(1).getOperand(0);
1365       if (Val.getValueType() == MVT::f32) {
1366         Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1367         DCI.AddToWorklist(Val.Val);
1368       }
1369       Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
1370       DCI.AddToWorklist(Val.Val);
1371
1372       Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
1373                         N->getOperand(2), N->getOperand(3));
1374       DCI.AddToWorklist(Val.Val);
1375       return Val;
1376     }
1377     break;
1378   case PPCISD::VCMP: {
1379     // If a VCMPo node already exists with exactly the same operands as this
1380     // node, use its result instead of this node (VCMPo computes both a CR6 and
1381     // a normal output).
1382     //
1383     if (!N->getOperand(0).hasOneUse() &&
1384         !N->getOperand(1).hasOneUse() &&
1385         !N->getOperand(2).hasOneUse()) {
1386
1387       // Scan all of the users of the LHS, looking for VCMPo's that match.
1388       SDNode *VCMPoNode = 0;
1389
1390       SDNode *LHSN = N->getOperand(0).Val;
1391       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
1392            UI != E; ++UI)
1393         if ((*UI)->getOpcode() == PPCISD::VCMPo &&
1394             (*UI)->getOperand(1) == N->getOperand(1) &&
1395             (*UI)->getOperand(2) == N->getOperand(2) &&
1396             (*UI)->getOperand(0) == N->getOperand(0)) {
1397           VCMPoNode = *UI;
1398           break;
1399         }
1400
1401       // If there are non-zero uses of the flag value, use the VCMPo node!
1402       if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1))
1403         return SDOperand(VCMPoNode, 0);
1404     }
1405     break;
1406   }
1407   }
1408
1409   return SDOperand();
1410 }
1411
1412 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
1413                                                        uint64_t Mask,
1414                                                        uint64_t &KnownZero,
1415                                                        uint64_t &KnownOne,
1416                                                        unsigned Depth) const {
1417   KnownZero = 0;
1418   KnownOne = 0;
1419   switch (Op.getOpcode()) {
1420   default: break;
1421   case ISD::INTRINSIC_WO_CHAIN: {
1422     switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
1423     default: break;
1424     case Intrinsic::ppc_altivec_vcmpbfp_p:
1425     case Intrinsic::ppc_altivec_vcmpeqfp_p:
1426     case Intrinsic::ppc_altivec_vcmpequb_p:
1427     case Intrinsic::ppc_altivec_vcmpequh_p:
1428     case Intrinsic::ppc_altivec_vcmpequw_p:
1429     case Intrinsic::ppc_altivec_vcmpgefp_p:
1430     case Intrinsic::ppc_altivec_vcmpgtfp_p:
1431     case Intrinsic::ppc_altivec_vcmpgtsb_p:
1432     case Intrinsic::ppc_altivec_vcmpgtsh_p:
1433     case Intrinsic::ppc_altivec_vcmpgtsw_p:
1434     case Intrinsic::ppc_altivec_vcmpgtub_p:
1435     case Intrinsic::ppc_altivec_vcmpgtuh_p:
1436     case Intrinsic::ppc_altivec_vcmpgtuw_p:
1437       KnownZero = ~1U;  // All bits but the low one are known to be zero.
1438       break;
1439     }
1440   }
1441   }
1442 }
1443
1444
1445 /// getConstraintType - Given a constraint letter, return the type of
1446 /// constraint it is for this target.
1447 PPCTargetLowering::ConstraintType
1448 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
1449   switch (ConstraintLetter) {
1450   default: break;
1451   case 'b':
1452   case 'r':
1453   case 'f':
1454   case 'v':
1455   case 'y':
1456     return C_RegisterClass;
1457   }
1458   return TargetLowering::getConstraintType(ConstraintLetter);
1459 }
1460
1461
1462 std::vector<unsigned> PPCTargetLowering::
1463 getRegClassForInlineAsmConstraint(const std::string &Constraint,
1464                                   MVT::ValueType VT) const {
1465   if (Constraint.size() == 1) {
1466     switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
1467     default: break;  // Unknown constriant letter
1468     case 'b':
1469       return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
1470                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1471                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1472                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1473                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1474                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1475                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1476                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1477                                    0);
1478     case 'r':
1479       return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
1480                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1481                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1482                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1483                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1484                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1485                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1486                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1487                                    0);
1488     case 'f':
1489       return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
1490                                    PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
1491                                    PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
1492                                    PPC::F12, PPC::F13, PPC::F14, PPC::F15,
1493                                    PPC::F16, PPC::F17, PPC::F18, PPC::F19,
1494                                    PPC::F20, PPC::F21, PPC::F22, PPC::F23,
1495                                    PPC::F24, PPC::F25, PPC::F26, PPC::F27,
1496                                    PPC::F28, PPC::F29, PPC::F30, PPC::F31,
1497                                    0);
1498     case 'v':
1499       return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
1500                                    PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
1501                                    PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
1502                                    PPC::V12, PPC::V13, PPC::V14, PPC::V15,
1503                                    PPC::V16, PPC::V17, PPC::V18, PPC::V19,
1504                                    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
1505                                    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
1506                                    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
1507                                    0);
1508     case 'y':
1509       return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
1510                                    PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
1511                                    0);
1512     }
1513   }
1514
1515   return std::vector<unsigned>();
1516 }
1517
1518 // isOperandValidForConstraint
1519 bool PPCTargetLowering::
1520 isOperandValidForConstraint(SDOperand Op, char Letter) {
1521   switch (Letter) {
1522   default: break;
1523   case 'I':
1524   case 'J':
1525   case 'K':
1526   case 'L':
1527   case 'M':
1528   case 'N':
1529   case 'O':
1530   case 'P': {
1531     if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
1532     unsigned Value = cast<ConstantSDNode>(Op)->getValue();
1533     switch (Letter) {
1534     default: assert(0 && "Unknown constraint letter!");
1535     case 'I':  // "I" is a signed 16-bit constant.
1536       return (short)Value == (int)Value;
1537     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
1538     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
1539       return (short)Value == 0;
1540     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
1541       return (Value >> 16) == 0;
1542     case 'M':  // "M" is a constant that is greater than 31.
1543       return Value > 31;
1544     case 'N':  // "N" is a positive constant that is an exact power of two.
1545       return (int)Value > 0 && isPowerOf2_32(Value);
1546     case 'O':  // "O" is the constant zero.
1547       return Value == 0;
1548     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
1549       return (short)-Value == (int)-Value;
1550     }
1551     break;
1552   }
1553   }
1554
1555   // Handle standard constraint letters.
1556   return TargetLowering::isOperandValidForConstraint(Op, Letter);
1557 }
1558
1559 /// isLegalAddressImmediate - Return true if the integer value can be used
1560 /// as the offset of the target addressing mode.
1561 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
1562   // PPC allows a sign-extended 16-bit immediate field.
1563   return (V > -(1 << 16) && V < (1 << 16)-1);
1564 }