diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5943cc1ebd3..c6fd113ee75 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2,8 +2,8 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/MRegisterInfo.h"
@@ -20,6 +21,8 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/CallingConv.h"
 using namespace llvm;
 
 /// InitLibcallNames - Set default libcall names.
@@ -45,42 +48,58 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::NEG_I64] = "__negdi2";
   Names[RTLIB::ADD_F32] = "__addsf3";
   Names[RTLIB::ADD_F64] = "__adddf3";
+  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
   Names[RTLIB::SUB_F32] = "__subsf3";
   Names[RTLIB::SUB_F64] = "__subdf3";
+  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
   Names[RTLIB::MUL_F32] = "__mulsf3";
   Names[RTLIB::MUL_F64] = "__muldf3";
+  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
   Names[RTLIB::DIV_F32] = "__divsf3";
   Names[RTLIB::DIV_F64] = "__divdf3";
+  Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
   Names[RTLIB::REM_F32] = "fmodf";
   Names[RTLIB::REM_F64] = "fmod";
+  Names[RTLIB::REM_PPCF128] = "fmodl";
   Names[RTLIB::NEG_F32] = "__negsf2";
   Names[RTLIB::NEG_F64] = "__negdf2";
   Names[RTLIB::POWI_F32] = "__powisf2";
   Names[RTLIB::POWI_F64] = "__powidf2";
+  Names[RTLIB::POWI_F80] = "__powixf2";
+  Names[RTLIB::POWI_PPCF128] = "__powitf2";
   Names[RTLIB::SQRT_F32] = "sqrtf";
   Names[RTLIB::SQRT_F64] = "sqrt";
+  Names[RTLIB::SQRT_F80] = "sqrtl";
+  Names[RTLIB::SQRT_PPCF128] = "sqrtl";
   Names[RTLIB::SIN_F32] = "sinf";
   Names[RTLIB::SIN_F64] = "sin";
   Names[RTLIB::COS_F32] = "cosf";
   Names[RTLIB::COS_F64] = "cos";
+  Names[RTLIB::POW_F32] = "powf";
+  Names[RTLIB::POW_F64] = "pow";
+  Names[RTLIB::POW_F80] = "powl";
+  Names[RTLIB::POW_PPCF128] = "powl";
   Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
   Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
   Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
   Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
   Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
   Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
-  Names[RTLIB::FPTOSINT_LD_I64] = "__fixxfdi";
+  Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+  Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
   Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
   Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
   Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
   Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
-  Names[RTLIB::FPTOUINT_LD_I32] = "__fixunsxfsi";
-  Names[RTLIB::FPTOUINT_LD_I64] = "__fixunsxfdi";
+  Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+  Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+  Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
   Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
   Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
   Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
   Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
-  Names[RTLIB::SINTTOFP_I64_LD] = "__floatdixf";
+  Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+  Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
   Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
   Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
   Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
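
For orientation, the hunk above only fills in defaults: the legalizer later picks an RTLIB entry by value type and emits a call to whatever string sits in that slot, so a target can redirect any routine by overwriting its entry. A minimal standalone model of that table, assuming the usual override-after-init flow; the enum, the override name "_q_add", and everything else below are illustrative stand-ins, not code from this patch:

#include <cassert>
#include <cstring>

// One slot per libcall; defaults installed up front, targets may overwrite.
enum Libcall { ADD_F32, ADD_F64, ADD_PPCF128, NUM_LIBCALLS };

static const char *Names[NUM_LIBCALLS];

static void initNames() {
  Names[ADD_F32] = "__addsf3";        // defaults, as in InitLibcallNames
  Names[ADD_F64] = "__adddf3";
  Names[ADD_PPCF128] = "__gcc_qadd";
}

int main() {
  initNames();
  Names[ADD_PPCF128] = "_q_add";      // hypothetical target override
  assert(std::strcmp(Names[ADD_PPCF128], "_q_add") == 0);
  return 0;
}
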
"__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; - Names[RTLIB::SINTTOFP_I64_LD] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; @@ -136,13 +155,17 @@ TargetLowering::TargetLowering(TargetMachine &tm) memset(&IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(&ConvertActions, 0, sizeof(ConvertActions)); - // Set all indexed load / store to expand. + // Set default actions for various operations. for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. for (unsigned IM = (unsigned)ISD::PRE_INC; IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand); setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand); } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::ValueType)VT, Expand); } IsLittleEndian = TD->isLittleEndian(); @@ -169,10 +192,67 @@ TargetLowering::TargetLowering(TargetMachine &tm) InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); + + // Tell Legalize whether the assembler supports DEBUG_LOC. + if (!TM.getTargetAsmInfo()->hasDotLocAndDotFile()) + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); } TargetLowering::~TargetLowering() {} + +SDOperand TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { + assert(getSubtarget() && "Subtarget not defined"); + SDOperand ChainOp = Op.getOperand(0); + SDOperand DestOp = Op.getOperand(1); + SDOperand SourceOp = Op.getOperand(2); + SDOperand CountOp = Op.getOperand(3); + SDOperand AlignOp = Op.getOperand(4); + SDOperand AlwaysInlineOp = Op.getOperand(5); + + bool AlwaysInline = (bool)cast(AlwaysInlineOp)->getValue(); + unsigned Align = (unsigned)cast(AlignOp)->getValue(); + if (Align == 0) Align = 1; + + // If size is unknown, call memcpy. + ConstantSDNode *I = dyn_cast(CountOp); + if (!I) { + assert(!AlwaysInline && "Cannot inline copy of unknown size"); + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); + } + + // If not DWORD aligned or if size is more than threshold, then call memcpy. + // The libc version is likely to be faster for the following cases. It can + // use the address value and run time information about the CPU. 
 
 /// computeRegisterProperties - Once all of the register classes are added,
 /// this allows us to compute derived properties we expose.
 void TargetLowering::computeRegisterProperties() {
@@ -215,6 +295,14 @@ void TargetLowering::computeRegisterProperties() {
     }
   }
 
+  // ppcf128 type is really two f64's.
+  if (!isTypeLegal(MVT::ppcf128)) {
+    NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+    RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+    TransformToType[MVT::ppcf128] = MVT::f64;
+    ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
+  }
+
   // Decide how to handle f64. If the target does not have native f64 support,
   // expand it to i64 and we will be generating soft float library calls.
   if (!isTypeLegal(MVT::f64)) {
@@ -280,6 +368,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT,
 
   unsigned NumVectorRegs = 1;
 
+  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
+  // could break down into LHS/RHS like LegalizeDAG does.
+  if (!isPowerOf2_32(NumElts)) {
+    NumVectorRegs = NumElts;
+    NumElts = 1;
+  }
+
   // Divide the input until we get to a supported size. This will always
   // end with a scalar if the target doesn't support vectors.
   while (NumElts > 1 &&
@@ -309,6 +404,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT::ValueType VT,
   return 1;
 }
 
+SDOperand TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
+                                                   SelectionDAG &DAG) const {
+  if (usesGlobalOffsetTable())
+    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
+  return Table;
+}
+
 //===----------------------------------------------------------------------===//
 //  Optimization Methods
 //===----------------------------------------------------------------------===//
@@ -842,6 +944,32 @@ bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask,
     KnownZero |= ~InMask & DemandedMask;
     break;
   }
+  case ISD::FGETSIGN:
+    // All bits are zero except the low bit.
+    KnownZero = MVT::getIntVTBitMask(Op.getValueType()) ^ 1;
+    break;
+  case ISD::BIT_CONVERT:
+#if 0
+    // If this is an FP->Int bitcast and if the sign bit is the only thing that
+    // is demanded, turn this into a FGETSIGN.
+    if (DemandedMask == MVT::getIntVTSignBit(Op.getValueType()) &&
+        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+        !MVT::isVector(Op.getOperand(0).getValueType())) {
+      // Only do this xform if FGETSIGN is valid or if before legalize.
+      if (!TLO.AfterLegalize ||
+          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+        // place.  We expect the SHL to be eliminated by other optimizations.
+        SDOperand Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+                                         Op.getOperand(0));
+        unsigned ShVal = MVT::getSizeInBits(Op.getValueType())-1;
+        SDOperand ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+                                                 Sign, ShAmt));
+      }
+    }
+#endif
+    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::INTRINSIC_WO_CHAIN:
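
The disabled BIT_CONVERT combine above rests on a simple bit identity: when only the sign bit of an fp-to-int bitcast is demanded, FGETSIGN (which produces 0 or 1) shifted left by width-1 yields the same word as masking the raw bits. A scalar C++ sketch of that identity for f32, with std::signbit standing in for FGETSIGN; nothing here is code from this patch:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t signViaBitcast(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);  // the BIT_CONVERT
  return Bits & 0x80000000u;            // DemandedMask == sign bit only
}

static uint32_t signViaFgetsign(float F) {
  uint32_t Sign = std::signbit(F) ? 1u : 0u;  // FGETSIGN: 0 or 1
  return Sign << 31;                          // SHL by getSizeInBits(VT)-1
}

int main() {
  const float Tests[] = {0.0f, -0.0f, 1.5f, -2.25f, 1e30f, -1e30f};
  for (float F : Tests)
    assert(signViaBitcast(F) == signViaFgetsign(F));
  return 0;
}
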
@@ -1152,6 +1280,28 @@ TargetLowering::SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
     // Constant fold or commute setcc.
     SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond);
     if (O.Val) return O;
+  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.Val)) {
+    // If the RHS of an FP comparison is a constant, simplify it away in
+    // some cases.
+    if (CFP->getValueAPF().isNaN()) {
+      // If an operand is known to be a NaN, we can fold it.
+      switch (ISD::getUnorderedFlavor(Cond)) {
+      default: assert(0 && "Unknown flavor!");
+      case 0:  // Known false.
+        return DAG.getConstant(0, VT);
+      case 1:  // Known true.
+        return DAG.getConstant(1, VT);
+      case 2:  // Undefined.
+        return DAG.getNode(ISD::UNDEF, VT);
+      }
+    }
+
+    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
+    // constant if knowing that the operand is non-NaN is enough.  We prefer to
+    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+    // materialize 0.0.
+    if (Cond == ISD::SETO || Cond == ISD::SETUO)
+      return DAG.getSetCC(VT, N0, N0, Cond);
   }
 
   if (N0 == N1) {
@@ -1367,10 +1517,15 @@ void TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                   SelectionDAG &DAG) {
   switch (ConstraintLetter) {
   default: break;
+  case 'X':    // Allows any operand; labels (basic block) use this.
+    if (Op.getOpcode() == ISD::BasicBlock) {
+      Ops.push_back(Op);
+      return;
+    }
+    // fall through
   case 'i':    // Simple Integer or Relocatable Constant
   case 'n':    // Simple Integer
-  case 's':    // Relocatable Constant
-  case 'X': {  // Allows any operand.
+  case 's': {  // Relocatable Constant
     // These operands are interested in values of the form (GV+C), where C may
     // be folded in as an offset of GV, or it may be explicitly added.  Also, it
     // is possible and fine if either GV or C are missing.
@@ -1681,15 +1836,21 @@ SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   // Check to see if we can do this.
   if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
     return SDOperand();       // BuildSDIV only operates on i32 or i64
-  if (!isOperationLegal(ISD::MULHS, VT))
-    return SDOperand();       // Make sure the target supports MULHS.
 
   int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
   ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
 
   // Multiply the numerator (operand 0) by the magic value
-  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
-                            DAG.getConstant(magics.m, VT));
+  SDOperand Q;
+  if (isOperationLegal(ISD::MULHS, VT))
+    Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegal(ISD::SMUL_LOHI, VT))
+    Q = SDOperand(DAG.getNode(ISD::SMUL_LOHI, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).Val, 1);
+  else
+    return SDOperand();       // No mulhs or equivalent
   // If d > 0 and m < 0, add the numerator
   if (d > 0 && magics.m < 0) {
     Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
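
A worked instance helps here. For a 32-bit signed divide by 7, magic32 produces m = 0x92492493 with a post-shift of 2; since d > 0 and m < 0 the numerator is added back, and a final "add the sign bit" step rounds toward zero. The only step that needs MULHS is the multiply-high, which is exactly why the hunk above can substitute the high half of SMUL_LOHI. The constants below are the standard Hacker's Delight values and this is an illustrative sketch, not output captured from this code (it also assumes arithmetic right shift of negative values, as mainstream compilers provide):

#include <cassert>
#include <cstdint>

// n / 7 via multiply-high, as BuildSDIV would emit it for i32.
static int32_t sdiv7(int32_t N) {
  const int32_t M = (int32_t)0x92492493;          // magics.m for d == 7
  int32_t Q = (int32_t)(((int64_t)M * N) >> 32);  // MULHS, or the high half
                                                  // of SMUL_LOHI
  Q += N;                                         // d > 0 && m < 0: add back
  Q >>= 2;                                        // SRA by magics.s
  Q += (int32_t)((uint32_t)Q >> 31);              // add 1 if quotient < 0
  return Q;
}

int main() {
  for (int32_t N = -1000; N <= 1000; ++N)
    assert(sdiv7(N) == N / 7);
  return 0;
}
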
@@ -1729,15 +1890,21 @@ SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   // Check to see if we can do this.
   if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
     return SDOperand();       // BuildUDIV only operates on i32 or i64
-  if (!isOperationLegal(ISD::MULHU, VT))
-    return SDOperand();       // Make sure the target supports MULHU.
 
   uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
   mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
 
   // Multiply the numerator (operand 0) by the magic value
-  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
-                            DAG.getConstant(magics.m, VT));
+  SDOperand Q;
+  if (isOperationLegal(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegal(ISD::UMUL_LOHI, VT))
+    Q = SDOperand(DAG.getNode(ISD::UMUL_LOHI, DAG.getVTList(VT, VT),
+                              N->getOperand(0),
+                              DAG.getConstant(magics.m, VT)).Val, 1);
+  else
+    return SDOperand();       // No mulhu or equivalent
 
   if (Created)
     Created->push_back(Q.Val);
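
The unsigned case follows the same idea. For n / 5 on i32, magicu32 gives m = 0xCCCCCCCD with a post-shift of 2 and no add-back, and the high 32 bits of the 64-bit product are exactly what MULHU, or the second result of UMUL_LOHI that this hunk falls back to, computes. Constants again per Hacker's Delight; an illustrative sketch, not code from the patch:

#include <cassert>
#include <cstdint>

// n / 5 via multiply-high, as BuildUDIV would emit it for i32.
static uint32_t udiv5(uint32_t N) {
  const uint32_t M = 0xCCCCCCCDu;        // magics.m for d == 5
  uint64_t Wide = (uint64_t)M * N;       // UMUL_LOHI: full 64-bit product
  uint32_t Hi = (uint32_t)(Wide >> 32);  // ...its high half equals MULHU
  return Hi >> 2;                        // SRL by magics.s
}

int main() {
  for (uint32_t N = 0; N <= 100000; ++N)
    assert(udiv5(N) == N / 5);
  return 0;
}
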