X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FBasicTargetTransformInfo.cpp;h=24bc570f44a1fcb25010a4b126c486ce350aa292;hb=2a2fcc1a0ec4a115bbe76c185f7cd64e77b61e7c;hp=763a4c0b3cfc487f0a0beda0030a033139e62b3b;hpb=f35ce2376ca361e059ad9390ce1f8467f8756bcd;p=oota-llvm.git diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 763a4c0b3cf..24bc570f44a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,7 +39,12 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + + const TargetLoweringBase *getTLI() const { + return TM->getSubtargetImpl()->getTargetLowering(); + } public: BasicTTI() : ImmutablePass(ID), TM(nullptr) { @@ -197,7 +202,7 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const { void BasicTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { // This unrolling functionality is target independent, but to provide some - // motivation for its indended use, for x86: + // motivation for its intended use, for x86: // According to the Intel 64 and IA-32 Architectures Optimization Reference // Manual, Intel Core models and later have a loop stream detector @@ -327,8 +332,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting element from its argument + // plus the cost of inserting them onto the result vector. + + // e.g. 
<4 x float> has a mask of <0,5,2,7> i.e. we need to extract from index + // 0 of first vector, index 1 of second vector, index 2 of first vector and + // finally index 3 of second vector and insert them at index <0,1,2,3> of + // result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } @@ -549,6 +574,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; + // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return 0;