From: Arnold Schwaighofer Date: Tue, 5 Feb 2013 18:46:41 +0000 (+0000) Subject: Loop Vectorizer: Refactor code to compute vectorized memory instruction cost X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7d80dad40741ecc2e45943f685bf539eeed2ba51;p=oota-llvm.git Loop Vectorizer: Refactor code to compute vectorized memory instruction cost Introduce a helper class that computes the cost of memory access instructions. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174422 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 62542737de0..7d696a72ddd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -560,6 +560,11 @@ public: /// \return information about the register usage of the loop. RegisterUsage calculateRegisterUsage(); + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -571,11 +576,6 @@ private: /// width. Vector width of one means scalar. unsigned getInstructionCost(Instruction *I, unsigned VF); - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. - static Type* ToVectorTy(Type *Scalar, unsigned VF); - /// Returns whether the instruction is a load or store and will be a emitted /// as a vector operation. bool isConsecutiveLoadOrStore(Instruction *I); @@ -594,6 +594,177 @@ private: DataLayout *DL; }; +/// A helper class to compute the cost of a memory operation (load or store). 
+class MemoryCostComputation { +public: + /// \brief This function computes the cost of a memory instruction, either of + /// a load or of a store. + /// \param Inst a pointer to a LoadInst or a StoreInst. + /// \param VF the vector factor to use. + /// \param TTI the target transform information used to obtain costs. + /// \param Legality the legality class used by this function to obtain the + /// access stride of the memory operation. + /// \returns the estimated cost of the memory instruction. + static unsigned computeCost(Value *Inst, unsigned VF, + const TargetTransformInfo &TTI, + LoopVectorizationLegality *Legality) { + if (StoreInst *Store = dyn_cast(Inst)) + return StoreCost(Store, VF, TTI, Legality).cost(); + + return LoadCost(cast(Inst), VF, TTI, Legality).cost(); + } + +private: + /// A helper class to compute the cost of a vectorized memory instruction. It is + /// subclassed by load and store cost computation classes that fill the fields + /// with values that require knowing about the concrete Load/StoreInst class. + class MemoryOpCost { + public: + /// \return the cost of vectorizing the memory access instruction. + unsigned cost() { + if (VectorFactor == 1) + return TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, AddressSpace); + + if ((Stride = Legality->isConsecutivePtr(PointerOperand))) + return costOfWideMemInst(); + + return costOfScalarizedMemInst(); + } + + protected: + /// The pointer operand of the memory instruction. + Value *PointerOperand; + /// The scalar type of the memory access. + Type *ScalarTy; + /// The vector type of the memory access. + Type *VectorTy; + /// The vector factor by which we vectorize. + unsigned VectorFactor; + /// The stride of the memory access. + int Stride; + /// The alignment of the memory operation. + unsigned Alignment; + /// The address space of the memory operation. + unsigned AddressSpace; + /// The opcode of the memory instruction. + unsigned Opcode; + /// Whether the memory instruction is a load (true) or a store (false).
+ bool IsLoadInst; + const TargetTransformInfo &TTI; + LoopVectorizationLegality *Legality; + + /// Constructs a helper class to compute the cost of a memory instruction. + /// \param VF the vector factor (the length of the vector). + /// \param TI the target transform information used by this class to obtain + /// costs. + /// \param L the legality class used by this class to obtain the access + /// stride of the memory operation. + MemoryOpCost(unsigned VF, const TargetTransformInfo &TI, + LoopVectorizationLegality *L) : + VectorFactor(VF), TTI(TI), Legality(L) { + } + + private: + /// \return the cost if the memory instruction is scalarized. + unsigned costOfScalarizedMemInst() { + unsigned Cost = 0; + Cost += costOfExtractFromPointerVector(); + Cost += costOfExtractFromValueVector(); + Cost += VectorFactor * TTI.getMemoryOpCost(Opcode, ScalarTy, Alignment, + AddressSpace); + Cost += costOfInsertIntoValueVector(); + return Cost; + } + + /// \return the cost of extracting the pointers out of the pointer vector. + unsigned costOfExtractFromPointerVector() { + Type *PtrTy = getVectorizedPointerOperandType(); + return costOfVectorInstForAllElems(Instruction::ExtractElement, PtrTy); + } + + /// \return the cost of extracting values out of the value vector if the + /// memory instruction is a store and zero otherwise. + unsigned costOfExtractFromValueVector() { + if (IsLoadInst) + return 0; + + return costOfVectorInstForAllElems(Instruction::ExtractElement, VectorTy); + } + + /// \return the cost of inserting values into the value vector if the memory + /// instruction is a load and zero otherwise. + unsigned costOfInsertIntoValueVector() { + if (!IsLoadInst) + return 0; + + return costOfVectorInstForAllElems(Instruction::InsertElement, VectorTy); + } + + /// \return the cost of a vector memory instruction. + unsigned costOfWideMemInst() { + unsigned Cost = TTI.getMemoryOpCost(Opcode, VectorTy, Alignment, + AddressSpace); + // A negative stride is a reversed access: add the cost of a reverse shuffle.
+ if (Stride < 0) + Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, + 0); + return Cost; + } + + /// Helper function to compute the cost of one insertelement or extractelement + /// instruction per vector element. + /// \param VecOpcode the vector instruction opcode (can be either an + /// InsertElement or an ExtractElement). + /// \param Ty the vector type the vector instruction operates on. + /// \return the cost of a vector instruction applied to each vector + /// element. + unsigned costOfVectorInstForAllElems(unsigned VecOpcode, Type *Ty) { + unsigned Cost = 0; + for (unsigned i = 0; i < VectorFactor; ++i) + Cost += TTI.getVectorInstrCost(VecOpcode, Ty, i); + return Cost; + } + + /// \return a vectorized type for the pointer operand. + Type * getVectorizedPointerOperandType() { + Type *PointerOpTy = PointerOperand->getType(); + return LoopVectorizationCostModel::ToVectorTy(PointerOpTy, VectorFactor); + } + }; + + /// Implementation of the abstract memory cost base class. Sets the fields of the base + /// class whose values depend on the LoadInst. + class LoadCost : public MemoryOpCost { + public: + LoadCost(LoadInst *Load, unsigned VF, const TargetTransformInfo &TI, + LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) { + PointerOperand = Load->getPointerOperand(); + ScalarTy = Load->getType(); + VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF); + Alignment = Load->getAlignment(); + AddressSpace = Load->getPointerAddressSpace(); + Opcode = Load->getOpcode(); + IsLoadInst = true; + } + }; + + /// Implementation of the abstract memory cost base class. Sets the fields of the base + /// class whose values depend on the StoreInst.
+ class StoreCost : public MemoryOpCost { + public: + StoreCost(StoreInst *Store, unsigned VF, const TargetTransformInfo &TI, + LoopVectorizationLegality *L) : MemoryOpCost(VF, TI, L) { + PointerOperand = Store->getPointerOperand(); + ScalarTy = Store->getValueOperand()->getType(); + VectorTy = LoopVectorizationCostModel::ToVectorTy(ScalarTy, VF); + Alignment = Store->getAlignment(); + AddressSpace = Store->getPointerAddressSpace(); + Opcode = Store->getOpcode(); + IsLoadInst = false; + } + }; +}; + /// The LoopVectorize Pass. struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid @@ -3097,83 +3268,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } + case Instruction::Load: case Instruction::Store: { - StoreInst *SI = cast(I); - Type *ValTy = SI->getValueOperand()->getType(); - VectorTy = ToVectorTy(ValTy, VF); - - if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); - - // Scalarized stores. - int Stride = Legal->isConsecutivePtr(SI->getPointerOperand()); - bool Reverse = Stride < 0; - if (0 == Stride) { - unsigned Cost = 0; - - // The cost of extracting from the value vector and pointer vector. - Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF); - for (unsigned i = 0; i < VF; ++i) { - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, - i); - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i); - } - - // The cost of the scalar stores. - Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), - SI->getAlignment(), - SI->getPointerAddressSpace()); - return Cost; - } - - // Wide stores. 
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); - if (Reverse) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, - VectorTy, 0); - return Cost; + return MemoryCostComputation::computeCost(I, VF, TTI, Legal); } - case Instruction::Load: { - LoadInst *LI = cast(I); - if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), - LI->getPointerAddressSpace()); - - // Scalarized loads. - int Stride = Legal->isConsecutivePtr(LI->getPointerOperand()); - bool Reverse = Stride < 0; - if (0 == Stride) { - unsigned Cost = 0; - Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF); - - // The cost of extracting from the pointer vector. - for (unsigned i = 0; i < VF; ++i) - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i); - - // The cost of inserting data to the result vector. - for (unsigned i = 0; i < VF; ++i) - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i); - - // The cost of the scalar stores. - Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); - return Cost; - } - - // Wide loads. - unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - LI->getAlignment(), - LI->getPointerAddressSpace()); - if (Reverse) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); - return Cost; - } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: