X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FSLPVectorizer.cpp;h=76b46779535d8c475ddee27fb041e9de4dad1e2b;hb=12af22e8cc217827cf4f118b0f5e4ebbda9925ae;hp=9fea4450a10973acc969ddfc4b0e1ff3f82590e9;hpb=2158dec9653eb651610bd9383f38b38bf2960d7e;p=oota-llvm.git diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9fea4450a10..76b46779535 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -166,6 +166,23 @@ static unsigned getSameOpcode(ArrayRef VL) { return Opcode; } +/// Get the intersection (logical and) of all of the potential IR flags +/// of each scalar operation (VL) that will be converted into a vector (I). +/// Flag set: NSW, NUW, exact, and all of fast-math. +static void propagateIRFlags(Value *I, ArrayRef VL) { + if (auto *VecOp = dyn_cast(I)) { + if (auto *Intersection = dyn_cast(VL[0])) { + // Intersection is initialized to the 0th scalar, + // so start counting from index '1'. + for (int i = 1, e = VL.size(); i < e; ++i) { + if (auto *Scalar = dyn_cast(VL[i])) + Intersection->andIRFlags(Scalar); + } + VecOp->copyIRFlags(Intersection); + } + } +} + /// \returns \p I after propagating metadata from \p VL. static Instruction *propagateMetadata(Instruction *I, ArrayRef VL) { Instruction *I0 = cast(VL[0]); @@ -342,6 +359,33 @@ static void reorderInputsAccordingToOpcode(ArrayRef VL, } } +/// \returns True if in-tree use also needs extract. This refers to +/// possible scalar operand in vectorized instruction. +static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, + TargetLibraryInfo *TLI) { + + unsigned Opcode = UserInst->getOpcode(); + switch (Opcode) { + case Instruction::Load: { + LoadInst *LI = cast(UserInst); + return (LI->getPointerOperand() == Scalar); + } + case Instruction::Store: { + StoreInst *SI = cast(UserInst); + return (SI->getPointerOperand() == Scalar); + } + case Instruction::Call: { + CallInst *CI = cast(UserInst); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + if (hasVectorInstrinsicScalarOpd(ID, 1)) { + return (CI->getArgOperand(1) == Scalar); + } + } + default: + return false; + } +} + /// Bottom Up SLP Vectorizer. class BoUpSLP { public: @@ -864,18 +908,27 @@ void BoUpSLP::buildTree(ArrayRef Roots, for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); - // Skip in-tree scalars that become vectors. - if (ScalarToTreeEntry.count(U)) { - DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << - *U << ".\n"); - int Idx = ScalarToTreeEntry[U]; (void) Idx; - assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); - continue; - } Instruction *UserInst = dyn_cast(U); if (!UserInst) continue; + // Skip in-tree scalars that become vectors + if (ScalarToTreeEntry.count(U)) { + int Idx = ScalarToTreeEntry[U]; + TreeEntry *UseEntry = &VectorizableTree[Idx]; + Value *UseScalar = UseEntry->Scalars[0]; + // Some in-tree scalars will remain as scalar in vectorized + // instructions. If that is the case, the one in Lane 0 will + // be used. + if (UseScalar != U || + !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) { + DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U + << ".\n"); + assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); + continue; + } + } + // Ignore users in the user ignore list. if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != UserIgnoreList.end()) @@ -976,6 +1029,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { Instruction *VL0 = cast(VL[0]); BasicBlock *BB = cast(VL0)->getParent(); + if (!DT->isReachableFromEntry(BB)) { + // Don't go into unreachable blocks. They may contain instructions with + // dependency cycles which confuse the final scheduling. + DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); + newTreeEntry(VL, false); + return; + } + // Check that every instructions appears once in this bundle. for (unsigned i = 0, e = VL.size(); i < e; ++i) for (unsigned j = i+1; j < e; ++j) @@ -1417,6 +1478,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_UniformConstantValue; + TargetTransformInfo::OperandValueProperties Op1VP = + TargetTransformInfo::OP_None; + TargetTransformInfo::OperandValueProperties Op2VP = + TargetTransformInfo::OP_None; // If all operands are exactly the same ConstantInt then set the // operand kind to OK_UniformConstantValue. @@ -1438,11 +1503,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { CInt != cast(I->getOperand(1))) Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; } + // FIXME: Currently cost of model modification for division by + // power of 2 is handled only for X86. Add support for other targets. + if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt && + CInt->getValue().isPowerOf2()) + Op2VP = TargetTransformInfo::OP_PowerOf2; - ScalarCost = - VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK); - VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK); + ScalarCost = VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK, + Op1VP, Op2VP); + VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK, + Op1VP, Op2VP); } return VecCost - ScalarCost; } @@ -1977,6 +2048,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { BinaryOperator *BinOp = cast(VL0); Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); E->VectorizedValue = V; + propagateIRFlags(E->VectorizedValue, E->Scalars); ++NumVectorInstructions; if (Instruction *I = dyn_cast(V)) @@ -1995,6 +2067,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo(AS)); + + // The pointer operand uses an in-tree scalar so we add the new BitCast to + // ExternalUses list to make sure that an extract will be generated in the + // future. + if (ScalarToTreeEntry.count(LI->getPointerOperand())) + ExternalUses.push_back( + ExternalUser(LI->getPointerOperand(), cast(VecPtr), 0)); + unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); if (!Alignment) @@ -2019,6 +2099,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo(AS)); StoreInst *S = Builder.CreateStore(VecValue, VecPtr); + + // The pointer operand uses an in-tree scalar so we add the new BitCast to + // ExternalUses list to make sure that an extract will be generated in the + // future. + if (ScalarToTreeEntry.count(SI->getPointerOperand())) + ExternalUses.push_back( + ExternalUser(SI->getPointerOperand(), cast(VecPtr), 0)); + if (!Alignment) Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); S->setAlignment(Alignment); @@ -2060,6 +2148,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); Function *FI; Intrinsic::ID IID = Intrinsic::not_intrinsic; + Value *ScalarArg = nullptr; if (CI && (FI = CI->getCalledFunction())) { IID = (Intrinsic::ID) FI->getIntrinsicID(); } @@ -2070,6 +2159,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // a scalar. This argument should not be vectorized. if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) { CallInst *CEI = cast(E->Scalars[0]); + ScalarArg = CEI->getArgOperand(j); OpVecs.push_back(CEI->getArgOperand(j)); continue; } @@ -2088,6 +2178,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; Function *CF = Intrinsic::getDeclaration(M, ID, Tys); Value *V = Builder.CreateCall(CF, OpVecs); + + // The scalar argument uses an in-tree scalar so we add the new vectorized + // call to ExternalUses list to make sure that an extract will be + // generated in the future. + if (ScalarArg && ScalarToTreeEntry.count(ScalarArg)) + ExternalUses.push_back(ExternalUser(ScalarArg, cast(V), 0)); + E->VectorizedValue = V; ++NumVectorInstructions; return V; @@ -2115,18 +2212,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { BinaryOperator *BinOp1 = cast(VL1); Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); - // Create appropriate shuffle to take alternative operations from - // the vector. - std::vector Mask(E->Scalars.size()); + // Create shuffle to take alternate operations from the vector. + // Also, gather up odd and even scalar ops to propagate IR flags to + // each vector operation. + ValueList OddScalars, EvenScalars; unsigned e = E->Scalars.size(); + SmallVector Mask(e); for (unsigned i = 0; i < e; ++i) { - if (i & 1) + if (i & 1) { Mask[i] = Builder.getInt32(e + i); - else + OddScalars.push_back(E->Scalars[i]); + } else { Mask[i] = Builder.getInt32(i); + EvenScalars.push_back(E->Scalars[i]); + } } Value *ShuffleMask = ConstantVector::get(Mask); + propagateIRFlags(V0, EvenScalars); + propagateIRFlags(V1, OddScalars); Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); E->VectorizedValue = V; @@ -3084,11 +3188,9 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { BinaryOperator *B0 = dyn_cast(B->getOperand(0)); BinaryOperator *B1 = dyn_cast(B->getOperand(1)); if (tryToVectorizePair(A, B0, R)) { - B->moveBefore(V); return true; } if (tryToVectorizePair(A, B1, R)) { - B->moveBefore(V); return true; } } @@ -3098,11 +3200,9 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { BinaryOperator *A0 = dyn_cast(A->getOperand(0)); BinaryOperator *A1 = dyn_cast(A->getOperand(1)); if (tryToVectorizePair(A0, B, R)) { - A->moveBefore(V); return true; } if (tryToVectorizePair(A1, B, R)) { - A->moveBefore(V); return true; } } @@ -3298,8 +3398,7 @@ public: unsigned i = 0; for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { - ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, ReductionOps); + V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -3491,8 +3590,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Try to vectorize them. unsigned NumElts = (SameTypeIt - IncIt); DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); - if (NumElts > 1 && - tryToVectorizeList(ArrayRef(IncIt, NumElts), R)) { + if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; @@ -3634,8 +3732,8 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { // Process the stores in chunks of 16. for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) { unsigned Len = std::min(CE - CI, 16); - ArrayRef Chunk(&it->second[CI], Len); - Changed |= vectorizeStores(Chunk, -SLPCostThreshold, R); + Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), + -SLPCostThreshold, R); } } return Changed;