From: Arnold Schwaighofer Date: Tue, 5 Feb 2013 15:08:02 +0000 (+0000) Subject: Loop Vectorizer: Handle pointer stores/loads in getWidestType() X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=935645b7655a0b5189d40b3d65b3bcb14e30d859;p=oota-llvm.git Loop Vectorizer: Handle pointer stores/loads in getWidestType() In the loop vectorizer cost model, we used to ignore stores/loads of a pointer type when computing the widest type within a loop. This meant that if we had only stores/loads of pointers in a loop we would return a widest type of 8bits (instead of 32 or 64 bit) and therefore a vector factor that was too big. Now, if we see a consecutive store/load of pointers we use the size of a pointer (from data layout). This problem occured in SingleSource/Benchmarks/Shootout-C++/hash.cpp (reduced test case is the first test in vector_ptr_load_store.ll). radar://13139343 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174377 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1b242c93ba1..62542737de0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -518,8 +518,9 @@ class LoopVectorizationCostModel { public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, - const TargetTransformInfo &TTI) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} + const TargetTransformInfo &TTI, + DataLayout *DL) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {} /// Information about vectorization costs struct VectorizationFactor { @@ -575,6 +576,10 @@ private: /// the scalar type. static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// Returns whether the instruction is a load or store and will be a emitted + /// as a vector operation. + bool isConsecutiveLoadOrStore(Instruction *I); + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -585,6 +590,8 @@ private: LoopVectorizationLegality *Legal; /// Vector target information. const TargetTransformInfo &TTI; + /// Target data layout information. + DataLayout *DL; }; /// The LoopVectorize Pass. @@ -624,7 +631,7 @@ struct LoopVectorize : public LoopPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); // Check the function attribues to find out if this function should be // optimized for size. @@ -2786,14 +2793,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; // Examine the stored values. - if (StoreInst *ST = dyn_cast(it)) + StoreInst *ST = 0; + if ((ST = dyn_cast(it))) T = ST->getValueOperand()->getType(); - // Ignore stored/loaded pointer types. - if (T->isPointerTy()) - continue; - - MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); + // Ignore loaded pointer types and stored pointer types that are not + // consecutive. However, we do want to take consecutive stores/loads of + // pointer vectors into account. + if (T->isPointerTy() && isConsecutiveLoadOrStore(it)) + MaxWidth = std::max(MaxWidth, DL->getPointerSizeInBits()); + else + MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits()); } } @@ -3241,4 +3251,16 @@ namespace llvm { } } +bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { + // Check for a store. + StoreInst *ST = dyn_cast(Inst); + if (ST) + return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; + // Check for a load. + LoadInst *LI = dyn_cast(Inst); + if (LI) + return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; + + return false; +} diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll new file mode 100644 index 00000000000..c6777184d24 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -0,0 +1,149 @@ +;RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%0 = type { %0*, %1 } +%1 = type { i8*, i32 } + +@p = global [2048 x [8 x i32*]] zeroinitializer, align 16 +@q = global [2048 x i16] zeroinitializer, align 16 +@r = global [2048 x i16] zeroinitializer, align 16 + +; Tests for widest type +; Ensure that we count the pointer store in the first test case. We have a +; consecutive vector of pointers store, therefore we should count it towards the +; widest vector count. +; +; CHECK: test_consecutive_store +; CHECK: The Widest type: 64 bits +define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 { + %4 = load %0** %2, align 8 + %5 = icmp eq %0** %0, %1 + br i1 %5, label %12, label %6 + +;