From 0f58dbae4ab2397dc310e19d171a4bc60fc6c9a7 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 8 Feb 2013 18:19:17 +0000 Subject: [PATCH] Refine fix to bug 15041. Thanks to help from Nadav and Hal, I have a more reasonable (and even correct!) approach. This specifically penalizes the insertelement and extractelement operations for the performance hit that will occur on PowerPC processors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174725 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 35 +++++++++---------- .../CostModel/PowerPC/insert_extract.ll | 16 +++++++++ 2 files changed, 33 insertions(+), 18 deletions(-) create mode 100644 test/Analysis/CostModel/PowerPC/insert_extract.ll diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f57d7643e18..5e9ad347d33 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -194,24 +194,23 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { assert(Val->isVectorTy() && "This must be a vector type"); - const unsigned Awful = 1000; - - // Vector element insert/extract with Altivec is very expensive. - // Until VSX is available, avoid vectorizing loops that require - // these operations. - if (Opcode == ISD::EXTRACT_VECTOR_ELT || - Opcode == ISD::INSERT_VECTOR_ELT) - return Awful; - - // We don't vectorize SREM/UREM so well. Constrain the vectorizer - // for those as well. - if (Opcode == ISD::SREM || Opcode == ISD::UREM) - return Awful; - - // VSELECT is not yet implemented, leading to use of insert/extract - // and ISEL, hence not a good idea. - if (Opcode == ISD::VSELECT) - return Awful; + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Estimated cost of a load-hit-store delay. This was obtained + // experimentally as a minimum needed to prevent unprofitable + // vectorization for the paq8p benchmark. It may need to be + // raised further if other unprofitable cases remain. + unsigned LHSPenalty = 12; + + // Vector element insert/extract with Altivec is very expensive, + // because they require store and reload with the attendant + // processor stall for load-hit-store. Until VSX is available, + // these need to be estimated as very costly. + if (ISD == ISD::EXTRACT_VECTOR_ELT || + ISD == ISD::INSERT_VECTOR_ELT) + return LHSPenalty + + TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); } diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll new file mode 100644 index 00000000000..f51963d56fd --- /dev/null +++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @insert(i32 %arg) { + ; CHECK: cost of 13 {{.*}} insertelement + %x = insertelement <4 x i32> undef, i32 %arg, i32 0 + ret i32 undef +} + +define i32 @extract(<4 x i32> %arg) { + ; CHECK: cost of 13 {{.*}} extractelement + %x = extractelement <4 x i32> %arg, i32 0 + ret i32 %x +} + -- 2.34.1