/// The contract for this function is the same as \c getOperationCost except
/// that it supports an interface that provides extra information specific to
/// the GEP operation.
- unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) const;
/// \brief Estimate the cost of a function call when lowered.
///
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
- virtual unsigned getGEPCost(const Value *Ptr,
+ virtual unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;
virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
return Impl.getOperationCost(Opcode, Ty, OpTy);
}
- unsigned getGEPCost(const Value *Ptr,
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) override {
- return Impl.getGEPCost(Ptr, Operands);
+ return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
return Impl.getCallCost(FTy, NumArgs);
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
}
}
- unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) {
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
return static_cast<T *>(this)->getCallCost(F, Arguments.size());
}
+ using BaseT::getGEPCost;
+
+ unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands) {
+ const GlobalValue *BaseGV = nullptr;
+ if (Ptr != nullptr) {
+ // TODO: will remove this when pointers have an opaque type.
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+ PointeeType &&
+ "explicit pointee type doesn't match operand's pointee type");
+ BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
+ }
+ bool HasBaseReg = (BaseGV == nullptr);
+ int64_t BaseOffset = 0;
+ int64_t Scale = 0;
+
+ // Assumes the address space is 0 when Ptr is nullptr.
+ unsigned AS =
+ (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
+ auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands);
+ for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return TTI::TCC_Basic;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+
+ if (static_cast<T *>(this)->isLegalAddressingMode(
+ PointerType::get(*GTI, AS), const_cast<GlobalValue *>(BaseGV),
+ BaseOffset, HasBaseReg, Scale, AS)) {
+ return TTI::TCC_Free;
+ }
+ return TTI::TCC_Basic;
+ }
+
using BaseT::getIntrinsicCost;
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
return TTI::TCC_Free; // Model all PHI nodes as free.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return static_cast<T *>(this)
- ->getGEPCost(GEP->getPointerOperand(), Indices);
+ SmallVector<Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
+ return static_cast<T *>(this)->getGEPCost(
+ GEP->getSourceElementType(), GEP->getPointerOperand(), Indices);
}
if (auto CS = ImmutableCallSite(U)) {
// current type directly.
Type *operator->() const { return operator*(); }
- Value *getOperand() const { return *OpIt; }
+ Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
generic_gep_type_iterator& operator++() { // Preincrement
if (CurTy.getInt()) {
return -1;
switch (I->getOpcode()) {
- case Instruction::GetElementPtr:{
- Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
- return TTI->getAddressComputationCost(ValTy);
- }
+ case Instruction::GetElementPtr:
+ return TTI->getUserCost(I);
case Instruction::Ret:
case Instruction::PHI:
Basis.CandidateKind == C.CandidateKind);
}
+// TODO: use TTI->getGEPCost.
static bool isGEPFoldable(GetElementPtrInst *GEP,
const TargetTransformInfo *TTI,
const DataLayout *DL) {
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
-define void @test_geps() {
- ; Cost of scalar integer geps should be one. We can't always expect it to be
- ; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+define void @test_geps(i32 %i) {
+ ; GEPs with index 0 are essentially NOOPs.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a0 = getelementptr inbounds i8, i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a1 = getelementptr inbounds i16, i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a2 = getelementptr inbounds i32, i32* undef, i32 0
-
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a3 = getelementptr inbounds i64, i64* undef, i32 0
-
- ; Cost of scalar floating point geps should be one. We cannot fold the address
- ; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
%a4 = getelementptr inbounds float, float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
%a5 = getelementptr inbounds double, double* undef, i32 0
-
-
- ; Cost of vector geps should be one. We cannot fold the address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
+ ; Cost of GEPs is one if we cannot fold the address computation.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %b0 = getelementptr inbounds i8, i8* undef, i32 1024
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %b1 = getelementptr inbounds i16, i16* undef, i32 1024
+ ; Thumb-2 cannot fold offset >= 2^12 into address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %b2 = getelementptr inbounds i32, i32* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %b3 = getelementptr inbounds i64, i64* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+ %b4 = getelementptr inbounds float, float* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+ %b5 = getelementptr inbounds double, double* undef, i32 1024
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
+
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %c0 = getelementptr inbounds i8, i8* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %c1 = getelementptr inbounds i16, i16* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %c2 = getelementptr inbounds i32, i32* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %c3 = getelementptr inbounds i64, i64* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+ %c4 = getelementptr inbounds float, float* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+ %c5 = getelementptr inbounds double, double* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
+ ; Thumb-2 cannot fold scales larger than 8 to address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
ret void
}
; -- No triple in this module --
-;CHECK: cost of 1 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+; CHECK-LABEL: function 'no_info'
+; CHECK: cost of 1 {{.*}} add
+; CHECK: cost of 1 {{.*}} ret
define i32 @no_info(i32 %arg) {
%e = add i32 %arg, %arg
ret i32 %e
}
+
+define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
+; CHECK-LABEL: function 'addressing_mode_reg_reg'
+ %p = getelementptr i8, i8* %a, i32 %b ; NoTTI accepts reg+reg addressing.
+; CHECK: cost of 0 {{.*}} getelementptr
+ %v = load i8, i8* %p
+ ret i8 %v
+}
+
+; CHECK-LABEL: function 'addressing_mode_scaled_reg'
+define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
+ ; NoTTI rejects reg+scale*reg addressing.
+ %p = getelementptr i32, i32* %a, i32 %b
+; CHECK: cost of 1 {{.*}} getelementptr
+ %v = load i32, i32* %p
+ ret i32 %v
+}
%reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
- %add = add i16 %0, %reduction.026
+ %mul = shl i16 %0, 1
+ %add = add i16 %mul, %reduction.026
%sext = mul i64 %indvars.iv, 12884901888
%idxprom3 = ashr exact i64 %sext, 32
%arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
%1 = load i16, i16* %arrayidx4, align 2
- %add7 = add i16 %add, %1
+ %mul2 = shl i16 %1, 1
+ %add7 = add i16 %add, %mul2
%sext28 = mul i64 %indvars.iv, 21474836480
%idxprom10 = ashr exact i64 %sext28, 32
%arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
%2 = load i16, i16* %arrayidx11, align 2
- %add14 = add i16 %add7, %2
+ %mul3 = shl i16 %2, 1
+ %add14 = add i16 %add7, %mul3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end: ; preds = %for.body
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body