Add a stub for the x86 cost model impl. Implement a basic cost rule for inserting...
author Nadav Rotem <nrotem@apple.com>
Fri, 2 Nov 2012 23:27:16 +0000 (23:27 +0000)
committer Nadav Rotem <nrotem@apple.com>
Fri, 2 Nov 2012 23:27:16 +0000 (23:27 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167333 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86TargetMachine.h
test/Analysis/CostModel/X86/insert-extract-at-zero.ll [new file with mode: 0644]

index c77d8b69bd322dc2b8cb082b11f82fb89766f026..d4c30369b743d95fc3a8e97b4f9e96e95b31469a 100644 (file)
@@ -19,6 +19,7 @@
 #include "X86RegisterInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -946,6 +947,21 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
+
+  class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
+  public:
+    explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
+    VectorTargetTransformImpl(TL) {}
+
+    virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                        unsigned Index) const {
+      // FP scalars already sit at index #0, so accessing that element is free.
+      if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+        return 0;
+      return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index);
+    }
+  };
+
 }
 
 #endif    // X86ISELLOWERING_H
index 01296c331ec7bd219885bab39f1a6302ac527c4b..12311a1abfbdfdb669ca5e8ace472b425061d67a 100644 (file)
@@ -82,7 +82,7 @@ class X86_32TargetMachine : public X86TargetMachine {
   X86TargetLowering TLInfo;
   X86JITInfo        JITInfo;
   ScalarTargetTransformImpl STTI;
-  VectorTargetTransformImpl VTTI;
+  X86VectorTargetTransformInfo VTTI;
 public:
   X86_32TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -119,7 +119,7 @@ class X86_64TargetMachine : public X86TargetMachine {
   X86TargetLowering TLInfo;
   X86JITInfo        JITInfo;
   ScalarTargetTransformImpl STTI;
-  VectorTargetTransformImpl VTTI;
+  X86VectorTargetTransformInfo VTTI;
 public:
   X86_64TargetMachine(const Target &T, StringRef TT,
                       StringRef CPU, StringRef FS, const TargetOptions &Options,
diff --git a/test/Analysis/CostModel/X86/insert-extract-at-zero.ll b/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
new file mode 100644 (file)
index 0000000..eea5b60
--- /dev/null
@@ -0,0 +1,33 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
+  ;CHECK: cost of 0 {{.*}} extract
+  %A = extractelement <4 x float> undef, i32 0 ; FP element, index 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %B = extractelement <4 x i32> undef, i32 0 ; integer element, index 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %C = extractelement <4 x float> undef, i32 1 ; FP element, non-zero index
+
+  ;CHECK: cost of 0 {{.*}} extract
+  %D = extractelement <8 x float> undef, i32 0
+  ;CHECK: cost of 1 {{.*}} extract
+  %E = extractelement <8 x float> undef, i32 1
+
+  ;CHECK: cost of 1 {{.*}} extract
+  %F = extractelement <8 x float> undef, i32 %arg ; index is a runtime value (%arg)
+
+  ;CHECK: cost of 0 {{.*}} insert
+  %G = insertelement <4 x float> undef, float %fl, i32 0
+  ;CHECK: cost of 1 {{.*}} insert
+  %H = insertelement <4 x float> undef, float %fl, i32 1
+  ;CHECK: cost of 1 {{.*}} insert
+  %I = insertelement <4 x i32> undef, i32 %arg, i32 0 ; integer element, index 0
+
+  ;CHECK: cost of 0 {{.*}} insert
+  %J = insertelement <4 x double> undef, double undef, i32 0 ; double is an FP type too
+
+  ret i32 0
+}