R600/SI: Replace v1i32 type with i32 in imageload and sample intrinsics

author Tom Stellard <thomas.stellard@amd.com>

Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 40f082723afeaae5fd0de0e063dff00fe39da438..30a510de91c07e1976e843f634e74ade96b9ee60 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -43,8 +43,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
    addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
    addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
  
-  addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
-
    addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
    addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
    addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td

index b7419782d34253e8d523b24797b8c3ebac97b634..263945622311eb7b07b15be7d80eddb0c402dc8a 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -27,7 +27,7 @@ def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
  >;
  
  class SDSample<string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVec<1>, SDTCisVT<2, v32i8>,
+  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
                         SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
  >;
  
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 4704217489172e71c59643a8fbe26b29615a8c39..e719cb32706579bf5fbb61cca737d3c9b3b9c5eb 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1326,7 +1326,7 @@ def : Pat <
  
  /* SIsample for simple 1D texture lookup */
  def : Pat <
-  (SIsample v1i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+  (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
    (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
  >;
  
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp

index 9da11e88eb8b8be43b4f8c3b05eb89469f81834d..f194d8b56dc6b433db0f73a2f7578a82e9a2d661 100644 (file)
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -16,6 +16,9 @@
  ///      legal for some compute APIs, and we don't want to declare it as legal
  ///      in the backend, because we want the legalizer to expand all v16i8
  ///      operations.
+/// v1* => *
+///   - Having v1* types complicates the legalizer and we can easily replace
+///      them with the element type.
  //===----------------------------------------------------------------------===//
  
  #include "AMDGPU.h"
@@ -109,6 +112,19 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
        Types.push_back(i128);
        NeedToReplace = true;
        Name = Name + ".i128";
+    } else if (Arg->getType()->isVectorTy() &&
+               Arg->getType()->getVectorNumElements() == 1 &&
+               Arg->getType()->getVectorElementType() ==
+                                              Type::getInt32Ty(I.getContext())){
+      Type *ElementTy = Arg->getType()->getVectorElementType();
+      std::string TypeName = "i32";
+      InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
+      assert(Def);
+      Args.push_back(Def->getOperand(1));
+      Types.push_back(ElementTy);
+      std::string VecTypeName = "v1" + TypeName;
+      Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
+      NeedToReplace = true;
      } else {
        Args.push_back(Arg);
        Types.push_back(Arg->getType());
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll

index 1c830a9919c7e0ee89f85aa0e042a2d8c6038c19..2651b99a421ac1d27fb50844c88497810b0c2520 100644 (file)
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -135,6 +135,23 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
     ret void
  }
  
+; CHECK: @v1
+; CHECK: IMAGE_SAMPLE VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15
+define void @v1(i32 %a1) {
+entry:
+  %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
+  %1 = call <4 x float> @llvm.SI.sample.v1i32(<1 x i32> %0, <32 x i8> undef, <16 x i8> undef, i32 0)
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = extractelement <4 x float> %1, i32 1
+  %4 = extractelement <4 x float> %1, i32 2
+  %5 = extractelement <4 x float> %1, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %2, float %3, float %4, float %5)
+  ret void
+}
+
+
+declare <4 x float> @llvm.SI.sample.v1i32(<1 x i32>, <32 x i8>, <16 x i8>, i32) readnone
+
  declare <4 x float> @llvm.SI.sample.(<4 x i32>, <32 x i8>, <16 x i8>, i32) readnone
  
  declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 14 Aug 2013 23:24:53 +0000 (23:24 +0000)
lib/Target/R600/SIISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
lib/Target/R600/SITypeRewriter.cpp		patch \| blob \| history
test/CodeGen/R600/llvm.SI.sample.ll		patch \| blob \| history