[SeparateConstOffsetFromGEP] strengthen the inbounds attribute

author Jingyue Wu <jingyue@google.com>

Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)

committer Jingyue Wu <jingyue@google.com>

Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)
author Jingyue Wu <jingyue@google.com>
Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)
committer Jingyue Wu <jingyue@google.com>
Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

index 4a875311881a5bd9424880c705e63f5409d83139..ad86c8c282880f7ebb31b1496e914009a0b6486e 100644 (file)
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -891,13 +891,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
    // Clear the inbounds attribute because the new index may be off-bound.
    // e.g.,
    //
-  // b = add i64 a, 5
-  // addr = gep inbounds float* p, i64 b
+  // b     = add i64 a, 5
+  // addr  = gep inbounds float, float* p, i64 b
    //
    // is transformed to:
    //
-  // addr2 = gep float* p, i64 a
-  // addr = gep float* addr2, i64 5
+  // addr2 = gep float, float* p, i64 a ; inbounds removed
+  // addr  = gep inbounds float, float* addr2, i64 5
    //
    // If a is -4, although the old index b is in bounds, the new index a is
    // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -907,6 +907,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
    //
    // TODO(jingyue): do some range analysis to keep as many inbounds as
    // possible. GEPs with inbounds are more friendly to alias analysis.
+  bool GEPWasInBounds = GEP->isInBounds();
    GEP->setIsInBounds(false);
  
    // Lowers a GEP to either GEPs with a single index or arithmetic operations.
@@ -968,6 +969,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
      NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
                                         ConstantInt::get(IntPtrTy, Index, true),
                                         GEP->getName(), GEP);
+    // Inherit the inbounds attribute of the original GEP.
+    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
    } else {
      // Unlikely but possible. For example,
      // #pragma pack(1)
@@ -990,6 +993,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
          Type::getInt8Ty(GEP->getContext()), NewGEP,
          ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
          GEP);
+    // Inherit the inbounds attribute of the original GEP.
+    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
      if (GEP->getType() != I8PtrTy)
        NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
    }
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll

index 527634db0f5b8a81db08b6d89a1dcb4007cab3e3..6f117697dded3524c3ca73fdf9c01fe30be200ae 100644 (file)
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
  
  ; IR-LABEL: @sum_of_array(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
  define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
    %tmp = sext i32 %y to i64
    %tmp1 = sext i32 %x to i64
@@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)
  
  ; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
  ; IR: add i32 %x, 256
  ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
  ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
  ; DS instructions have a larger immediate offset, so make sure these are OK.
  ; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
  define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
    %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
    %tmp4 = load float, float addrspace(3)* %tmp2, align 4
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll

index 073313d40e77a0032d067079fb2ae6dc985317e5..a0410024f6e7adc3d8bf01ce5d461bdd7d337b8f 100644 (file)
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
  
  ; IR-LABEL: @sum_of_array(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
  
  ; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
  ; the order of "sext" and "add" when computing the array indices. @sum_of_array
@@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
  
  ; IR-LABEL: @sum_of_array2(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
  
  
  ; This function loads
@@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
  
  ; IR-LABEL: @sum_of_array3(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
  
  
  ; This function loads
@@ -191,6 +191,6 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
  
  ; IR-LABEL: @sum_of_array4(
  ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

index 2fdd158a35ede9cb72cde4045b11b5d1f54aedce..eeeac19637410211d219aa677d74fb9ab546c8cc 100644 (file)
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -44,7 +44,7 @@ entry:
  ; CHECK: add i32 %j, -2
  ; CHECK: sext
  ; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32
  
  ; We should be able to trace into sext/zext if it can be distributed to both
  ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
@@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
  }
  ; CHECK-LABEL: @ext_add_no_overflow(
  ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33
  
  ; Verifies we handle nested sext/zext correctly.
  define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
@@ -110,7 +110,7 @@ entry:
  }
  ; CHECK-LABEL: @sext_or(
  ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32
  
  ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
  ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -125,7 +125,7 @@ entry:
  }
  ; CHECK-LABEL: @expr(
  ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
  ; CHECK: store i64 %b5, i64* %out
  
  ; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
@@ -143,7 +143,7 @@ entry:
  ; CHECK: sext i32
  ; CHECK: sext i32
  ; CHECK: sext i32
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8
  
  ; Verifies we handle "sub" correctly.
  define float* @sub(i64 %i, i64 %j) {
@@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) {
  ; CHECK-LABEL: @sub(
  ; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
  ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155
  
  %struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
  
@@ -173,7 +173,7 @@ entry:
  ; CHECK-LABEL: @packed_struct(
  ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
  ; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
-; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
  ; CHECK: bitcast i8* %uglygep to i64*
  
  ; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
@@ -272,7 +272,7 @@ entry:
    %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
  ; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
  ; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
+; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
  ; CHECK: bitcast
    ret %struct2* %ptr2
  ; CHECK-NEXT: ret
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll

index d1a0f33d5a21a56d9a7b3a98366a59167e072bf0..601ca52913537878eaeaebaef783b30dc7814b28 100644 (file)
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
@@ -25,7 +25,7 @@ then:
    %or = or i64 %i, 3
    %p = getelementptr inbounds float, float* %input, i64 %or
  ; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr float, float* [[base]], i64 3
+; CHECK: getelementptr inbounds float, float* [[base]], i64 3
    ret float* %p
  
  exit:
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll

index 278250a9c80ee51e2451d419147aa34cae4feec2..f2853aca698f5212f3d5eef614f5070458a76227 100644 (file)
--- a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -57,10 +57,10 @@ bb:
  
  ; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
  ; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
  
  ; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
  define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
  bb:
    %i2 = shl nsw i32 %i, 1
author	Jingyue Wu <jingyue@google.com>
	Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)
committer	Jingyue Wu <jingyue@google.com>
	Thu, 13 Aug 2015 18:48:49 +0000 (18:48 +0000)
lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp		patch | blob | history
test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll		patch | blob | history
test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll		patch | blob | history
test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll		patch | blob | history
test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll		patch | blob | history
test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll		patch | blob | history