// Clear the inbounds attribute because the new index may be off-bound.
// e.g.,
//
- // b = add i64 a, 5
- // addr = gep inbounds float* p, i64 b
+ // b = add i64 a, 5
+ // addr = gep inbounds float, float* p, i64 b
//
// is transformed to:
//
- // addr2 = gep float* p, i64 a
- // addr = gep float* addr2, i64 5
+ // addr2 = gep float, float* p, i64 a ; inbounds removed
+ // addr = gep inbounds float, float* addr2, i64 5
//
// If a is -4, although the old index b is in bounds, the new index a is
// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
//
// TODO(jingyue): do some range analysis to keep as many inbounds as
// possible. GEPs with inbounds are more friendly to alias analysis.
+ bool GEPWasInBounds = GEP->isInBounds();
GEP->setIsInBounds(false);
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
ConstantInt::get(IntPtrTy, Index, true),
GEP->getName(), GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
Type::getInt8Ty(GEP->getContext()), NewGEP,
ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
GEP);
+ // Inherit the inbounds attribute of the original GEP.
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; DS instructions have a larger immediate offset, so make sure these are OK.
; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
%tmp4 = load float, float addrspace(3)* %tmp2, align 4
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
; the order of "sext" and "add" when computing the array indices. @sum_of_array
; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; CHECK: add i32 %j, -2
; CHECK: sext
; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32
; We should be able to trace into sext/zext if it can be distributed to both
; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
}
; CHECK-LABEL: @ext_add_no_overflow(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33
; Verifies we handle nested sext/zext correctly.
define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
}
; CHECK-LABEL: @sext_or(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32
; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
}
; CHECK-LABEL: @expr(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
; CHECK: store i64 %b5, i64* %out
; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
; CHECK: sext i32
; CHECK: sext i32
; CHECK: sext i32
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8
; Verifies we handle "sub" correctly.
define float* @sub(i64 %i, i64 %j) {
; CHECK-LABEL: @sub(
; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155
%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
; CHECK-LABEL: @packed_struct(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
-; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
; CHECK: bitcast i8* %uglygep to i64*
; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
+; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
; CHECK: bitcast
ret %struct2* %ptr2
; CHECK-NEXT: ret
%or = or i64 %i, 3
%p = getelementptr inbounds float, float* %input, i64 %or
; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr float, float* [[base]], i64 3
+; CHECK: getelementptr inbounds float, float* [[base]], i64 3
ret float* %p
exit:
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1