include "llvm/IR/IntrinsicsHexagon.td"
include "llvm/IR/IntrinsicsNVVM.td"
include "llvm/IR/IntrinsicsMips.td"
-include "llvm/IR/IntrinsicsR600.td"
+include "llvm/IR/IntrinsicsAMDGPU.td"
include "llvm/IR/IntrinsicsBPF.td"
include "llvm/IR/IntrinsicsSystemZ.td"
--- /dev/null
+//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the R600- and AMDGPU-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "r600" in {
+
+class R600ReadPreloadRegisterIntrinsic<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
+ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
+ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
+ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
+}
+
+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_global_size">;
+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_local_size">;
+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_ngroups">;
+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_tgid">;
+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_r600_read_tidig">;
+} // End TargetPrefix = "r600"
+
+let TargetPrefix = "AMDGPU" in {
+
+class AMDGPUReadPreloadRegisterIntrinsic<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
+def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+ // 1st parameter: Numerator
+ // 2nd parameter: Denominator
+ // 3rd parameter: Constant to select between first and
+ // second. (0 = first, 1 = second).
+ Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
+ [IntrNoMem]>;
+
+def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
+ Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
+ [IntrNoMem]>;
+
+def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
+ Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem]>;
+
+def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
+
+def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
+ Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_amdgpu_read_workdim">;
+
+} // End TargetPrefix = "AMDGPU"
+++ /dev/null
-//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the R600-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "r600" in {
-
-class R600ReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
-
-multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
- def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
- def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
- def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-}
-
-defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_global_size">;
-defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_local_size">;
-defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_ngroups">;
-defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tgid">;
-defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tidig">;
-} // End TargetPrefix = "r600"
-
-let TargetPrefix = "AMDGPU" in {
-
-class AMDGPUReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
-
-def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
- // 1st parameter: Numerator
- // 2nd parameter: Denominator
- // 3rd parameter: Constant to select select between first and
- // second. (0 = first, 1 = second).
- Intrinsic<[llvm_anyfloat_ty, llvm_i1_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem]>;
-
-def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
- Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem]>;
-
-def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
- Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-
-def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem]>;
-
-def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
-
-def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
- Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-
-def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
- "__builtin_amdgpu_read_workdim">;
-
-} // End TargetPrefix = "AMDGPU"
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
--- /dev/null
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s
+
+; COMMON-LABEL: @test_sink_ptrtoint_asc(
+; ASC: addrspacecast
+; ASC-NOT: ptrtoint
+; ASC-NOT: inttoptr
+
+define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
+bb:
+ %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
+ %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp3 = sext i32 %tmp2 to i64
+ %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
+ %tmp5 = load float, float addrspace(1)* %tmp4, align 4
+ %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)*
+ %tmp7 = fcmp olt float %tmp5, 8.388608e+06
+ br i1 %tmp7, label %bb8, label %bb14
+
+bb8: ; preds = %bb
+ %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1
+ %tmp10 = fmul float %tmp9, 0x3E74442D00000000
+ %tmp11 = fsub float -0.000000e+00, %tmp10
+ %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1
+ store float %tmp12, float addrspace(4)* %tmp6, align 4
+ %tmp13 = fsub float -0.000000e+00, %tmp12
+ br label %bb15
+
+bb14: ; preds = %bb
+ store float 2.000000e+00, float addrspace(4)* %tmp6, align 4
+ br label %bb15
+
+bb15: ; preds = %bb14, %bb8
+ %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ]
+ %tmp17 = fsub float -0.000000e+00, %tmp16
+ %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1
+ %tmp19 = fsub float 2.187500e-01, %tmp18
+ %tmp20 = fsub float 7.187500e-01, %tmp19
+ %tmp21 = fcmp ogt float %tmp5, 1.600000e+01
+ %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20
+ %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3
+ store float %tmp22, float addrspace(1)* %tmp23, align 4
+ ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
-
+++ /dev/null
-; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s
-
-; COMMON-LABEL: @test_sink_ptrtoint_asc(
-; ASC: addrspacecast
-; ASC-NOT: ptrtoint
-; ASC-NOT: inttoptr
-
-define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
-bb:
- %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
- %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp3 = sext i32 %tmp2 to i64
- %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
- %tmp5 = load float, float addrspace(1)* %tmp4, align 4
- %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)*
- %tmp7 = fcmp olt float %tmp5, 8.388608e+06
- br i1 %tmp7, label %bb8, label %bb14
-
-bb8: ; preds = %bb
- %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1
- %tmp10 = fmul float %tmp9, 0x3E74442D00000000
- %tmp11 = fsub float -0.000000e+00, %tmp10
- %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1
- store float %tmp12, float addrspace(4)* %tmp6, align 4
- %tmp13 = fsub float -0.000000e+00, %tmp12
- br label %bb15
-
-bb14: ; preds = %bb
- store float 2.000000e+00, float addrspace(4)* %tmp6, align 4
- br label %bb15
-
-bb15: ; preds = %bb14, %bb8
- %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ]
- %tmp17 = fsub float -0.000000e+00, %tmp16
- %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1
- %tmp19 = fsub float 2.187500e-01, %tmp18
- %tmp20 = fsub float 7.187500e-01, %tmp19
- %tmp21 = fcmp ogt float %tmp5, 1.600000e+01
- %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20
- %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3
- store float %tmp22, float addrspace(1)* %tmp23, align 4
- ret void
-}
-
-declare float @llvm.fma.f32(float, float, float) #1
-declare i32 @llvm.r600.read.tidig.x() #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
--- /dev/null
+; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
+
+; Mostly copied from the x86 version.
+
+; To recognize this pattern:
+;int popcount(unsigned long long a) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; }
+; return c;
+;}
+;
+
+; CHECK-LABEL: @popcount_i64
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i64 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i64 %a.addr.04, -1
+ %and = and i64 %sub, %a.addr.04
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; CHECK-LABEL: @popcount_i32
+; CHECK: entry
+; CHECK: llvm.ctpop.i32
+; CHECK: ret
+define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i32 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i32 %a.addr.04, -1
+ %and = and i32 %sub, %a.addr.04
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+; int c = 0;
+; while (a) {
+; c++;
+; a &= a - 1;
+; mydata1 *= c;
+; mydata2 *= (int)a;
+; }
+; return c + mydata1 + mydata2;
+;}
+
+; CHECK-LABEL: @popcount2
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+ %tobool9 = icmp eq i64 %a, 0
+ br i1 %tobool9, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+ %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+ %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.013, 1
+ %sub = add i64 %a.addr.010, -1
+ %and = and i64 %sub, %a.addr.010
+ %mul = mul nsw i32 %inc, %mydata1.addr.011
+ %conv = trunc i64 %and to i32
+ %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+ %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+ %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+ %add2 = add i32 %add, %c.0.lcssa
+ ret i32 %add2
+}
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
-
+++ /dev/null
-; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
-
-; Mostly copied from x86 version.
-
-;To recognize this pattern:
-;int popcount(unsigned long long a) {
-; int c = 0;
-; while (a) {
-; c++;
-; a &= a - 1;
-; }
-; return c;
-;}
-;
-
-; CHECK-LABEL: @popcount_i64
-; CHECK: entry
-; CHECK: llvm.ctpop.i64
-; CHECK: ret
-define i32 @popcount_i64(i64 %a) nounwind uwtable readnone ssp {
-entry:
- %tobool3 = icmp eq i64 %a, 0
- br i1 %tobool3, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
- %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
- %inc = add nsw i32 %c.05, 1
- %sub = add i64 %a.addr.04, -1
- %and = and i64 %sub, %a.addr.04
- %tobool = icmp eq i64 %and, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
- ret i32 %c.0.lcssa
-}
-
-; CHECK-LABEL: @popcount_i32
-; CHECK: entry
-; CHECK: llvm.ctpop.i32
-; CHECK: ret
-define i32 @popcount_i32(i32 %a) nounwind uwtable readnone ssp {
-entry:
- %tobool3 = icmp eq i32 %a, 0
- br i1 %tobool3, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
- %a.addr.04 = phi i32 [ %and, %while.body ], [ %a, %entry ]
- %inc = add nsw i32 %c.05, 1
- %sub = add i32 %a.addr.04, -1
- %and = and i32 %sub, %a.addr.04
- %tobool = icmp eq i32 %and, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
- ret i32 %c.0.lcssa
-}
-
-; To recognize this pattern:
-;int popcount(unsigned long long a, int mydata1, int mydata2) {
-; int c = 0;
-; while (a) {
-; c++;
-; a &= a - 1;
-; mydata1 *= c;
-; mydata2 *= (int)a;
-; }
-; return c + mydata1 + mydata2;
-;}
-
-; CHECK-LABEL: @popcount2
-; CHECK: entry
-; CHECK: llvm.ctpop.i64
-; CHECK: ret
-define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
-entry:
- %tobool9 = icmp eq i64 %a, 0
- br i1 %tobool9, label %while.end, label %while.body
-
-while.body: ; preds = %entry, %while.body
- %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
- %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
- %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
- %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
- %inc = add nsw i32 %c.013, 1
- %sub = add i64 %a.addr.010, -1
- %and = and i64 %sub, %a.addr.010
- %mul = mul nsw i32 %inc, %mydata1.addr.011
- %conv = trunc i64 %and to i32
- %mul1 = mul nsw i32 %conv, %mydata2.addr.012
- %tobool = icmp eq i64 %and, 0
- br i1 %tobool, label %while.end, label %while.body
-
-while.end: ; preds = %while.body, %entry
- %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
- %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
- %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
- %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
- %add2 = add i32 %add, %c.0.lcssa
- ret i32 %add2
-}
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
--- /dev/null
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+; Simple 3-pair chain with loads and stores
+define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_3_3_3(
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+ %i0 = load double, double addrspace(3)* %a, align 8
+ %i1 = load double, double addrspace(3)* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
+ %i3 = load double, double addrspace(3)* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
+ %i4 = load double, double addrspace(3)* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double addrspace(3)* %c, align 8
+ %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
+ store double %mul5, double addrspace(3)* %arrayidx5, align 8
+ ret void
+}
+
+define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
+; CHECK-LABEL: @test1_as_3_0_0(
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
+; CHECK: ret
+ %i0 = load double, double addrspace(3)* %a, align 8
+ %i1 = load double, double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
+ %i3 = load double, double addrspace(3)* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+ %i4 = load double, double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ ret void
+}
+
+define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
+; CHECK-LABEL: @test1_as_0_0_3(
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
+; CHECK: ret
+ %i0 = load double, double* %a, align 8
+ %i1 = load double, double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
+ %i3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+ %i4 = load double, double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double addrspace(3)* %c, align 8
+ %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
+ store double %mul5, double addrspace(3)* %arrayidx5, align 8
+ ret void
+}
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
-
+++ /dev/null
-; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
-
-target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-
-
-; Simple 3-pair chain with loads and stores
-define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_3_3_3(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
- %i0 = load double, double addrspace(3)* %a, align 8
- %i1 = load double, double addrspace(3)* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
- %i3 = load double, double addrspace(3)* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
- %i4 = load double, double addrspace(3)* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double addrspace(3)* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
- store double %mul5, double addrspace(3)* %arrayidx5, align 8
- ret void
-}
-
-define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
-; CHECK-LABEL: @test1_as_3_0_0(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
-; CHECK: ret
- %i0 = load double, double addrspace(3)* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
- %i3 = load double, double addrspace(3)* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-}
-
-define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_0_0_3(
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double addrspace(3)* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
- store double %mul5, double addrspace(3)* %arrayidx5, align 8
- ret void
-}
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
--- /dev/null
+; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp = sext i32 %y to i64
+ %tmp1 = sext i32 %x to i64
+ %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 1
+ %tmp7 = sext i32 %tmp6 to i64
+ %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 1
+ %tmp13 = sext i32 %tmp12 to i64
+ %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
+}
+
+@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
+
+; Some of the indices go over the maximum mubuf offset, so don't split them.
+
+; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: add i32 %x, 256
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp = sext i32 %y to i64
+ %tmp1 = sext i32 %x to i64
+ %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
+ %tmp4 = load float, float addrspace(2)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 255
+ %tmp7 = sext i32 %tmp6 to i64
+ %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
+ %tmp10 = load float, float addrspace(2)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 256
+ %tmp13 = sext i32 %tmp12 to i64
+ %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
+ %tmp16 = load float, float addrspace(2)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
+ %tmp20 = load float, float addrspace(2)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
+}
+
+
+@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4
+
+; DS instructions have a larger immediate offset, so these constant offsets should still be split out.
+; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
+ %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
+ %tmp4 = load float, float addrspace(3)* %tmp2, align 4
+ %tmp5 = fadd float %tmp4, 0.000000e+00
+ %tmp6 = add i32 %y, 255
+ %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6
+ %tmp10 = load float, float addrspace(3)* %tmp8, align 4
+ %tmp11 = fadd float %tmp5, %tmp10
+ %tmp12 = add i32 %x, 4032
+ %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y
+ %tmp16 = load float, float addrspace(3)* %tmp14, align 4
+ %tmp17 = fadd float %tmp11, %tmp16
+ %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6
+ %tmp20 = load float, float addrspace(3)* %tmp18, align 4
+ %tmp21 = fadd float %tmp17, %tmp20
+ store float %tmp21, float addrspace(1)* %output, align 4
+ ret void
+}
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
-
+++ /dev/null
-; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
-
-; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
-define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
- %tmp = sext i32 %y to i64
- %tmp1 = sext i32 %x to i64
- %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp
- %tmp4 = load float, float addrspace(2)* %tmp2, align 4
- %tmp5 = fadd float %tmp4, 0.000000e+00
- %tmp6 = add i32 %y, 1
- %tmp7 = sext i32 %tmp6 to i64
- %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp7
- %tmp10 = load float, float addrspace(2)* %tmp8, align 4
- %tmp11 = fadd float %tmp5, %tmp10
- %tmp12 = add i32 %x, 1
- %tmp13 = sext i32 %tmp12 to i64
- %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp
- %tmp16 = load float, float addrspace(2)* %tmp14, align 4
- %tmp17 = fadd float %tmp11, %tmp16
- %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp13, i64 %tmp7
- %tmp20 = load float, float addrspace(2)* %tmp18, align 4
- %tmp21 = fadd float %tmp17, %tmp20
- store float %tmp21, float addrspace(1)* %output, align 4
- ret void
-}
-
-@array2 = internal addrspace(2) constant [4096 x [4 x float]] zeroinitializer, align 4
-
-; Some of the indices go over the maximum mubuf offset, so don't split them.
-
-; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
-; IR: add i32 %x, 256
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
- %tmp = sext i32 %y to i64
- %tmp1 = sext i32 %x to i64
- %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp
- %tmp4 = load float, float addrspace(2)* %tmp2, align 4
- %tmp5 = fadd float %tmp4, 0.000000e+00
- %tmp6 = add i32 %y, 255
- %tmp7 = sext i32 %tmp6 to i64
- %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp7
- %tmp10 = load float, float addrspace(2)* %tmp8, align 4
- %tmp11 = fadd float %tmp5, %tmp10
- %tmp12 = add i32 %x, 256
- %tmp13 = sext i32 %tmp12 to i64
- %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp
- %tmp16 = load float, float addrspace(2)* %tmp14, align 4
- %tmp17 = fadd float %tmp11, %tmp16
- %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp13, i64 %tmp7
- %tmp20 = load float, float addrspace(2)* %tmp18, align 4
- %tmp21 = fadd float %tmp17, %tmp20
- store float %tmp21, float addrspace(1)* %output, align 4
- ret void
-}
-
-
-@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4
-
-; DS instructions have a larger immediate offset, so make sure these are OK.
-; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
-define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
- %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
- %tmp4 = load float, float addrspace(3)* %tmp2, align 4
- %tmp5 = fadd float %tmp4, 0.000000e+00
- %tmp6 = add i32 %y, 255
- %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6
- %tmp10 = load float, float addrspace(3)* %tmp8, align 4
- %tmp11 = fadd float %tmp5, %tmp10
- %tmp12 = add i32 %x, 4032
- %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y
- %tmp16 = load float, float addrspace(3)* %tmp14, align 4
- %tmp17 = fadd float %tmp11, %tmp16
- %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6
- %tmp20 = load float, float addrspace(3)* %tmp18, align 4
- %tmp21 = fadd float %tmp17, %tmp20
- store float %tmp21, float addrspace(1)* %output, align 4
- ret void
-}
--- /dev/null
+; Both invocations below run opt's simplifycfg pass for the r600 triple, once
+; per -mcpu value, and verify the output with the same pair of check prefixes.
+; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+
+
+; Zero-guarded ctlz on i64. The input returns 64 when %A is 0 and otherwise
+; the result of llvm.ctlz.i64 called with its i1 argument set to true. The
+; checks below expect the branch diamond to become icmp + ctlz + select.
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]]
+; SI-NEXT: ret i64 [[SEL]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+ ret i64 %cond
+}
+
+
+; Zero-guarded ctlz on i32. The input returns 32 when %A is 0 and otherwise
+; the result of llvm.ctlz.i32 called with its i1 argument set to true. The
+; checks below expect the branch diamond to become icmp + ctlz + select.
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]]
+; SI-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+
+; Zero-guarded ctlz on a signext i16. The input returns 16 when %A is 0 and
+; otherwise the result of llvm.ctlz.i16 called with its i1 argument set to
+; true. The checks below expect icmp + ctlz + select with no branch.
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]]
+; SI-NEXT: ret i16 [[SEL]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+ ret i16 %cond
+}
+
+
+; Zero-guarded cttz on i64. The input returns 64 when %A is 0 and otherwise
+; the result of llvm.cttz.i64 called with its i1 argument set to true. The
+; checks below expect the branch diamond to become icmp + cttz + select.
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]]
+; SI-NEXT: ret i64 [[SEL]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+ ret i64 %cond
+}
+
+
+; Zero-guarded cttz on i32. The input returns 32 when %A is 0 and otherwise
+; the result of llvm.cttz.i32 called with its i1 argument set to true. The
+; checks below expect the branch diamond to become icmp + cttz + select.
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]]
+; SI-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+
+; Zero-guarded cttz on a signext i16. The input returns 16 when %A is 0 and
+; otherwise the result of llvm.cttz.i16 called with its i1 argument set to
+; true. The checks below expect icmp + cttz + select with no branch.
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]]
+; SI-NEXT: ret i16 [[SEL]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+ ret i16 %cond
+}
+
+
+; Zero-guarded ctlz on i64 whose zero-case value is 63 rather than the bit
+; width. The checks below expect icmp + ctlz + select, with the select still
+; yielding 63 for a zero input.
+define i64 @test1c(i64 %A) {
+; ALL-LABEL: @test1c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]]
+; ALL-NEXT: ret i64 [[SEL]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+ ret i64 %cond
+}
+
+; Zero-guarded ctlz on i32 whose zero-case value is 31 rather than the bit
+; width. The checks below expect icmp + ctlz + select, with the select still
+; yielding 31 for a zero input.
+define i32 @test2c(i32 %A) {
+; ALL-LABEL: @test2c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]]
+; ALL-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+ ret i32 %cond
+}
+
+
+; Zero-guarded ctlz on a signext i16 whose zero-case value is 15 rather than
+; the bit width. The checks below expect icmp + ctlz + select, with the select
+; still yielding 15 for a zero input.
+define signext i16 @test3c(i16 signext %A) {
+; ALL-LABEL: @test3c(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]]
+; ALL-NEXT: ret i16 [[SEL]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+ ret i16 %cond
+}
+
+
+; Zero-guarded cttz on i64 whose zero-case value is 63 rather than the bit
+; width. The checks below expect icmp + cttz + select, with the select still
+; yielding 63 for a zero input.
+define i64 @test1d(i64 %A) {
+; ALL-LABEL: @test1d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]]
+; ALL-NEXT: ret i64 [[SEL]]
+entry:
+ %tobool = icmp eq i64 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+ ret i64 %cond
+}
+
+
+; Zero-guarded cttz on i32 whose zero-case value is 31 rather than the bit
+; width. The checks below expect icmp + cttz + select, with the select still
+; yielding 31 for a zero input.
+define i32 @test2d(i32 %A) {
+; ALL-LABEL: @test2d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]]
+; ALL-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+ ret i32 %cond
+}
+
+
+; Zero-guarded cttz on a signext i16 whose zero-case value is 15 rather than
+; the bit width. The checks below expect icmp + cttz + select, with the select
+; still yielding 15 for a zero input.
+define signext i16 @test3d(i16 signext %A) {
+; ALL-LABEL: @test3d(
+; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]]
+; ALL-NEXT: ret i16 [[SEL]]
+entry:
+ %tobool = icmp eq i16 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true: ; preds = %entry
+ %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.true
+ %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+ ret i16 %cond
+}
+
+
+; Declarations of the ctlz/cttz intrinsics called by the test functions in
+; this file, one per tested integer width (i64, i32, i16).
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+++ /dev/null
-; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-; RUN: opt -S -simplifycfg -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-
-
-define i64 @test1(i64 %A) {
-; ALL-LABEL: @test1(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTLZ]]
-; SI-NEXT: ret i64 [[SEL]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2(i32 %A) {
-; ALL-LABEL: @test2(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTLZ]]
-; SI-NEXT: ret i32 [[SEL]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3(i16 signext %A) {
-; ALL-LABEL: @test3(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; SI-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTLZ]]
-; SI-NEXT: ret i16 [[SEL]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1b(i64 %A) {
-; ALL-LABEL: @test1b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 64, i64 [[CTTZ]]
-; SI-NEXT: ret i64 [[SEL]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2b(i32 %A) {
-; ALL-LABEL: @test2b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTTZ]]
-; SI-NEXT: ret i32 [[SEL]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3b(i16 signext %A) {
-; ALL-LABEL: @test3b(
-; SI: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; SI-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; SI-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 16, i16 [[CTTZ]]
-; SI-NEXT: ret i16 [[SEL]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1c(i64 %A) {
-; ALL-LABEL: @test1c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTLZ]]
-; ALL-NEXT: ret i64 [[SEL]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-define i32 @test2c(i32 %A) {
-; ALL-LABEL: @test2c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTLZ]]
-; ALL-NEXT: ret i32 [[SEL]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3c(i16 signext %A) {
-; ALL-LABEL: @test3c(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL-NEXT: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTLZ]]
-; ALL-NEXT: ret i16 [[SEL]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1d(i64 %A) {
-; ALL-LABEL: @test1d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i64 63, i64 [[CTTZ]]
-; ALL-NEXT: ret i64 [[SEL]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2d(i32 %A) {
-; ALL-LABEL: @test2d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 31, i32 [[CTTZ]]
-; ALL-NEXT: ret i32 [[SEL]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3d(i16 signext %A) {
-; ALL-LABEL: @test3d(
-; ALL: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL-NEXT: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; ALL-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i16 15, i16 [[CTTZ]]
-; ALL-NEXT: ret i16 [[SEL]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16, i1)
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
--- /dev/null
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
--- /dev/null
+; Runs separate-const-offset-from-gep, slsr and gvn, in that order, for the
+; amdgcn triple and verifies the resulting IR with FileCheck.
+; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+
+; Loads arr[i + 1023] and arr[2*i + 1023] from addrspace(1) and stores each
+; value to %out. The checks below expect the second address to be built from
+; the first one by applying the same i64 bump again.
+; NOTE(review): 1023 is presumably the largest element offset that still fits
+; the mubuf immediate, going by the function name - confirm against the target.
+; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
+; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
+; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
+define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
+bb:
+ %i2 = shl nsw i32 %i, 1
+ %j1 = add nsw i32 %i, 1023
+ %tmp = sext i32 %j1 to i64
+ %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+ %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
+ %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+ %j2 = add nsw i32 %i2, 1023
+ %tmp5 = sext i32 %j2 to i64
+ %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+ %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
+ %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+ ret void
+}
+
+; Loads arr[i + 1024] and arr[2*i + 1024] from addrspace(1) and stores each
+; value to %out. The checks below expect the original add, the sext and both
+; inbounds geps off %arr to remain in the output.
+; NOTE(review): 1024 is presumably one element past what the mubuf immediate
+; can encode, going by the function name - confirm against the target.
+; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
+; CHECK: %j1 = add nsw i32 %i, 1024
+; CHECK: %tmp = sext i32 %j1 to i64
+; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
+bb:
+ %i2 = shl nsw i32 %i, 1
+ %j1 = add nsw i32 %i, 1024
+ %tmp = sext i32 %j1 to i64
+ %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
+ %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
+ %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+ %j2 = add nsw i32 %i2, 1024
+ %tmp5 = sext i32 %j2 to i64
+ %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
+ %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
+ %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+ ret void
+}
+
+; Loads arr[i + 16383] and arr[2*i + 16383] from addrspace(3) using i32
+; indices and stores each value to addrspace(1) %out. The checks below expect
+; each address to be a gep by %i followed by a gep by the constant 16383, with
+; the second variable-index gep based on the first.
+; NOTE(review): 16383 is presumably the largest element offset the DS
+; immediate can hold, going by the function name - confirm against the target.
+; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
+; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
+; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+
+; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
+; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
+bb:
+ %i2 = shl nsw i32 %i, 1
+ %j1 = add nsw i32 %i, 16383
+ %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+ %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
+ %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
+ %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+ %j2 = add nsw i32 %i2, 16383
+ %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+ %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
+ %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
+ %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+ ret void
+}
+
+; Loads arr[i + 16384] and arr[2*i + 16384] from addrspace(3) using i32
+; indices and stores each value to addrspace(1) %out. The checks below expect
+; the add producing %j1 to be kept, %j2 to be rewritten as %j1 + %i, and both
+; inbounds geps to index %arr directly with those sums.
+; NOTE(review): 16384 is presumably one element past what the DS immediate can
+; encode, going by the function name - confirm against the target.
+; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
+; CHECK: %j1 = add nsw i32 %i, 16384
+; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+; CHECK: %j2 = add i32 %j1, %i
+; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
+bb:
+ %i2 = shl nsw i32 %i, 1
+ %j1 = add nsw i32 %i, 16384
+ %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
+ %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
+ %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
+ %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v11, i32 addrspace(1)* %tmp4, align 4
+
+ %j2 = add nsw i32 %i2, 16384
+ %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
+ %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
+ %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
+ %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v22, i32 addrspace(1)* %tmp7, align 4
+
+ ret void
+}
+++ /dev/null
-if not 'R600' in config.root.targets:
- config.unsupported = True
+++ /dev/null
-; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s
-
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-
-
-; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset(
-; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]]
-; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]]
-define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
-bb:
- %i2 = shl nsw i32 %i, 1
- %j1 = add nsw i32 %i, 1023
- %tmp = sext i32 %j1 to i64
- %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
- %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
- %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
- %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
- %j2 = add nsw i32 %i2, 1023
- %tmp5 = sext i32 %j2 to i64
- %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
- %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
- %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
- %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
- ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset(
-; CHECK: %j1 = add nsw i32 %i, 1024
-; CHECK: %tmp = sext i32 %j1 to i64
-; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
-; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
-define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) {
-bb:
- %i2 = shl nsw i32 %i, 1
- %j1 = add nsw i32 %i, 1024
- %tmp = sext i32 %j1 to i64
- %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp
- %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)*
- %v11 = load i32, i32 addrspace(1)* %tmp3, align 4
- %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
- %j2 = add nsw i32 %i2, 1024
- %tmp5 = sext i32 %j2 to i64
- %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5
- %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)*
- %v22 = load i32, i32 addrspace(1)* %tmp6, align 4
- %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
- ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
-; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
-
-; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
-define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
-bb:
- %i2 = shl nsw i32 %i, 1
- %j1 = add nsw i32 %i, 16383
- %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
- %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
- %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
- %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
- %j2 = add nsw i32 %i2, 16383
- %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
- %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
- %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
- %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
- ret void
-}
-
-; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset(
-; CHECK: %j1 = add nsw i32 %i, 16384
-; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
-; CHECK: %j2 = add i32 %j1, %i
-; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
-define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
-bb:
- %i2 = shl nsw i32 %i, 1
- %j1 = add nsw i32 %i, 16384
- %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1
- %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)*
- %v11 = load i32, i32 addrspace(3)* %tmp3, align 4
- %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v11, i32 addrspace(1)* %tmp4, align 4
-
- %j2 = add nsw i32 %i2, 16384
- %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2
- %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)*
- %v22 = load i32, i32 addrspace(3)* %tmp6, align 4
- %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
- store i32 %v22, i32 addrspace(1)* %tmp7, align 4
-
- ret void
-}