1 ; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32
2 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
3 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64
4 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; i8 load through an addrspace(1) pointer. The checks expect `ld.global.u8`
; into an %rc register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
8 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
9 ; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
11 ; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
13 %a = load i8 addrspace(1)* %ptr
; i8 load through an addrspace(3) pointer. The checks expect `ld.shared.u8`
; into an %rc register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
17 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
18 ; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
20 ; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
22 %a = load i8 addrspace(3)* %ptr
; i8 load through an addrspace(5) pointer. The checks expect `ld.local.u8`
; into an %rc register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
26 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
27 ; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
29 ; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
31 %a = load i8 addrspace(5)* %ptr
; i16 load through an addrspace(1) pointer. The checks expect `ld.global.u16`
; into an %rs register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
36 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
37 ; PTX32: ld.global.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
39 ; PTX64: ld.global.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
41 %a = load i16 addrspace(1)* %ptr
; i16 load through an addrspace(3) pointer. The checks expect `ld.shared.u16`
; into an %rs register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
45 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
46 ; PTX32: ld.shared.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
48 ; PTX64: ld.shared.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
50 %a = load i16 addrspace(3)* %ptr
; i16 load through an addrspace(5) pointer. The checks expect `ld.local.u16`
; into an %rs register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
54 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
55 ; PTX32: ld.local.u16 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
57 ; PTX64: ld.local.u16 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
59 %a = load i16 addrspace(5)* %ptr
; i32 load through an addrspace(1) pointer. The checks expect `ld.global.u32`
; into an %r register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
64 define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
65 ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
67 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
69 %a = load i32 addrspace(1)* %ptr
; i32 load through an addrspace(3) pointer. The checks expect `ld.shared.u32`
; into an %r register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
73 define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
74 ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
76 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
78 %a = load i32 addrspace(3)* %ptr
; i32 load through an addrspace(5) pointer. The checks expect `ld.local.u32`
; into an %r register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
82 define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
83 ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
85 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
87 %a = load i32 addrspace(5)* %ptr
; i64 load through an addrspace(1) pointer. The checks expect `ld.global.u64`
; into an %rl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
92 define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
93 ; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
95 ; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
97 %a = load i64 addrspace(1)* %ptr
; i64 load through an addrspace(3) pointer. The checks expect `ld.shared.u64`
; into an %rl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
101 define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
102 ; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
104 ; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
106 %a = load i64 addrspace(3)* %ptr
; i64 load through an addrspace(5) pointer. The checks expect `ld.local.u64`
; into an %rl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
110 define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
111 ; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
113 ; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
115 %a = load i64 addrspace(5)* %ptr
; float load through an addrspace(1) pointer. The checks expect `ld.global.f32`
; into an %f register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
120 define float @ld_global_f32(float addrspace(1)* %ptr) {
121 ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
123 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
125 %a = load float addrspace(1)* %ptr
; float load through an addrspace(3) pointer. The checks expect `ld.shared.f32`
; into an %f register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
129 define float @ld_shared_f32(float addrspace(3)* %ptr) {
130 ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
132 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
134 %a = load float addrspace(3)* %ptr
; float load through an addrspace(5) pointer. The checks expect `ld.local.f32`
; into an %f register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
138 define float @ld_local_f32(float addrspace(5)* %ptr) {
139 ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
141 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
143 %a = load float addrspace(5)* %ptr
; double load through an addrspace(1) pointer. The checks expect `ld.global.f64`
; into an %fl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
148 define double @ld_global_f64(double addrspace(1)* %ptr) {
149 ; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
151 ; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
153 %a = load double addrspace(1)* %ptr
; double load through an addrspace(3) pointer. The checks expect `ld.shared.f64`
; into an %fl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
157 define double @ld_shared_f64(double addrspace(3)* %ptr) {
158 ; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
160 ; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
162 %a = load double addrspace(3)* %ptr
; double load through an addrspace(5) pointer. The checks expect `ld.local.f64`
; into an %fl register; the address operand is an %r register under the PTX32
; prefix (-march=nvptx runs) and an %rl register under the PTX64 prefix
; (-march=nvptx64 runs).
166 define double @ld_local_f64(double addrspace(5)* %ptr) {
167 ; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
169 ; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
171 %a = load double addrspace(5)* %ptr