1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
; @ld_global_i8: loads an i8 through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.u8 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
6 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
7 ; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
9 ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
11 %a = load i8, i8 addrspace(1)* %ptr
; @ld_shared_i8: loads an i8 through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.u8 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
15 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
16 ; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
18 ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
20 %a = load i8, i8 addrspace(3)* %ptr
; @ld_local_i8: loads an i8 through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.u8 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
24 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
25 ; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
27 ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
29 %a = load i8, i8 addrspace(5)* %ptr
; @ld_global_i16: loads an i16 through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.u16 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
34 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
35 ; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
37 ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
39 %a = load i16, i16 addrspace(1)* %ptr
; @ld_shared_i16: loads an i16 through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.u16 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
43 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
44 ; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
46 ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
48 %a = load i16, i16 addrspace(3)* %ptr
; @ld_local_i16: loads an i16 through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.u16 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
52 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
53 ; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
55 ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
57 %a = load i16, i16 addrspace(5)* %ptr
; @ld_global_i32: loads an i32 through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.u32 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
62 define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
63 ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
65 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
67 %a = load i32, i32 addrspace(1)* %ptr
; @ld_shared_i32: loads an i32 through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.u32 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
71 define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
72 ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
74 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
76 %a = load i32, i32 addrspace(3)* %ptr
; @ld_local_i32: loads an i32 through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.u32 into a %r<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
80 define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
81 ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
83 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
85 %a = load i32, i32 addrspace(5)* %ptr
; @ld_global_i64: loads an i64 through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.u64 into a %rd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
90 define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
91 ; PTX32: ld.global.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
93 ; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
95 %a = load i64, i64 addrspace(1)* %ptr
; @ld_shared_i64: loads an i64 through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.u64 into a %rd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
99 define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
100 ; PTX32: ld.shared.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
102 ; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
104 %a = load i64, i64 addrspace(3)* %ptr
; @ld_local_i64: loads an i64 through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.u64 into a %rd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
108 define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
109 ; PTX32: ld.local.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
111 ; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
113 %a = load i64, i64 addrspace(5)* %ptr
; @ld_global_f32: loads a float through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.f32 into a %f<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
118 define float @ld_global_f32(float addrspace(1)* %ptr) {
119 ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
121 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
123 %a = load float, float addrspace(1)* %ptr
; @ld_shared_f32: loads a float through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.f32 into a %f<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
127 define float @ld_shared_f32(float addrspace(3)* %ptr) {
128 ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
130 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
132 %a = load float, float addrspace(3)* %ptr
; @ld_local_f32: loads a float through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.f32 into a %f<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
136 define float @ld_local_f32(float addrspace(5)* %ptr) {
137 ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
139 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
141 %a = load float, float addrspace(5)* %ptr
; @ld_global_f64: loads a double through an addrspace(1) pointer.
; Both llc invocations are expected to emit ld.global.f64 into a %fd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
146 define double @ld_global_f64(double addrspace(1)* %ptr) {
147 ; PTX32: ld.global.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
149 ; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
151 %a = load double, double addrspace(1)* %ptr
; @ld_shared_f64: loads a double through an addrspace(3) pointer.
; Both llc invocations are expected to emit ld.shared.f64 into a %fd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
155 define double @ld_shared_f64(double addrspace(3)* %ptr) {
156 ; PTX32: ld.shared.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
158 ; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
160 %a = load double, double addrspace(3)* %ptr
; @ld_local_f64: loads a double through an addrspace(5) pointer.
; Both llc invocations are expected to emit ld.local.f64 into a %fd<N> operand;
; the address operand is %r<N> with -march=nvptx and %rd<N> with -march=nvptx64.
164 define double @ld_local_f64(double addrspace(5)* %ptr) {
165 ; PTX32: ld.local.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
167 ; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
169 %a = load double, double addrspace(5)* %ptr