; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s

; Compiles this module for 64-bit NVPTX (sm_20) and pipes the generated PTX
; through FileCheck. The functions in this file load and store <4 x float>
; values with explicit alignments of 1, 4, 8 and 16 bytes.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; t1: load a <4 x float> through a pointer that is only guaranteed to be
; byte-aligned (align 1). The access is under-aligned for every vector and
; 32-bit scalar load, so the test expects no ld.v4 / ld.v2 / ld.f32 before the
; first byte-wide load.
; CHECK-LABEL: t1
define <4 x float> @t1(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK-NOT: ld.f32
; CHECK: ld.u8
  %cast = bitcast i8* %p1 to <4 x float>*
  %r = load <4 x float>, <4 x float>* %cast, align 1
  ret <4 x float> %r
}
; t2: load a <4 x float> with 4-byte alignment. That is enough for a scalar
; f32 access but not for a 2- or 4-element vector access, so the test expects
; no ld.v4 / ld.v2 before the first scalar ld.f32.
; CHECK-LABEL: t2
define <4 x float> @t2(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK: ld.f32
  %cast = bitcast i8* %p1 to <4 x float>*
  %r = load <4 x float>, <4 x float>* %cast, align 4
  ret <4 x float> %r
}
; t3: load a <4 x float> with 8-byte alignment. That is enough for a
; 2-element f32 vector access but not a 4-element one, so the test expects
; no ld.v4 before the first ld.v2.
; CHECK-LABEL: t3
define <4 x float> @t3(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
  %cast = bitcast i8* %p1 to <4 x float>*
  %r = load <4 x float>, <4 x float>* %cast, align 8
  ret <4 x float> %r
}
; t4: load a <4 x float> with its natural 16-byte alignment. The test expects
; a single 4-element vector load.
; CHECK-LABEL: t4
define <4 x float> @t4(i8* %p1) {
; CHECK: ld.v4
  %cast = bitcast i8* %p1 to <4 x float>*
  %r = load <4 x float>, <4 x float>* %cast, align 16
  ret <4 x float> %r
}
; s1: store a <4 x float> through a pointer that is only guaranteed to be
; byte-aligned (align 1). The test expects no st.v4 / st.v2 / st.f32 before
; the first byte-wide store.
; CHECK-LABEL: s1
define void @s1(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK-NOT: st.f32
; CHECK: st.u8
  store <4 x float> %v, <4 x float>* %p1, align 1
  ret void
}
; s2: store a <4 x float> with 4-byte alignment. That is enough for a scalar
; f32 store only, so the test expects no st.v4 / st.v2 before the first
; scalar st.f32.
; CHECK-LABEL: s2
define void @s2(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK: st.f32
  store <4 x float> %v, <4 x float>* %p1, align 4
  ret void
}
; s3: store a <4 x float> with 8-byte alignment. That is enough for a
; 2-element f32 vector store but not a 4-element one, so the test expects
; no st.v4 before the first st.v2.
; CHECK-LABEL: s3
define void @s3(<4 x float>* %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK: st.v2
  store <4 x float> %v, <4 x float>* %p1, align 8
  ret void
}
; s4: store a <4 x float> with its natural 16-byte alignment. The test expects
; a single 4-element vector store.
; CHECK-LABEL: s4
define void @s4(<4 x float>* %p1, <4 x float> %v) {
; CHECK: st.v4
  store <4 x float> %v, <4 x float>* %p1, align 16
  ret void
}