1 ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
3 ; FIXME: Broken on evergreen
4 ; FIXME: For some reason the 8- and 16-element vectors are being stored as
5 ; individual elements instead of as 128-bit stores.
8 ; FIXME: Why is the constant moved into the intermediate register and
9 ; not just directly into the vector component?
; Constant-index case: insert the float constant 5.0 into element 0 of the
; <4 x float> argument %a and store the resulting vector to %out.
11 ; SI-LABEL: @insertelement_v4f32_0:
; NOTE: the four instruction patterns below carry no check prefix, so
; FileCheck (invoked with the SI prefix only) treats them as ordinary
; comments; only the label line above is verified for this function.
; NOTE(review): `{{[}}` looks like an unterminated bracket expression (the
; dynamic-index tests use `{{v\[}}`) - repair it before enabling these lines.
12 ; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
14 ; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
15 ; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
16 ; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
17 define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
18 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
19 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
; Constant-index case: insert 5.0 into element 1 of the <4 x float> argument
; and store the result to %out. Only the function label is checked; no
; instruction patterns are verified here.
23 ; SI-LABEL: @insertelement_v4f32_1:
24 define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
25 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
26 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
; Constant-index case: insert 5.0 into element 2 of the <4 x float> argument
; and store the result to %out. Only the function label is checked; no
; instruction patterns are verified here.
30 ; SI-LABEL: @insertelement_v4f32_2:
31 define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
32 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
33 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
; Constant-index case: insert 5.0 into element 3 (the last lane) of the
; <4 x float> argument and store the result to %out. Only the function label
; is checked; no instruction patterns are verified here.
37 ; SI-LABEL: @insertelement_v4f32_3:
38 define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
39 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
40 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
; Integer counterpart of the constant-index tests: insert the i32 constant
; 999 into element 0 of the <4 x i32> argument and store the result to %out.
; Only the function label is checked.
44 ; SI-LABEL: @insertelement_v4i32_0:
45 define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
46 %vecins = insertelement <4 x i32> %a, i32 999, i32 0
47 store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
; Dynamic-index case: insert 5.0 into the <2 x float> argument at the lane
; selected by the i32 argument %b, then store the vector to %out.
; The checks expect, in order: the constant materialised into a VGPR
; (captured as CONST), a V_MOVRELD_B32 whose source is CONST and whose
; destination register number is captured as LOW_RESULT_REG, and a
; two-dword buffer store whose register range starts at that same register.
51 ; SI-LABEL: @dynamic_insertelement_v2f32:
52 ; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
53 ; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
54 ; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
55 define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
56 %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
57 store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
; Dynamic-index case for <4 x float>: insert 5.0 at the lane given by %b and
; store the vector to %out.
; Same check shape as the two-element variant: constant into a VGPR (CONST),
; V_MOVRELD_B32 from CONST into the register captured as LOW_RESULT_REG, then
; a four-dword buffer store whose register range begins at that register.
61 ; SI-LABEL: @dynamic_insertelement_v4f32:
62 ; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
63 ; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
64 ; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
65 define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
66 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
67 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
; Dynamic-index case for <8 x float>: insert 5.0 at the lane given by %b and
; store the vector to %out.
; The two DWORDX4 store expectations use a FIXMESI prefix instead of the SI
; prefix passed to FileCheck, so they are presumably disabled on purpose;
; the FIXME at the top of the file says 8- and 16-element vectors are
; currently stored element by element. Only the label is verified today.
71 ; SI-LABEL: @dynamic_insertelement_v8f32:
72 ; FIXMESI: BUFFER_STORE_DWORDX4
73 ; FIXMESI: BUFFER_STORE_DWORDX4
74 define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
75 %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
76 store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
; Dynamic-index case for <16 x float>: insert 5.0 at the lane given by %b and
; store the vector to %out.
; The four DWORDX4 store expectations use a FIXMESI prefix instead of the SI
; prefix passed to FileCheck, so they are presumably disabled on purpose
; (see the file-level FIXME about 8- and 16-element vectors). Only the label
; is verified today.
80 ; SI-LABEL: @dynamic_insertelement_v16f32:
81 ; FIXMESI: BUFFER_STORE_DWORDX4
82 ; FIXMESI: BUFFER_STORE_DWORDX4
83 ; FIXMESI: BUFFER_STORE_DWORDX4
84 ; FIXMESI: BUFFER_STORE_DWORDX4
85 define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
86 %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
87 store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
; Dynamic-index case for <2 x i32>: insert the i32 constant 5 at the lane
; given by %b and store the vector to %out.
; The only instruction check is that a two-dword buffer store is emitted.
91 ; SI-LABEL: @dynamic_insertelement_v2i32:
92 ; SI: BUFFER_STORE_DWORDX2
93 define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
94 %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
95 store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
; Dynamic-index case for <4 x i32>: insert the i32 constant 5 at the lane
; given by %b and store the vector to %out.
; The only instruction check is that a four-dword buffer store is emitted.
99 ; SI-LABEL: @dynamic_insertelement_v4i32:
100 ; SI: BUFFER_STORE_DWORDX4
101 define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
102 %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
103 store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
; Dynamic-index case for <8 x i32>: insert the i32 constant 5 at the lane
; given by %b and store the vector to %out.
; The two DWORDX4 store expectations use a FIXMESI prefix instead of the SI
; prefix passed to FileCheck, so they are presumably disabled on purpose
; (see the file-level FIXME about 8- and 16-element vectors).
107 ; SI-LABEL: @dynamic_insertelement_v8i32:
108 ; FIXMESI: BUFFER_STORE_DWORDX4
109 ; FIXMESI: BUFFER_STORE_DWORDX4
110 define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
111 %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
112 store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
; Dynamic-index case for <16 x i32>: insert the i32 constant 5 at the lane
; given by %b and store the vector to %out.
; The four DWORDX4 store expectations use a FIXMESI prefix instead of the SI
; prefix passed to FileCheck, so they are presumably disabled on purpose
; (see the file-level FIXME about 8- and 16-element vectors).
116 ; SI-LABEL: @dynamic_insertelement_v16i32:
117 ; FIXMESI: BUFFER_STORE_DWORDX4
118 ; FIXMESI: BUFFER_STORE_DWORDX4
119 ; FIXMESI: BUFFER_STORE_DWORDX4
120 ; FIXMESI: BUFFER_STORE_DWORDX4
121 define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
122 %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
123 store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
; Dynamic-index case for <2 x i16>: insert the i16 constant 5 at the lane
; given by %b and store the vector to %out.
; The store expectation uses a FIXMESI prefix, so it is not matched under the
; SI prefix; only the label is verified.
; NOTE(review): <2 x i16> is 4 bytes in total, yet the store claims align 8
; and the disabled check expects a DWORDX2 store - confirm both are intended
; before enabling the check.
128 ; SI-LABEL: @dynamic_insertelement_v2i16:
129 ; FIXMESI: BUFFER_STORE_DWORDX2
130 define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
131 %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
132 store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
; Dynamic-index case for <4 x i16>: insert the i16 constant 5 at the lane
; given by %b and store the vector to %out.
; The store expectation uses a FIXMESI prefix, so it is not matched under the
; SI prefix; only the label is verified.
; NOTE(review): <4 x i16> is 8 bytes in total, yet the store claims align 16
; and the disabled check expects a DWORDX4 store - confirm both are intended
; before enabling the check.
136 ; SI-LABEL: @dynamic_insertelement_v4i16:
137 ; FIXMESI: BUFFER_STORE_DWORDX4
138 define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
139 %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
140 store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
; Dynamic-index case for <2 x i8>: insert the i8 constant 5 at the lane given
; by %b and store the vector to %out.
; The BUFFER_STORE_USHORT expectation uses a FIXMESI prefix, so it is not
; matched under the SI prefix; only the label is verified.
; NOTE(review): <2 x i8> is 2 bytes in total but the store claims align 8 -
; confirm the over-alignment is intentional.
145 ; SI-LABEL: @dynamic_insertelement_v2i8:
146 ; FIXMESI: BUFFER_STORE_USHORT
147 define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
148 %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
149 store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
; Dynamic-index case for <4 x i8>: insert the i8 constant 5 at the lane given
; by %b and store the vector to %out.
; The BUFFER_STORE_DWORD expectation uses a FIXMESI prefix, so it is not
; matched under the SI prefix; only the label is verified.
; NOTE(review): <4 x i8> is 4 bytes in total but the store claims align 16 -
; confirm the over-alignment is intentional.
153 ; SI-LABEL: @dynamic_insertelement_v4i8:
154 ; FIXMESI: BUFFER_STORE_DWORD
155 define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
156 %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
157 store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
; Dynamic-index case for <8 x i8>: insert the i8 constant 5 at the lane given
; by %b and store the vector to %out.
; The BUFFER_STORE_DWORDX2 expectation uses a FIXMESI prefix, so it is not
; matched under the SI prefix; only the label is verified.
; NOTE(review): <8 x i8> is 8 bytes in total but the store claims align 16 -
; confirm the over-alignment is intentional.
161 ; SI-LABEL: @dynamic_insertelement_v8i8:
162 ; FIXMESI: BUFFER_STORE_DWORDX2
163 define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
164 %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
165 store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
; Dynamic-index case for <16 x i8>: insert the i8 constant 5 at the lane
; given by %b and store the 16-byte vector to %out.
; The BUFFER_STORE_DWORDX4 expectation uses a FIXMESI prefix, so it is not
; matched under the SI prefix; only the label is verified.
169 ; SI-LABEL: @dynamic_insertelement_v16i8:
170 ; FIXMESI: BUFFER_STORE_DWORDX4
171 define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
172 %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
173 store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16