; CHECK: load float
; CHECK: load float
; CHECK: call void @unknown
-define void @test(float * %a, float * %b) {
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+define void @test(float * %a, float * %b, float * %c, float * %d) {
entry:
+ ; Don't vectorize these loads.
%l0 = load float, float* %a
%a1 = getelementptr inbounds float, float* %a, i64 1
%l1 = load float, float* %a1
call void @unknown()
call void @unknown()
+ ; Don't vectorize these stores because their operands are too far away.
store float %l0, float* %b
%b1 = getelementptr inbounds float, float* %b, i64 1
store float %l1, float* %b1
store float %l2, float* %b2
%b3 = getelementptr inbounds float, float* %b, i64 3
store float %l3, float* %b3
+
+ ; But still vectorize the following instructions, because even if the budget
+ ; is exceeded there is a minimum region size.
+ %l4 = load float, float* %c
+ %c1 = getelementptr inbounds float, float* %c, i64 1
+ %l5 = load float, float* %c1
+ %c2 = getelementptr inbounds float, float* %c, i64 2
+ %l6 = load float, float* %c2
+ %c3 = getelementptr inbounds float, float* %c, i64 3
+ %l7 = load float, float* %c3
+
+ store float %l4, float* %d
+ %d1 = getelementptr inbounds float, float* %d, i64 1
+ store float %l5, float* %d1
+ %d2 = getelementptr inbounds float, float* %d, i64 2
+ store float %l6, float* %d2
+ %d3 = getelementptr inbounds float, float* %d, i64 3
+ store float %l7, float* %d3
+
ret void
}