; Target configuration for this test module: an explicit data layout string
; and an x86-64 macOS 10.8 target triple.
1 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
2 target triple = "x86_64-apple-macosx10.8.0"
; Test driver line: feeds this file to opt with -basicaa and -slp-vectorizer
; for the corei7 CPU, then matches the printed IR against the FileCheck
; directives embedded in the comments of this file.
3 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
; Test case: a two-element copy loop whose loads and stores are both adjacent.
; Each iteration copies src[0] and src[1] to dst[0] and dst[1], so the
; directives below expect a <2 x double> load followed by a <2 x double> store
; in the optimizer output.
6 ; CHECK: tiny_tree_fully_vectorizable
7 ; CHECK: load <2 x double>
8 ; CHECK: store <2 x double>
; NOTE(review): the %entry block named by the phis below, the terminator after
; for.end, the closing brace and attribute group #0 are not visible in this
; listing - confirm they are present in the full file.
11 define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
; Skip the loop entirely when count is zero.
13 %cmp12 = icmp eq i64 %count, 0
14 br i1 %cmp12, label %for.end, label %for.body
16 for.body: ; preds = %entry, %for.body
; Loop-carried state: the iteration counter and the current dst/src pointers.
17 %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
18 %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
19 %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
; Element 0: copy through the current pointers.
20 %0 = load double* %src.addr.013, align 8
21 store double %0, double* %dst.addr.014, align 8
; Element 1: copy through both pointers offset by one double.
22 %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 1
23 %1 = load double* %arrayidx2, align 8
24 %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1
25 store double %1, double* %arrayidx3, align 8
; Advance both pointers by the current counter value (not by a fixed stride).
26 %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015
27 %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
; Increment the counter and leave the loop once it equals count.
28 %inc = add i64 %i.015, 1
29 %exitcond = icmp eq i64 %inc, %count
30 br i1 %exitcond, label %for.end, label %for.body
32 for.end: ; preds = %for.body, %entry
; Test case: a four-element float copy loop whose loads and stores are all
; adjacent. Each iteration copies src[0..3] to dst[0..3], so the directives
; below expect a <4 x float> load followed by a <4 x float> store in the
; optimizer output.
36 ; CHECK: tiny_tree_fully_vectorizable2
37 ; CHECK: load <4 x float>
38 ; CHECK: store <4 x float>
; NOTE(review): the %entry block named by the phis below, the terminator after
; for.end, the closing brace and attribute group #0 are not visible in this
; listing - confirm they are present in the full file.
41 define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
; Skip the loop entirely when count is zero.
43 %cmp20 = icmp eq i64 %count, 0
44 br i1 %cmp20, label %for.end, label %for.body
46 for.body: ; preds = %entry, %for.body
; Loop-carried state: the iteration counter and the current dst/src pointers.
47 %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
48 %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
49 %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
; Element 0: copy through the current pointers.
50 %0 = load float* %src.addr.021, align 4
51 store float %0, float* %dst.addr.022, align 4
; Element 1: source offset 1 to destination offset 1.
52 %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 1
53 %1 = load float* %arrayidx2, align 4
54 %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
55 store float %1, float* %arrayidx3, align 4
; Element 2: source offset 2 to destination offset 2.
56 %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
57 %2 = load float* %arrayidx4, align 4
58 %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
59 store float %2, float* %arrayidx5, align 4
; Element 3: source offset 3 to destination offset 3.
60 %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
61 %3 = load float* %arrayidx6, align 4
62 %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
63 store float %3, float* %arrayidx7, align 4
; Advance both pointers by the current counter value (not by a fixed stride).
64 %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023
65 %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
; Increment the counter and leave the loop once it equals count.
66 %inc = add i64 %i.023, 1
67 %exitcond = icmp eq i64 %inc, %count
68 br i1 %exitcond, label %for.end, label %for.body
70 for.end: ; preds = %for.body, %entry
; Test case: same shape as the two-element double copy, except that the second
; load reads source offset 2 while its store writes destination offset 1.
; The stores are adjacent but the loads are not, and the negative directive
; below requires that no <2 x double> text appears after the function name.
74 ; We do not vectorize the tiny tree which is not fully vectorizable.
75 ; CHECK: tiny_tree_not_fully_vectorizable
76 ; CHECK-NOT: <2 x double>
; NOTE(review): the %entry block named by the phis below, the terminator after
; for.end, the closing brace and attribute group #0 are not visible in this
; listing - confirm they are present in the full file.
79 define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
; Skip the loop entirely when count is zero.
81 %cmp12 = icmp eq i64 %count, 0
82 br i1 %cmp12, label %for.end, label %for.body
84 for.body: ; preds = %entry, %for.body
; Loop-carried state: the iteration counter and the current dst/src pointers.
85 %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
86 %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
87 %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
; Element 0: copy through the current pointers.
88 %0 = load double* %src.addr.013, align 8
89 store double %0, double* %dst.addr.014, align 8
; Element 1: the load uses source offset 2 (a gap after offset 0) while the
; store still targets destination offset 1.
90 %arrayidx2 = getelementptr inbounds double* %src.addr.013, i64 2
91 %1 = load double* %arrayidx2, align 8
92 %arrayidx3 = getelementptr inbounds double* %dst.addr.014, i64 1
93 store double %1, double* %arrayidx3, align 8
; Advance both pointers by the current counter value (not by a fixed stride).
94 %add.ptr = getelementptr inbounds double* %src.addr.013, i64 %i.015
95 %add.ptr4 = getelementptr inbounds double* %dst.addr.014, i64 %i.015
; Increment the counter and leave the loop once it equals count.
96 %inc = add i64 %i.015, 1
97 %exitcond = icmp eq i64 %inc, %count
98 br i1 %exitcond, label %for.end, label %for.body
100 for.end: ; preds = %for.body, %entry
; Test case: same shape as the four-element float copy, except that the second
; load reads source offset 4 instead of 1. The stores cover destination
; offsets 0..3, but the loads (offsets 0, 4, 2, 3) are not all adjacent.
; The negative directive below names the float vector type this function
; would be vectorized to; a double vector pattern could never match here,
; because the function only touches float values.
105 ; CHECK: tiny_tree_not_fully_vectorizable2
106 ; CHECK-NOT: <4 x float>
; NOTE(review): the %entry block named by the phis below, the terminator after
; for.end, the closing brace and attribute group #0 are not visible in this
; listing - confirm they are present in the full file.
109 define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
; Skip the loop entirely when count is zero.
111 %cmp20 = icmp eq i64 %count, 0
112 br i1 %cmp20, label %for.end, label %for.body
114 for.body: ; preds = %entry, %for.body
; Loop-carried state: the iteration counter and the current dst/src pointers.
115 %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
116 %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
117 %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
; Element 0: copy through the current pointers.
118 %0 = load float* %src.addr.021, align 4
119 store float %0, float* %dst.addr.022, align 4
; Element 1: the load uses source offset 4 (outside the 0..3 run) while the
; store still targets destination offset 1.
120 %arrayidx2 = getelementptr inbounds float* %src.addr.021, i64 4
121 %1 = load float* %arrayidx2, align 4
122 %arrayidx3 = getelementptr inbounds float* %dst.addr.022, i64 1
123 store float %1, float* %arrayidx3, align 4
; Element 2: source offset 2 to destination offset 2.
124 %arrayidx4 = getelementptr inbounds float* %src.addr.021, i64 2
125 %2 = load float* %arrayidx4, align 4
126 %arrayidx5 = getelementptr inbounds float* %dst.addr.022, i64 2
127 store float %2, float* %arrayidx5, align 4
; Element 3: source offset 3 to destination offset 3.
128 %arrayidx6 = getelementptr inbounds float* %src.addr.021, i64 3
129 %3 = load float* %arrayidx6, align 4
130 %arrayidx7 = getelementptr inbounds float* %dst.addr.022, i64 3
131 store float %3, float* %arrayidx7, align 4
; Advance both pointers by the current counter value (not by a fixed stride).
132 %add.ptr = getelementptr inbounds float* %src.addr.021, i64 %i.023
133 %add.ptr8 = getelementptr inbounds float* %dst.addr.022, i64 %i.023
; Increment the counter and leave the loop once it equals count.
134 %inc = add i64 %i.023, 1
135 %exitcond = icmp eq i64 %inc, %count
136 br i1 %exitcond, label %for.end, label %for.body
138 for.end: ; preds = %for.body, %entry