test/Transforms/SROA/vector-promotion.ll

   1 ; RUN: opt < %s -sroa -S | FileCheck %s
   2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
   3
   4 %S1 = type { i64, [42 x float] }
   5
   6 define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
   7 ; CHECK: @test1
     ; Stores %x and %y into the two slots of a [2 x <4 x i32>] alloca, then reads
     ; back single i32 elements through element GEPs: element 2 of slot 0 and
     ; elements 3 and 0 of slot 1. The expected output has no alloca, store, or
     ; load left; each element read becomes an extractelement on the argument.
   8 entry:
   9         %a = alloca [2 x <4 x i32>]
  10 ; CHECK-NOT: alloca
  11
     ; Fill slot 0 with %x and slot 1 with %y.
  12   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
  13   store <4 x i32> %x, <4 x i32>* %a.x
  14   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
  15   store <4 x i32> %y, <4 x i32>* %a.y
  16 ; CHECK-NOT: store
  17
     ; Scalar element loads addressed with a third GEP index into the vector.
  18   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
  19   %tmp1 = load i32* %a.tmp1
  20   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
  21   %tmp2 = load i32* %a.tmp2
  22   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
  23   %tmp3 = load i32* %a.tmp3
  24 ; CHECK-NOT: load
  25 ; CHECK:      extractelement <4 x i32> %x, i32 2
  26 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
  27 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
  28
     ; Sum the three elements so that all of the loads are live.
  29   %tmp4 = add i32 %tmp1, %tmp2
  30   %tmp5 = add i32 %tmp3, %tmp4
  31   ret i32 %tmp5
  32 ; CHECK-NEXT: add
  33 ; CHECK-NEXT: add
  34 ; CHECK-NEXT: ret
  35 }
  36
  37 define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
  38 ; CHECK: @test2
     ; Same setup as @test1, except that the third read is a <2 x i32> load
     ; through a bitcast of the pointer to element 0 of slot 1, followed by an
     ; extractelement of lane 0. The expected output turns that sub-vector load
     ; into a shufflevector of %y selecting lanes 0 and 1.
  39 entry:
  40         %a = alloca [2 x <4 x i32>]
  41 ; CHECK-NOT: alloca
  42
     ; Fill slot 0 with %x and slot 1 with %y.
  43   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
  44   store <4 x i32> %x, <4 x i32>* %a.x
  45   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
  46   store <4 x i32> %y, <4 x i32>* %a.y
  47 ; CHECK-NOT: store
  48
     ; Two scalar element loads, then one two-element vector load from slot 1.
  49   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
  50   %tmp1 = load i32* %a.tmp1
  51   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
  52   %tmp2 = load i32* %a.tmp2
  53   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
  54   %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
  55   %tmp3.vec = load <2 x i32>* %a.tmp3.cast
  56   %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
  57 ; CHECK-NOT: load
  58 ; CHECK:      %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
  59 ; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
  60 ; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  61 ; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
  62
     ; The sums are matched by operand here, tying each add to the captured
     ; extract results above.
  63   %tmp4 = add i32 %tmp1, %tmp2
  64   %tmp5 = add i32 %tmp3, %tmp4
  65   ret i32 %tmp5
  66 ; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
  67 ; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
  68 ; CHECK-NEXT: ret i32 %[[sum2]]
  69 }
  70
  71 define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
  72 ; CHECK: @test3
     ; Same setup as @test1 with two memset calls added: one overwrites all 16
     ; bytes of slot 1 with zero, the other overwrites the 4 bytes of element 2
     ; of slot 0 with the byte value -1. The expected output has no memset call:
     ; slot 1 reads come from zeroinitializer and the element memset becomes an
     ; insertelement of i32 -1 into %x.
  73 entry:
  74         %a = alloca [2 x <4 x i32>]
  75 ; CHECK-NOT: alloca
  76
     ; Fill slot 0 with %x and slot 1 with %y.
  77   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
  78   store <4 x i32> %x, <4 x i32>* %a.x
  79   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
  80   store <4 x i32> %y, <4 x i32>* %a.y
  81 ; CHECK-NOT: store
  82
     ; Whole-vector memset: 16 zero bytes over slot 1, replacing the stored %y.
  83   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
  84   call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i32 1, i1 false)
  85 ; CHECK-NOT: memset
  86
     ; Single-element memset: 4 bytes of -1 over element 2 of slot 0.
  87   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
  88   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
  89   call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
  90   %tmp1 = load i32* %a.tmp1
  91   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
  92   %tmp2 = load i32* %a.tmp2
  93   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
  94   %tmp3 = load i32* %a.tmp3
  95 ; CHECK-NOT: load
  96 ; CHECK:      %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
  97 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
  98 ; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3
  99 ; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0
 100
     ; Sum the three elements so that all of the loads are live.
 101   %tmp4 = add i32 %tmp1, %tmp2
 102   %tmp5 = add i32 %tmp3, %tmp4
 103   ret i32 %tmp5
 104 ; CHECK-NEXT: add
 105 ; CHECK-NEXT: add
 106 ; CHECK-NEXT: ret
 107 }
 108
 109 define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
 110 ; CHECK: @test4
     ; Same setup as @test1 with two memcpy calls whose source is the external
     ; pointer %z: a 16-byte copy of the whole vector into slot 1, and a 4-byte
     ; copy of element 2 of %z into element 2 of slot 0. The expected output
     ; replaces them with a <4 x i32> load of %z and an i32 load that is
     ; inserted into %x.
 111 entry:
 112         %a = alloca [2 x <4 x i32>]
 113 ; CHECK-NOT: alloca
 114
     ; Fill slot 0 with %x and slot 1 with %y.
 115   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
 116   store <4 x i32> %x, <4 x i32>* %a.x
 117   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
 118   store <4 x i32> %y, <4 x i32>* %a.y
 119 ; CHECK-NOT: store
 120
     ; Whole-vector memcpy: 16 bytes from %z into slot 1, replacing the stored %y.
 121   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
 122   %z.cast = bitcast <4 x i32>* %z to i8*
 123   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i32 1, i1 false)
 124 ; CHECK-NOT: memcpy
 125
     ; Single-element memcpy: 4 bytes from element 2 of %z into element 2 of slot 0.
 126   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
 127   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
 128   %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
 129   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
 130   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
 131   %tmp1 = load i32* %a.tmp1
 132   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
 133   %tmp2 = load i32* %a.tmp2
 134   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
 135   %tmp3 = load i32* %a.tmp3
 136 ; CHECK-NOT: memcpy
 137 ; CHECK:      %[[load:.*]] = load <4 x i32>* %z
 138 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
 139 ; CHECK-NEXT: %[[element_load:.*]] = load i32* %[[gep]]
 140 ; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
 141 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
 142 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
 143 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
 144
     ; Sum the three elements so that all of the loads are live.
 145   %tmp4 = add i32 %tmp1, %tmp2
 146   %tmp5 = add i32 %tmp3, %tmp4
 147   ret i32 %tmp5
 148 ; CHECK-NEXT: add
 149 ; CHECK-NEXT: add
 150 ; CHECK-NEXT: ret
 151 }
 152
 153 define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
 154 ; CHECK: @test5
 155 ; The same as @test4 above, but with reversed source and destination for the
 156 ; element memcpy, and a self copy (slot 1 of the alloca copied over slot 0).
     ; The expected output stores element 2 of %y to %z and reads every element
     ; from %y, since slot 0 holds a copy of %y after the self copy.
 157 entry:
 158         %a = alloca [2 x <4 x i32>]
 159 ; CHECK-NOT: alloca
 160
     ; Fill slot 0 with %x and slot 1 with %y.
 161   %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
 162   store <4 x i32> %x, <4 x i32>* %a.x
 163   %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
 164   store <4 x i32> %y, <4 x i32>* %a.y
 165 ; CHECK-NOT: store
 166
     ; Self copy: 16 bytes from slot 1 (%a.y) into slot 0 (%a.x).
 167   %a.y.cast = bitcast <4 x i32>* %a.y to i8*
 168   %a.x.cast = bitcast <4 x i32>* %a.x to i8*
 169   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i32 1, i1 false)
 170 ; CHECK-NOT: memcpy
 171
     ; Element memcpy out of the alloca: 4 bytes from element 2 of slot 0 into
     ; element 2 of %z.
 172   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
 173   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
 174   %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
 175   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
 176   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
 177   %tmp1 = load i32* %a.tmp1
 178   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
 179   %tmp2 = load i32* %a.tmp2
 180   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
 181   %tmp3 = load i32* %a.tmp3
 182 ; CHECK-NOT: memcpy
 183 ; CHECK:      %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
 184 ; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
 185 ; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]]
 186 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 2
 187 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
 188 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
 189
     ; Sum the three elements so that all of the loads are live.
 190   %tmp4 = add i32 %tmp1, %tmp2
 191   %tmp5 = add i32 %tmp3, %tmp4
 192   ret i32 %tmp5
 193 ; CHECK-NEXT: add
 194 ; CHECK-NEXT: add
 195 ; CHECK-NEXT: ret
 196 }
 197
 198 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 199 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 200
 201 define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
 202 ; CHECK: @test6
 203 ; The old scalarrepl pass would wrongly drop the store to the second alloca.
 204 ; PR13254
     ; Both vector stores into the struct alloca must still be present in the
     ; output. The i64 load below indexes the first vector with the function
     ; argument %n rather than a constant.
     ; NOTE(review): presumably the non-constant index is what makes it unsafe
     ; to treat the two struct fields independently -- confirm against PR13254.
 205   %tmp = alloca { <4 x i64>, <4 x i64> }
 206   %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0
 207   store <4 x i64> %x, <4 x i64>* %p0
 208 ; CHECK: store <4 x i64> %x,
 209   %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1
 210   store <4 x i64> %y, <4 x i64>* %p1
 211 ; CHECK: store <4 x i64> %y,
     ; Element address computed with the runtime index %n.
 212   %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
 213   %res = load i64* %addr, align 4
 214   ret i64 %res
 215 }
 216
 217 define <4 x i32> @test_subvec_store() {
 218 ; CHECK: @test_subvec_store
     ; Writes a <4 x i32> alloca piecewise: three overlapping <2 x i32> stores
     ; starting at elements 0, 1 and 2, then a scalar i32 store to element 3,
     ; and finally loads and returns the whole vector. The expected output has
     ; no alloca or store; each sub-vector store becomes a select whose <4 x i1>
     ; mask marks the two lanes written, and the scalar store becomes an
     ; insertelement.
 219 entry:
 220   %a = alloca <4 x i32>
 221 ; CHECK-NOT: alloca
 222
     ; Lanes 0-1.
 223   %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
 224   %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
 225   store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
 226 ; CHECK-NOT: store
 227 ; CHECK:     select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 228
     ; Lanes 1-2 (overlaps the previous store in lane 1).
 229   %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
 230   %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
 231   store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
 232 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 233
     ; Lanes 2-3 (overlaps the previous store in lane 2).
 234   %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
 235   %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
 236   store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
 237 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 238
     ; Scalar store to lane 3 only.
 239   %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3
 240   store i32 3, i32* %a.gep3
 241 ; CHECK-NEXT: insertelement <4 x i32>
 242
 243   %ret = load <4 x i32>* %a
 244
 245   ret <4 x i32> %ret
 246 ; CHECK-NEXT: ret <4 x i32>
 247 }
 248
 249 define <4 x i32> @test_subvec_load() {
 250 ; CHECK: @test_subvec_load
     ; Stores a constant <4 x i32> into an alloca and reads it back as three
     ; overlapping <2 x i32> loads starting at elements 0, 1 and 2. The expected
     ; output has no alloca, store, or load; each sub-vector load becomes a
     ; shufflevector of the constant whose mask names the two lanes read.
 251 entry:
 252   %a = alloca <4 x i32>
 253 ; CHECK-NOT: alloca
 254   store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
 255 ; CHECK-NOT: store
 256
     ; Lanes 0-1.
 257   %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
 258   %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
 259   %first = load <2 x i32>* %a.cast0
 260 ; CHECK-NOT: load
 261 ; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
 262
     ; Lanes 1-2.
 263   %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
 264   %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
 265   %second = load <2 x i32>* %a.cast1
 266 ; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
 267
     ; Lanes 2-3.
 268   %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
 269   %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
 270   %third = load <2 x i32>* %a.cast2
 271 ; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 272
     ; Combine the three loaded pieces so that all of them are used; these two
     ; shuffles are part of the input and are expected to appear unchanged
     ; apart from their operands.
 273   %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
 274   %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 275 ; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
 276 ; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 277
 278   ret <4 x i32> %ret
 279 ; CHECK-NEXT: ret <4 x i32> %[[ret]]
 280 }
 281
 282 declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i32, i1) nounwind
 283
 284 define <4 x float> @test_subvec_memset() {
 285 ; CHECK: @test_subvec_memset
     ; Memset counterpart of @test_subvec_store on a <4 x float> alloca: three
     ; overlapping 8-byte memsets starting at elements 0, 1 and 2 (byte values
     ; 0, 1 and 3), then a 4-byte memset of element 3 (byte value 7), followed
     ; by a load and return of the whole vector. The expected output has no
     ; alloca or store; the 8-byte memsets become selects with two-lane masks
     ; and the 4-byte memset becomes an insertelement.
 286 entry:
 287   %a = alloca <4 x float>
 288 ; CHECK-NOT: alloca
 289
     ; Lanes 0-1: 8 bytes of 0.
 290   %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
 291   %a.cast0 = bitcast float* %a.gep0 to i8*
 292   call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
 293 ; CHECK-NOT: store
 294 ; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 295
     ; Lanes 1-2: 8 bytes of 1.
 296   %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
 297   %a.cast1 = bitcast float* %a.gep1 to i8*
 298   call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
 299 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 300
     ; Lanes 2-3: 8 bytes of 3.
 301   %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
 302   %a.cast2 = bitcast float* %a.gep2 to i8*
 303   call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
 304 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 305
     ; Lane 3 only: 4 bytes of 7.
 306   %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
 307   %a.cast3 = bitcast float* %a.gep3 to i8*
 308   call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
 309 ; CHECK-NEXT: insertelement <4 x float>
 310
 311   %ret = load <4 x float>* %a
 312
 313   ret <4 x float> %ret
 314 ; CHECK-NEXT: ret <4 x float>
 315 }
 316
 317 define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
 318 ; CHECK: @test_subvec_memcpy
     ; Memcpy counterpart of @test_subvec_store on a <4 x float> alloca: three
     ; overlapping 8-byte copies from %x, %y and %z into elements 0-1, 1-2 and
     ; 2-3, a 4-byte copy from %f into element 3, and an 8-byte copy of
     ; elements 2-3 out to %out, followed by a load and return of the whole
     ; vector. The expected output loads each source through a bitcast pointer,
     ; widens it with a shufflevector and merges it with a select (or an
     ; insertelement for the single float); the outgoing copy becomes a
     ; shufflevector of lanes 2-3 and a <2 x float> store.
 319 entry:
 320   %a = alloca <4 x float>
 321 ; CHECK-NOT: alloca
 322
     ; Lanes 0-1 from %x.
 323   %a.gep0 = getelementptr <4 x float>* %a, i32 0, i32 0
 324   %a.cast0 = bitcast float* %a.gep0 to i8*
 325   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
 326 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
 327 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
 328 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 329 ; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 330
     ; Lanes 1-2 from %y.
 331   %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
 332   %a.cast1 = bitcast float* %a.gep1 to i8*
 333   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
 334 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
 335 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
 336 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
 337 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 338
     ; Lanes 2-3 from %z.
 339   %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
 340   %a.cast2 = bitcast float* %a.gep2 to i8*
 341   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
 342 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
 343 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
 344 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 345 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 346
     ; Lane 3 only, from %f.
 347   %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
 348   %a.cast3 = bitcast float* %a.gep3 to i8*
 349   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
 350 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
 351 ; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
 352 ; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float>
 353
     ; Copy out of the alloca: 8 bytes starting at element 2 (lanes 2-3) to %out.
 354   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
 355 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
 356 ; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
 357 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
 358
 359   %ret = load <4 x float>* %a
 360
     ; The returned value is expected to be the vector produced by the final
     ; insertelement above.
 361   ret <4 x float> %ret
 362 ; CHECK-NEXT: ret <4 x float> %[[insert_f]]
 363 }
 364
 365 define i32 @PR14212() {
 366 ; CHECK: @PR14212
 367 ; This caused a crash when "splitting" the load of the i32 in order to promote
 368 ; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
     ; The alloca holds a <3 x i8> but is read back as a full i32 through a
     ; bitcast. The expectations are deliberately loose: the alloca must be gone
     ; and an i32 must be returned.
 369 entry:
 370   %retval = alloca <3 x i8>, align 4
 371 ; CHECK-NOT: alloca
 372
 373   store <3 x i8> undef, <3 x i8>* %retval, align 4
 374   %cast = bitcast <3 x i8>* %retval to i32*
 375   %load = load i32* %cast, align 4
 376   ret i32 %load
 377 ; CHECK: ret i32
 378 }
 379
 380 define <2 x i8> @PR14349.1(i32 %x) {
 381 ; CHECK: @PR14349.1
 382 ; The first testcase for broken SROA rewriting of split integer loads and
 383 ; stores due to smaller vector loads and stores. This particular test ensures
 384 ; that we can rewrite a split store of an integer to a store of a vector.
     ; Here an i32 is stored to the alloca and a <2 x i8> is loaded from the
     ; same address. The expected output truncates %x to i16 and bitcasts that
     ; to <2 x i8>, with no alloca, store, or load remaining.
 385 entry:
 386   %a = alloca i32
 387 ; CHECK-NOT: alloca
 388
 389   store i32 %x, i32* %a
 390 ; CHECK-NOT: store
 391
     ; Read back only the first two bytes of the i32 as a vector.
 392   %cast = bitcast i32* %a to <2 x i8>*
 393   %vec = load <2 x i8>* %cast
 394 ; CHECK-NOT: load
 395
 396   ret <2 x i8> %vec
 397 ; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
 398 ; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
 399 ; CHECK: ret <2 x i8> %[[cast]]
 400 }
 401
 402 define i32 @PR14349.2(<2 x i8> %x) {
 403 ; CHECK: @PR14349.2
 404 ; The second testcase for broken SROA rewriting of split integer loads and
 405 ; stores due to smaller vector loads and stores. This particular test ensures
 406 ; that we can rewrite a split load of an integer to a load of a vector.
     ; Here a <2 x i8> is stored to the start of an i32 alloca and the full i32
     ; is loaded back. The expected output bitcasts %x to i16, zero-extends it
     ; to i32 and ORs it into the result, with no alloca, store, or load
     ; remaining.
 407 entry:
 408   %a = alloca i32
 409 ; CHECK-NOT: alloca
 410
     ; Write only the first two bytes of the i32 as a vector.
 411   %cast = bitcast i32* %a to <2 x i8>*
 412   store <2 x i8> %x, <2 x i8>* %cast
 413 ; CHECK-NOT: store
 414
 415   %int = load i32* %a
 416 ; CHECK-NOT: load
 417
     ; Note: the pattern variable named "trunc" below captures the result of a
     ; zext, not a trunc; the name is only a label.
 418   ret i32 %int
 419 ; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
 420 ; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
 421 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
 422 ; CHECK: ret i32 %[[insert]]
 423 }