test/Transforms/SROA/basictest.ll

   1 ; RUN: opt < %s -sroa -S | FileCheck %s
   2 ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
   3 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
   4
   5 declare void @llvm.lifetime.start(i64, i8* nocapture)
   6 declare void @llvm.lifetime.end(i64, i8* nocapture)
   7
   8 define i32 @test0() {
     ; Two independent scalar allocas (an i32 and a float), each wrapped in
     ; llvm.lifetime.start/end calls and accessed by one store and one load.
     ; The check lines below expect no alloca to remain in the output.
   9 ; CHECK: @test0
  10 ; CHECK-NOT: alloca
  11 ; CHECK: ret i32
  12
  13 entry:
  14   %a1 = alloca i32
  15   %a2 = alloca float
  16
  17   %a1.i8 = bitcast i32* %a1 to i8*
  18   call void @llvm.lifetime.start(i64 4, i8* %a1.i8)
  19
  20   store i32 0, i32* %a1
  21   %v1 = load i32* %a1
  22
  23   call void @llvm.lifetime.end(i64 4, i8* %a1.i8)
  24
  25   %a2.i8 = bitcast float* %a2 to i8*
  26   call void @llvm.lifetime.start(i64 4, i8* %a2.i8)
  27
  28   store float 0.0, float* %a2
  29   %v2 = load float * %a2
       ; The float is reinterpreted as an i32 so both loaded values feed the
       ; returned sum.
  30   %v2.int = bitcast float %v2 to i32
  31   %sum1 = add i32 %v1, %v2.int
  32
  33   call void @llvm.lifetime.end(i64 4, i8* %a2.i8)
  34
  35   ret i32 %sum1
  36 }
  37
  38 define i32 @test1() {
     ; Stores 0 to the first field of a { i32, float } alloca and reloads it.
     ; The check lines expect the alloca to disappear and the constant 0 to be
     ; returned.
  39 ; CHECK: @test1
  40 ; CHECK-NOT: alloca
  41 ; CHECK: ret i32 0
  42
  43 entry:
  44   %X = alloca { i32, float }
  45   %Y = getelementptr { i32, float }* %X, i64 0, i32 0
  46   store i32 0, i32* %Y
  47   %Z = load i32* %Y
  48   ret i32 %Z
  49 }
  50
  51 define i64 @test2(i64 %X) {
     ; Stores the i64 argument into an [8 x i8] alloca (via a bitcast) in the
     ; entry block and reloads it in block L2. The check lines expect no alloca
     ; and %X to be returned directly.
  52 ; CHECK: @test2
  53 ; CHECK-NOT: alloca
  54 ; CHECK: ret i64 %X
  55
  56 entry:
  57   %A = alloca [8 x i8]
  58   %B = bitcast [8 x i8]* %A to i64*
  59   store i64 %X, i64* %B
  60   br label %L2
  61
  62 L2:
  63   %Z = load i64* %B
  64   ret i64 %Z
  65 }
  66
  67 define void @test3(i8* %dst, i8* %src) {
     ; A 300-byte alloca is filled from %src, partially overwritten by
     ; single-byte stores, overlapping integer stores, memcpy and memset calls,
     ; and finally copied out to %dst.
     ;
     ; Per the expected output below, the alloca is rewritten into seven smaller
     ; allocas covering these byte ranges of the original array:
     ;   a1 [0,42)   a2 [43,142)   a3 [142,158)   a4 [158,200)
     ;   a5 [200,207)   a6 [208,215)   a7 [215,300)
     ; Bytes 42 and 207 get no alloca of their own; the expected output handles
     ; them with direct i8 loads from %src and i8 stores to %dst.
  68 ; CHECK: @test3
  69
  70 entry:
  71   %a = alloca [300 x i8]
  72 ; CHECK-NOT:  alloca
  73 ; CHECK:      %[[test3_a1:.*]] = alloca [42 x i8]
  74 ; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
  75 ; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
  76 ; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
  77 ; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
  78 ; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
  79 ; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
  80
       ; Initialize the whole array from %src; the expected output splits this
       ; into one copy (or scalar load) per partition.
  81   %b = getelementptr [300 x i8]* %a, i64 0, i64 0
  82   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
  83 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
  84 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
  85 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
  86 ; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
  87 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
  88 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
  89 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
  90 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 142
  91 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
  92 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
  93 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 158
  94 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
  95 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
  96 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 200
  97 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
  98 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
  99 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 207
 100 ; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
 101 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 208
 102 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
 103 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 104 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 215
 105 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
 106 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
 107
 108   ; Clobber a single element of the array; this should be promotable.
 109   %c = getelementptr [300 x i8]* %a, i64 0, i64 42
 110   store i8 0, i8* %c
 111
 112   ; Make a sequence of overlapping stores to the array. These overlap both in
 113   ; forward strides and in shrinking accesses.
       ; All of these land in bytes [142,158), i.e. the 16-byte partition a3.
 114   %overlap.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 142
 115   %overlap.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 143
 116   %overlap.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 144
 117   %overlap.4.i8 = getelementptr [300 x i8]* %a, i64 0, i64 145
 118   %overlap.5.i8 = getelementptr [300 x i8]* %a, i64 0, i64 146
 119   %overlap.6.i8 = getelementptr [300 x i8]* %a, i64 0, i64 147
 120   %overlap.7.i8 = getelementptr [300 x i8]* %a, i64 0, i64 148
 121   %overlap.8.i8 = getelementptr [300 x i8]* %a, i64 0, i64 149
 122   %overlap.9.i8 = getelementptr [300 x i8]* %a, i64 0, i64 150
 123   %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
 124   %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
 125   %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
 126   %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
 127   %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
 128   %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
 129   %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
 130   %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
 131   %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
 132   %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
 133   %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
 134   store i8 1, i8* %overlap.1.i8
 135 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
 136 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
 137   store i16 1, i16* %overlap.1.i16
 138 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
 139 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
 140   store i32 1, i32* %overlap.1.i32
 141 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
 142 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
 143   store i64 1, i64* %overlap.1.i64
 144 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
 145 ; CHECK-NEXT: store i64 1, i64* %[[bitcast]]
 146   store i64 2, i64* %overlap.2.i64
 147 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 1
 148 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 149 ; CHECK-NEXT: store i64 2, i64* %[[bitcast]]
 150   store i64 3, i64* %overlap.3.i64
 151 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 2
 152 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 153 ; CHECK-NEXT: store i64 3, i64* %[[bitcast]]
 154   store i64 4, i64* %overlap.4.i64
 155 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 3
 156 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 157 ; CHECK-NEXT: store i64 4, i64* %[[bitcast]]
 158   store i64 5, i64* %overlap.5.i64
 159 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 4
 160 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 161 ; CHECK-NEXT: store i64 5, i64* %[[bitcast]]
 162   store i64 6, i64* %overlap.6.i64
 163 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 5
 164 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 165 ; CHECK-NEXT: store i64 6, i64* %[[bitcast]]
 166   store i64 7, i64* %overlap.7.i64
 167 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 6
 168 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 169 ; CHECK-NEXT: store i64 7, i64* %[[bitcast]]
 170   store i64 8, i64* %overlap.8.i64
 171 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 7
 172 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 173 ; CHECK-NEXT: store i64 8, i64* %[[bitcast]]
 174   store i64 9, i64* %overlap.9.i64
 175 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 8
 176 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
 177 ; CHECK-NEXT: store i64 9, i64* %[[bitcast]]
 178
 179   ; Make two sequences of overlapping stores with more gaps and irregularities.
       ; The first sequence covers bytes [200,207) (partition a5), the second
       ; covers bytes [208,215) (partition a6).
 180   %overlap2.1.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 200
 181   %overlap2.1.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 201
 182   %overlap2.1.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 202
 183   %overlap2.1.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 203
 184
 185   %overlap2.2.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 208
 186   %overlap2.2.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 209
 187   %overlap2.2.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 210
 188   %overlap2.2.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 211
 189
 190   %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
 191   %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
 192   %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
 193   %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
 194   %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
 195   store i8 1,  i8*  %overlap2.1.0.i8
 196 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
 197 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
 198   store i16 1, i16* %overlap2.1.0.i16
 199 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
 200 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
 201   store i32 1, i32* %overlap2.1.0.i32
 202 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
 203 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
 204   store i32 2, i32* %overlap2.1.1.i32
 205 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 1
 206 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 207 ; CHECK-NEXT: store i32 2, i32* %[[bitcast]]
 208   store i32 3, i32* %overlap2.1.2.i32
 209 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
 210 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 211 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
 212   store i32 4, i32* %overlap2.1.3.i32
 213 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 3
 214 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 215 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
 216
 217   %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
 218   %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
 219   %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
 220   %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
 221   %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
 222   store i32 1, i32* %overlap2.2.0.i32
 223 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
 224 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
 225   store i8 1,  i8*  %overlap2.2.1.i8
 226 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
 227 ; CHECK-NEXT: store i8 1, i8* %[[gep]]
 228   store i16 1, i16* %overlap2.2.1.i16
 229 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
 230 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 231 ; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
 232   store i32 1, i32* %overlap2.2.1.i32
 233 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
 234 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 235 ; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
 236   store i32 3, i32* %overlap2.2.2.i32
 237 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
 238 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 239 ; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
 240   store i32 4, i32* %overlap2.2.3.i32
 241 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 3
 242 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
 243 ; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
 244
       ; %overlap2.prefix is byte 201 - 4 = 197, so this 8-byte copy covers
       ; bytes [197,205): the last 3 bytes of a4 and the first 5 bytes of a5.
 245   %overlap2.prefix = getelementptr i8* %overlap2.1.1.i8, i64 -4
 246   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
 247 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 39
 248 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
 249 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 3
 250 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
 251 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
 252
 253   ; Bridge between the overlapping areas
       ; This memset covers bytes [202,210): 5 bytes of a5, the lone byte 207,
       ; and the first 2 bytes of a6.
 254   call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
 255 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
 256 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
 257 ; ...promoted i8 store...
 258 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
 259 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
 260
 261   ; Entirely within the second overlap.
 262   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
 263 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
 264 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
 265
 266   ; Trailing past the second overlap.
       ; Covers bytes [210,218): the last 5 bytes of a6 and the first 3 of a7.
 267   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
 268 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
 269 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
 270 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 5
 271 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
 272 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
 273
       ; Copy the whole array out to %dst. The expected output writes byte 42 as
       ; the constant 0 and byte 207 as the constant 42, matching the single-byte
       ; store and the memset above.
 274   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
 275 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
 276 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
 277 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
 278 ; CHECK-NEXT: store i8 0, i8* %[[gep]]
 279 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
 280 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
 281 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
 282 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 142
 283 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
 284 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
 285 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 158
 286 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
 287 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
 288 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 200
 289 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
 290 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 291 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 207
 292 ; CHECK-NEXT: store i8 42, i8* %[[gep]]
 293 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 208
 294 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
 295 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 296 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 215
 297 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
 298 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
 299
 300   ret void
 301 }
 302
 303 define void @test4(i8* %dst, i8* %src) {
     ; A 100-byte alloca is filled from %src, modified by a memcpy and a memmove
     ; whose source and destination are both sub-ranges of the alloca itself
     ; (plus one single-byte store), and finally copied out to %dst.
     ;
     ; Per the expected output below, the alloca is rewritten into six smaller
     ; allocas covering these byte ranges of the original array:
     ;   a1 [0,20)   a2 [23,30)   a3 [30,40)   a4 [43,50)   a5 [53,60)
     ;   a6 [60,100)
     ; Bytes 20-22, 40-42 and 50-52 get no alloca; each group is handled as an
     ; i16 value followed by an i8 value.
 304 ; CHECK: @test4
 305
 306 entry:
 307   %a = alloca [100 x i8]
 308 ; CHECK-NOT:  alloca
 309 ; CHECK:      %[[test4_a1:.*]] = alloca [20 x i8]
 310 ; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
 311 ; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
 312 ; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
 313 ; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
 314 ; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
 315
       ; Initialize the whole array from %src.
 316   %b = getelementptr [100 x i8]* %a, i64 0, i64 0
 317   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
 318 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
 319 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
 320 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 20
 321 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 322 ; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
 323 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 22
 324 ; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
 325 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 23
 326 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
 327 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 328 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 30
 329 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
 330 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
 331 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 40
 332 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 333 ; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
 334 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
 335 ; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
 336 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
 337 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
 338 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 339 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 50
 340 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 341 ; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
 342 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 52
 343 ; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
 344 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 53
 345 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
 346 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 347 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 60
 348 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
 349 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
 350
       ; Copy bytes [20,30) of the alloca onto bytes [40,50) of the same alloca.
 351   %a.src.1 = getelementptr [100 x i8]* %a, i64 0, i64 20
 352   %a.dst.1 = getelementptr [100 x i8]* %a, i64 0, i64 40
 353   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
 354 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
 355 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
 356 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 357
 358   ; Clobber a single element of the array; this should be promotable and be deleted.
 359   %c = getelementptr [100 x i8]* %a, i64 0, i64 42
 360   store i8 0, i8* %c
 361
       ; Move bytes [50,60) onto bytes [40,50). Note that the expected output
       ; below uses memcpy, not memmove, for the part that stays in memory
       ; (presumably because source and destination end up in distinct allocas).
 362   %a.src.2 = getelementptr [100 x i8]* %a, i64 0, i64 50
 363   call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
 364 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
 365 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
 366 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 367
       ; Copy the whole array out to %dst. Bytes 40-42 are expected to be written
       ; from the values loaded at %src offsets 50-52 (test4_r5 / test4_r6),
       ; reflecting the memmove above.
 368   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
 369 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
 370 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
 371 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 20
 372 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 373 ; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
 374 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 22
 375 ; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
 376 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 23
 377 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
 378 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 379 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 30
 380 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
 381 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
 382 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 40
 383 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 384 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
 385 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
 386 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 387 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
 388 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
 389 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 390 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 50
 391 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
 392 ; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
 393 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 52
 394 ; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
 395 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 53
 396 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
 397 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 398 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 60
 399 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
 400 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
 401
 402   ret void
 403 }
 404
 405 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 406 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 407 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 408
 409 define i16 @test5() {
     ; Stores a float through a bitcast of a [4 x i8] alloca, then loads an i16
     ; starting at byte offset 2 of the same alloca. The check lines expect an
     ; 'alloca float' to remain and some i16 SSA value to be returned.
 410 ; CHECK: @test5
 411 ; CHECK: alloca float
 412 ; CHECK: ret i16 %
 413
 414 entry:
 415   %a = alloca [4 x i8]
 416   %fptr = bitcast [4 x i8]* %a to float*
 417   store float 0.0, float* %fptr
 418   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 2
 419   %iptr = bitcast i8* %ptr to i16*
 420   %val = load i16* %iptr
 421   ret i16 %val
 422 }
 423
 424 define i32 @test6() {
     ; A volatile memset (final argument 'i1 true') writes 4 bytes of value 42
     ; into a [4 x i8] alloca, which is then read back as a single i32. The
     ; expected output keeps an i32 alloca and a volatile i32 store followed by
     ; a load.
 425 ; CHECK: @test6
 426 ; CHECK: alloca i32
 427 ; CHECK-NEXT: store volatile i32
 428 ; CHECK-NEXT: load i32*
 429 ; CHECK-NEXT: ret i32
 430
 431 entry:
 432   %a = alloca [4 x i8]
 433   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
 434   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
 435   %iptr = bitcast i8* %ptr to i32*
 436   %val = load i32* %iptr
 437   ret i32 %val
 438 }
 439
 440 define void @test7(i8* %src, i8* %dst) {
     ; Two volatile 4-byte memcpy calls (final argument 'i1 true'): one from
     ; %src into a [4 x i8] alloca and one from that alloca out to %dst. The
     ; expected output keeps an i32 alloca and shows a volatile i32 load
     ; followed by a volatile i32 store for each copy.
 441 ; CHECK: @test7
 442 ; CHECK: alloca i32
 443 ; CHECK-NEXT: bitcast i8* %src to i32*
 444 ; CHECK-NEXT: load volatile i32*
 445 ; CHECK-NEXT: store volatile i32
 446 ; CHECK-NEXT: bitcast i8* %dst to i32*
 447 ; CHECK-NEXT: load volatile i32*
 448 ; CHECK-NEXT: store volatile i32
 449 ; CHECK-NEXT: ret
 450
 451 entry:
 452   %a = alloca [4 x i8]
 453   %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
 454   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
 455   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
 456   ret void
 457 }
 458
 459
 460 %S1 = type { i32, i32, [16 x i8] }
     ; %S2 pairs a pointer to an %S1 (field 0) with a pointer to another %S2
     ; (field 1); @test8 and @test10 in this file use it.
 461 %S2 = type { %S1*, %S2* }
 462
 463 define %S2 @test8(%S2* %s2) {
     ; Loads the %S2 pointed to by field 1 of %s2, copies both of that struct's
     ; pointer fields into a local %S2 alloca, reloads them, and returns them as
     ; an %S2 aggregate. The expected output has no alloca and builds the result
     ; directly from the two loaded pointers.
 464 ; CHECK: @test8
 465 entry:
 466   %new = alloca %S2
 467 ; CHECK-NOT: alloca
 468
 469   %s2.next.ptr = getelementptr %S2* %s2, i64 0, i32 1
 470   %s2.next = load %S2** %s2.next.ptr
 471 ; CHECK:      %[[gep:.*]] = getelementptr %S2* %s2, i64 0, i32 1
 472 ; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
 473
       ; Copy field 0 and then field 1 of %s2.next into the local %new.
 474   %s2.next.s1.ptr = getelementptr %S2* %s2.next, i64 0, i32 0
 475   %s2.next.s1 = load %S1** %s2.next.s1.ptr
 476   %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0
 477   store %S1* %s2.next.s1, %S1** %new.s1.ptr
 478   %s2.next.next.ptr = getelementptr %S2* %s2.next, i64 0, i32 1
 479   %s2.next.next = load %S2** %s2.next.next.ptr
 480   %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1
 481   store %S2* %s2.next.next, %S2** %new.next.ptr
 482 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 0
 483 ; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
 484 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 1
 485 ; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
 486
       ; Read both fields back out of %new and assemble the return value.
 487   %new.s1 = load %S1** %new.s1.ptr
 488   %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
 489 ; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
 490   %new.next = load %S2** %new.next.ptr
 491   %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
 492 ; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
 493   ret %S2 %result2
 494 ; CHECK-NEXT: ret %S2 %[[result2]]
 495 }
 496
 497 define i64 @test9() {
 498 ; Ensure we can handle loads off the end of an alloca even when wrapped in
 499 ; weird bit casts and types. The result is undef, but this shouldn't crash
 500 ; anything.
 501 ; CHECK: @test9
 502 ; CHECK-NOT: alloca
 503 ; CHECK: ret i64 undef
 504
 505 entry:
       ; The alloca holds only 3 bytes; each of them is stored to individually.
 506   %a = alloca { [3 x i8] }
 507   %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
 508   store i8 0, i8* %gep1, align 1
 509   %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
 510   store i8 0, i8* %gep2, align 1
 511   %gep3 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 2
 512   store i8 26, i8* %gep3, align 1
       ; Reinterpret the 3-byte alloca as { i64 } and load all 8 bytes, i.e. 5
       ; bytes more than were allocated.
 513   %cast = bitcast { [3 x i8] }* %a to { i64 }*
 514   %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
 515   %result = load i64* %elt
 516   ret i64 %result
 517 }
 518
 519 define %S2* @test10() {
     ; Zero-fills an [8 x i8] alloca with memset and then loads those 8 bytes as
     ; a %S2* pointer. The check lines expect the function to return null.
 520 ; CHECK: @test10
 521 ; CHECK-NOT: alloca %S2*
 522 ; CHECK: ret %S2* null
 523
 524 entry:
 525   %a = alloca [8 x i8]
 526   %ptr = getelementptr [8 x i8]* %a, i32 0, i32 0
 527   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
 528   %s2ptrptr = bitcast i8* %ptr to %S2**
 529   %s2ptr = load %S2** %s2ptrptr
 530   ret %S2* %s2ptr
 531 }
 532
 533 define i32 @test11() {
     ; Branches on undef between two blocks that each store 0 and reload it:
     ; 'good' accesses the i32 alloca itself (index 0), while 'bad' accesses the
     ; element one past it (index 1). The check lines expect no alloca and a
     ; 'ret i32 0'.
 534 ; CHECK: @test11
 535 ; CHECK-NOT: alloca
 536 ; CHECK: ret i32 0
 537
 538 entry:
 539   %X = alloca i32
 540   br i1 undef, label %good, label %bad
 541
 542 good:
 543   %Y = getelementptr i32* %X, i64 0
 544   store i32 0, i32* %Y
 545   %Z = load i32* %Y
 546   ret i32 %Z
 547
 548 bad:
       ; Index 1 addresses memory beyond the single i32 that %X allocates.
 549   %Y2 = getelementptr i32* %X, i64 1
 550   store i32 0, i32* %Y2
 551   %Z2 = load i32* %Y2
 552   ret i32 %Z2
 553 }
 554
 555 define i32 @test12() {
     ; Three single-byte stores into a [3 x i8] alloca followed by one load of
     ; the whole alloca as an i24, which is zero-extended and returned. The
     ; expected output keeps an i24 alloca with three i8 stores and an i24 load.
 556 ; CHECK: @test12
 557 ; CHECK: alloca i24
 558 ;
 559 ; FIXME: SROA should promote accesses to this into whole i24 operations instead
 560 ; of i8 operations.
 561 ; CHECK: store i8 0
 562 ; CHECK: store i8 0
 563 ; CHECK: store i8 0
 564 ;
 565 ; CHECK: load i24*
 566
 567 entry:
 568   %a = alloca [3 x i8]
 569   %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
 570   store i8 0, i8* %b0ptr
 571   %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
 572   store i8 0, i8* %b1ptr
 573   %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
 574   store i8 0, i8* %b2ptr
 575   %iptr = bitcast [3 x i8]* %a to i24*
 576   %i = load i24* %iptr
 577   %ret = zext i24 %i to i32
 578   ret i32 %ret
 579 }
 580
 581 define i32 @test13() {
 582 ; Ensure we don't crash and handle undefined loads that straddle the end of the
 583 ; allocation.
 584 ; CHECK: @test13
 585 ; CHECK: %[[ret:.*]] = zext i16 undef to i32
 586 ; CHECK: ret i32 %[[ret]]
 587
 588 entry:
 589   %a = alloca [3 x i8]
 590   %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
 591   store i8 0, i8* %b0ptr
 592   %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
 593   store i8 0, i8* %b1ptr
 594   %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
 595   store i8 0, i8* %b2ptr
       ; Index 1 of an i16* is byte offset 2, so the 2-byte load below covers
       ; bytes 2-3 of the 3-byte alloca: one byte inside, one byte past the end.
 596   %iptrcast = bitcast [3 x i8]* %a to i16*
 597   %iptrgep = getelementptr i16* %iptrcast, i64 1
 598   %i = load i16* %iptrgep
 599   %ret = zext i16 %i to i32
 600   ret i32 %ret
 601 }
 602
 603 %test14.struct = type { [3 x i32] }
     ; A struct wrapping an array of three i32 values; @test14 allocates one.
 604
 605 define void @test14(...) nounwind uwtable {
 606 ; This is a strange case where we split allocas into promotable partitions, but
 607 ; also gain enough data to prove they must be dead allocas due to GEPs that walk
 608 ; across two adjacent allocas. Test that we don't try to promote or otherwise
 609 ; do bad things to these dead allocas; they should just be removed.
 610 ; CHECK: @test14
 611 ; CHECK-NEXT: entry:
 612 ; CHECK-NEXT: ret void
 613
 614 entry:
 615   %a = alloca %test14.struct
       ; %p is allocated but never referenced again in this function.
 616   %p = alloca %test14.struct*
       ; %1 points 12 bytes past the start of %a; %a holds three i32 values, so
       ; with 4-byte i32 that is the first byte after the struct.
 617   %0 = bitcast %test14.struct* %a to i8*
 618   %1 = getelementptr i8* %0, i64 12
 619   %2 = bitcast i8* %1 to %test14.struct*
 620   %3 = getelementptr inbounds %test14.struct* %2, i32 0, i32 0
 621   %4 = getelementptr inbounds %test14.struct* %a, i32 0, i32 0
 622   %5 = bitcast [3 x i32]* %3 to i32*
 623   %6 = bitcast [3 x i32]* %4 to i32*
       ; Copy the three i32 elements one at a time from %a (%6) to the location
       ; derived from %1 (%5).
 624   %7 = load i32* %6, align 4
 625   store i32 %7, i32* %5, align 4
 626   %8 = getelementptr inbounds i32* %5, i32 1
 627   %9 = getelementptr inbounds i32* %6, i32 1
 628   %10 = load i32* %9, align 4
 629   store i32 %10, i32* %8, align 4
 630   %11 = getelementptr inbounds i32* %5, i32 2
 631   %12 = getelementptr inbounds i32* %6, i32 2
 632   %13 = load i32* %12, align 4
 633   store i32 %13, i32* %11, align 4
 634   ret void
 635 }
 636
 637 define i32 @test15(i1 %flag) nounwind uwtable {
 638 ; Ensure that when there are dead instructions using an alloca that are not
 639 ; loads or stores we still delete them during partitioning and rewriting.
 640 ; Otherwise we'll go to promote them while they still have unpromotable uses.
 641 ; CHECK: @test15
 642 ; CHECK-NEXT: entry:
 643 ; CHECK-NEXT:   br label %loop
 644 ; CHECK:      loop:
 645 ; CHECK-NEXT:   br label %loop
 646
 647 entry:
 648   %l0 = alloca i64
 649   %l1 = alloca i64
 650   %l2 = alloca i64
 651   %l3 = alloca i64
 652   br label %loop
 653
 654 loop:
       ; Each alloca below gets one store plus a different kind of unused user:
       ; %l3 feeds this phi (through %gep3), %l0 a bitcast, %l1 a GEP of a GEP,
       ; and %l2 a select.
 655   %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
 656
 657   store i64 1879048192, i64* %l0, align 8
 658   %bc0 = bitcast i64* %l0 to i8*
 659   %gep0 = getelementptr i8* %bc0, i64 3
 660   %dead0 = bitcast i8* %gep0 to i64*
 661
 662   store i64 1879048192, i64* %l1, align 8
 663   %bc1 = bitcast i64* %l1 to i8*
 664   %gep1 = getelementptr i8* %bc1, i64 3
 665   %dead1 = getelementptr i8* %gep1, i64 1
 666
 667   store i64 1879048192, i64* %l2, align 8
 668   %bc2 = bitcast i64* %l2 to i8*
 669   %gep2.1 = getelementptr i8* %bc2, i64 1
 670   %gep2.2 = getelementptr i8* %bc2, i64 3
 671   ; Note that this select should get visited multiple times due to using two
 672   ; different GEPs off the same alloca. We should only delete it once.
 673   %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
 674
 675   store i64 1879048192, i64* %l3, align 8
 676   %bc3 = bitcast i64* %l3 to i8*
 677   %gep3 = getelementptr i8* %bc3, i64 3
 678
 679   br label %loop
 680 }
 681
 682 define void @test16(i8* %src, i8* %dst) {
 683 ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
 684 ; CHECK: @test16
 685 ; CHECK-NOT: alloca
 686 ; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
 687 ; CHECK-NEXT: load i24* %[[srccast]]
 688 ; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
 689 ; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
 690 ; CHECK-NEXT: ret void
 691 ; NOTE(review): both memcpys below pass length 4 although %a is only 3 bytes, while the expected output uses i24 accesses; presumably intentional -- confirm.
 692 entry:
 693   %a = alloca [3 x i8]
 694   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
 695   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
 696   %cast = bitcast i8* %ptr to i24*
 697   store i24 0, i24* %cast  ; i24 store through a pointer to the start of %a
 698   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
 699   ret void
 700 }
 701
 702 define void @test17(i8* %src, i8* %dst) {
 703 ; Ensure that we can rewrite unpromotable memcpys which extend past the end of
 704 ; the alloca.
 705 ; CHECK: @test17
 706 ; CHECK:      %[[a:.*]] = alloca [3 x i8]
 707 ; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
 708 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
 709 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]],
 710 ; CHECK-NEXT: ret void
 711 ; NOTE(review): the memcpys match test16's except the final i1 argument is true; presumably that is the volatile flag that makes them unpromotable -- confirm.
 712 entry:
 713   %a = alloca [3 x i8]
 714   %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
 715   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)  ; length 4 into the 3-byte %a
 716   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)  ; length 4 out of the 3-byte %a
 717   ret void
 718 }
 719
 720 define void @test18(i8* %src, i8* %dst, i32 %size) {
 721 ; Preserve transfer intrinsics with a variable size, even if they overlap with
 722 ; fixed size operations. Further, continue to split and promote allocas preceding
 723 ; the variable sized intrinsic.
 724 ; CHECK: @test18
 725 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
 726 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
 727 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
 728 ; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
 729 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
 730 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
 731 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
 732 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
 733 ; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
 734 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
 735 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8* %dst, i64 4
 736 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
 737 ; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]]
 738 ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
 739 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size,
 740 ; CHECK-NEXT: ret void
 741
 742 entry:
 743   %a = alloca [42 x i8]  ; the patterns above expect only a [34 x i8] alloca (42 - 8) to remain
 744   %ptr = getelementptr [42 x i8]* %a, i32 0, i32 0
 745   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)  ; fixed size: 8 bytes at the start of %a
 746   %ptr2 = getelementptr [42 x i8]* %a, i32 0, i32 8  ; element index 8 of %a
 747   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)  ; variable size, into %ptr2
 748   call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)  ; variable size, at %ptr2
 749   %cast = bitcast i8* %ptr to i32*
 750   store i32 42, i32* %cast  ; fixed size: i32 at the start of %a
 751   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)  ; fixed size: 8 bytes from the start of %a
 752   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)  ; variable size, from %ptr2
 753   ret void
 754 }
 755