test/CodeGen/X86/MergeConsecutiveStores.ll

   1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
   2
   3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
   4 target triple = "x86_64-apple-macosx10.8.0"
   5 ; Aggregates written by the tests below: %struct.A has eight i8 fields, %struct.B has eight i32 fields.
   6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
   7 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
   8
   9 ; CHECK-LABEL: merge_const_store:
  10 ; Save the eight constant bytes 1,2,3 ... 8 stored to %struct.A as one big integer (0x0807060504030201).
  11 ; CHECK: movabsq $578437695752307201
  12 ; CHECK: ret
  13 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
  14   %1 = icmp sgt i32 %count, 0                     ; skip the loop entirely when %count <= 0
  15   br i1 %1, label %.lr.ph, label %._crit_edge
  16 .lr.ph:
  17   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter
  18   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written in this iteration
  19   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  20   store i8 1, i8* %2, align 1
  21   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  22   store i8 2, i8* %3, align 1
  23   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  24   store i8 3, i8* %4, align 1
  25   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  26   store i8 4, i8* %5, align 1
  27   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  28   store i8 5, i8* %6, align 1
  29   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  30   store i8 6, i8* %7, align 1
  31   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  32   store i8 7, i8* %8, align 1
  33   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  34   store i8 8, i8* %9, align 1
  35   %10 = add nsw i32 %i.02, 1
  36   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next struct
  37   %exitcond = icmp eq i32 %10, %count
  38   br i1 %exitcond, label %._crit_edge, label %.lr.ph
  39 ._crit_edge:
  40   ret void
  41 }
  42
  43 ; No vectors because we use noimplicitfloat: the eight consecutive i32 zero stores must not become a vector store.
  44 ; CHECK-LABEL: merge_const_store_no_vec:
  45 ; CHECK-NOT: vmovups
  46 ; CHECK: ret
  47 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
  48   %1 = icmp sgt i32 %count, 0                     ; skip the loop entirely when %count <= 0
  49   br i1 %1, label %.lr.ph, label %._crit_edge
  50 .lr.ph:
  51   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter
  52   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written in this iteration
  53   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  54   store i32 0, i32* %2, align 4
  55   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  56   store i32 0, i32* %3, align 4
  57   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  58   store i32 0, i32* %4, align 4
  59   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  60   store i32 0, i32* %5, align 4
  61   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  62   store i32 0, i32* %6, align 4
  63   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  64   store i32 0, i32* %7, align 4
  65   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
  66   store i32 0, i32* %8, align 4
  67   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
  68   store i32 0, i32* %9, align 4
  69   %10 = add nsw i32 %i.02, 1
  70   %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next struct
  71   %exitcond = icmp eq i32 %10, %count
  72   br i1 %exitcond, label %._crit_edge, label %.lr.ph
  73 ._crit_edge:
  74   ret void
  75 }
  76
  77 ; Move the constants (eight consecutive i32 zeros in %struct.B) using a single vector store.
  78 ; CHECK-LABEL: merge_const_store_vec:
  79 ; CHECK: vmovups
  80 ; CHECK: ret
  81 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
  82   %1 = icmp sgt i32 %count, 0                     ; skip the loop entirely when %count <= 0
  83   br i1 %1, label %.lr.ph, label %._crit_edge
  84 .lr.ph:
  85   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter
  86   %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written in this iteration
  87   %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
  88   store i32 0, i32* %2, align 4
  89   %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
  90   store i32 0, i32* %3, align 4
  91   %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
  92   store i32 0, i32* %4, align 4
  93   %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
  94   store i32 0, i32* %5, align 4
  95   %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
  96   store i32 0, i32* %6, align 4
  97   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
  98   store i32 0, i32* %7, align 4
  99   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
 100   store i32 0, i32* %8, align 4
 101   %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
 102   store i32 0, i32* %9, align 4
 103   %10 = add nsw i32 %i.02, 1
 104   %11 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance to the next struct
 105   %exitcond = icmp eq i32 %10, %count
 106   br i1 %exitcond, label %._crit_edge, label %.lr.ph
 107 ._crit_edge:
 108   ret void
 109 }
 110
 111 ; Move the first 4 constant bytes as a single 32-bit integer store (0x04030201). Move the rest as scalar byte stores.
 112 ; CHECK-LABEL: merge_nonconst_store:
 113 ; CHECK: movl $67305985
 114 ; CHECK: movb
 115 ; CHECK: movb
 116 ; CHECK: movb
 117 ; CHECK: movb
 118 ; CHECK: ret
 119 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
 120   %1 = icmp sgt i32 %count, 0                     ; skip the loop entirely when %count <= 0
 121   br i1 %1, label %.lr.ph, label %._crit_edge
 122 .lr.ph:
 123   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]     ; iteration counter
 124   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; struct written in this iteration
 125   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
 126   store i8 1, i8* %2, align 1
 127   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
 128   store i8 2, i8* %3, align 1
 129   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
 130   store i8 3, i8* %4, align 1
 131   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
 132   store i8 4, i8* %5, align 1
 133   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
 134   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const; ends the run of mergeable constant stores
 135   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
 136   store i8 6, i8* %7, align 1
 137   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
 138   store i8 7, i8* %8, align 1
 139   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
 140   store i8 8, i8* %9, align 1
 141   %10 = add nsw i32 %i.02, 1
 142   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance to the next struct
 143   %exitcond = icmp eq i32 %10, %count
 144   br i1 %exitcond, label %._crit_edge, label %.lr.ph
 145 ._crit_edge:
 146   ret void
 147 }
 148
 149 ; Two adjacent i8 loads from %q feed two adjacent i8 stores to %p; expect one 16-bit load and one 16-bit store.
 150 ;CHECK-LABEL: merge_loads_i16:
 151 ; load:
 152 ;CHECK: movw
 153 ; store:
 154 ;CHECK: movw
 155 ;CHECK: ret
 156 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
 157   %1 = icmp sgt i32 %count, 0
 158   br i1 %1, label %.lr.ph, label %._crit_edge
 159
 160 .lr.ph:                                           ; preds = %0
 161   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
 162   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
 163   br label %4
 164
 165 ; <label>:4                                       ; preds = %4, %.lr.ph
 166   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
 167   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
 168   %5 = load i8* %2, align 1                       ; both loads come first ...
 169   %6 = load i8* %3, align 1
 170   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
 171   store i8 %5, i8* %7, align 1                    ; ... followed by both stores
 172   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
 173   store i8 %6, i8* %8, align 1
 174   %9 = add nsw i32 %i.02, 1
 175   %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance the destination to the next struct
 176   %exitcond = icmp eq i32 %9, %count
 177   br i1 %exitcond, label %._crit_edge, label %4
 178
 179 ._crit_edge:                                      ; preds = %4, %0
 180   ret void
 181 }
 182
 183 ; The loads and the stores are interleaved (load, store, load, store). Can't merge them; expect four byte moves.
 184 ;CHECK-LABEL: no_merge_loads:
 185 ;CHECK: movb
 186 ;CHECK: movb
 187 ;CHECK: movb
 188 ;CHECK: movb
 189 ;CHECK: ret
 190 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
 191   %1 = icmp sgt i32 %count, 0
 192   br i1 %1, label %.lr.ph, label %._crit_edge
 193
 194 .lr.ph:                                           ; preds = %0
 195   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
 196   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
 197   br label %a4
 198
 199 a4:                                       ; preds = %a4, %.lr.ph
 200   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
 201   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
 202   %a5 = load i8* %2, align 1
 203   %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
 204   store i8 %a5, i8* %a7, align 1                  ; first store sits between the two loads
 205   %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
 206   %a6 = load i8* %3, align 1
 207   store i8 %a6, i8* %a8, align 1
 208   %a9 = add nsw i32 %i.02, 1
 209   %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; advance the destination to the next struct
 210   %exitcond = icmp eq i32 %a9, %count
 211   br i1 %exitcond, label %._crit_edge, label %a4
 212
 213 ._crit_edge:                                      ; preds = %a4, %0
 214   ret void
 215 }
 216
 217 ; Two adjacent i32 loads from %q feed two adjacent i32 stores to %p; expect one 64-bit load and one 64-bit store.
 218 ;CHECK-LABEL: merge_loads_integer:
 219 ; load:
 220 ;CHECK: movq
 221 ; store:
 222 ;CHECK: movq
 223 ;CHECK: ret
 224 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
 225   %1 = icmp sgt i32 %count, 0
 226   br i1 %1, label %.lr.ph, label %._crit_edge
 227
 228 .lr.ph:                                           ; preds = %0
 229   %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
 230   %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
 231   br label %4
 232
 233 ; <label>:4                                       ; preds = %4, %.lr.ph
 234   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
 235   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
 236   %5 = load i32* %2                               ; no explicit alignment on these loads/stores
 237   %6 = load i32* %3
 238   %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
 239   store i32 %5, i32* %7
 240   %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
 241   store i32 %6, i32* %8
 242   %9 = add nsw i32 %i.02, 1
 243   %10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination to the next struct
 244   %exitcond = icmp eq i32 %9, %count
 245   br i1 %exitcond, label %._crit_edge, label %4
 246
 247 ._crit_edge:                                      ; preds = %4, %0
 248   ret void
 249 }
 250
 251 ; Four adjacent i32 loads feed four adjacent i32 stores; expect one vector load and one vector store. NOTE(review): with +avx the output is presumably vmovups, which the substring pattern "movups" also matches.
 252 ;CHECK-LABEL: merge_loads_vector:
 253 ; load:
 254 ;CHECK: movups
 255 ; store:
 256 ;CHECK: movups
 257 ;CHECK: ret
 258 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
 259   %a1 = icmp sgt i32 %count, 0
 260   br i1 %a1, label %.lr.ph, label %._crit_edge
 261
 262 .lr.ph:                                           ; preds = %0
 263   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
 264   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
 265   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
 266   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
 267   br label %block4
 268
 269 block4:                                       ; preds = %block4, %.lr.ph
 270   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
 271   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
 272   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
 273   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
 274   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
 275   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
 276   %b1 = load i32* %a2                             ; all four loads precede all four stores
 277   %b2 = load i32* %a3
 278   %b3 = load i32* %a4
 279   %b4 = load i32* %a5
 280   store i32 %b1, i32* %a7
 281   store i32 %b2, i32* %a8
 282   store i32 %b3, i32* %a9
 283   store i32 %b4, i32* %a10
 284   %c9 = add nsw i32 %i.02, 1
 285   %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination to the next struct
 286   %exitcond = icmp eq i32 %c9, %count
 287   br i1 %exitcond, label %._crit_edge, label %block4
 288
 289 ._crit_edge:                                      ; preds = %block4, %0
 290   ret void
 291 }
 292 ; Same copy as merge_loads_vector, but every load and store is marked align 1; the checks expect four separate 32-bit loads and four separate 32-bit stores.
 293 ;CHECK-LABEL: merge_loads_no_align:
 294 ; load:
 295 ;CHECK: movl
 296 ;CHECK: movl
 297 ;CHECK: movl
 298 ;CHECK: movl
 299 ; store:
 300 ;CHECK: movl
 301 ;CHECK: movl
 302 ;CHECK: movl
 303 ;CHECK: movl
 304 ;CHECK: ret
 305 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
 306   %a1 = icmp sgt i32 %count, 0
 307   br i1 %a1, label %.lr.ph, label %._crit_edge
 308
 309 .lr.ph:                                           ; preds = %0
 310   %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
 311   %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
 312   %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
 313   %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
 314   br label %block4
 315
 316 block4:                                       ; preds = %block4, %.lr.ph
 317   %i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
 318   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
 319   %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
 320   %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
 321   %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
 322   %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
 323   %b1 = load i32* %a2, align 1                    ; under-aligned (align 1) accesses
 324   %b2 = load i32* %a3, align 1
 325   %b3 = load i32* %a4, align 1
 326   %b4 = load i32* %a5, align 1
 327   store i32 %b1, i32* %a7, align 1
 328   store i32 %b2, i32* %a8, align 1
 329   store i32 %b3, i32* %a9, align 1
 330   store i32 %b4, i32* %a10, align 1
 331   %c9 = add nsw i32 %i.02, 1
 332   %c10 = getelementptr inbounds %struct.B* %.01, i64 1 ; advance the destination to the next struct
 333   %exitcond = icmp eq i32 %c9, %count
 334   br i1 %exitcond, label %._crit_edge, label %block4
 335
 336 ._crit_edge:                                      ; preds = %block4, %0
 337   ret void
 338 }
 339
 340 ; Make sure that we merge the consecutive load/store sequence below and use a
 341 ; word (16 bit) instead of a byte copy.
 342 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
 343 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
 344 ; CHECK: movw    [[REG]], (%{{.*}})
 345 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
 346   br label %1
 347
 348 ; <label>:1
 349   %.09 = phi i32 [ %n, %0 ], [ %11, %1 ]          ; remaining iteration count
 350   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]          ; destination cursor, advanced by 2 bytes per iteration
 351   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]           ; cursor over the i64 index array
 352   %2 = getelementptr inbounds i64* %.0, i64 1
 353   %3 = load i64* %.0, align 1                     ; index into %c
 354   %4 = getelementptr inbounds i8* %c, i64 %3
 355   %5 = load i8* %4, align 1                       ; byte at %c[index]
 356   %6 = add i64 %3, 1
 357   %7 = getelementptr inbounds i8* %c, i64 %6
 358   %8 = load i8* %7, align 1                       ; byte at %c[index + 1], adjacent to the load above
 359   store i8 %5, i8* %.08, align 1
 360   %9 = getelementptr inbounds i8* %.08, i64 1
 361   store i8 %8, i8* %9, align 1                    ; adjacent to the store above
 362   %10 = getelementptr inbounds i8* %.08, i64 2
 363   %11 = add nsw i32 %.09, -1
 364   %12 = icmp eq i32 %11, 0
 365   br i1 %12, label %13, label %1
 366
 367 ; <label>:13
 368   ret void
 369 }
 370
 371 ; Make sure that we merge the consecutive load/store sequence below and use a
 372 ; word (16 bit) instead of a byte copy even if there are intermediate sign
 373 ; extensions.
 374 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
 375 ; CHECK: movw    (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
 376 ; CHECK: movw    [[REG]], (%{{.*}})
 377 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
 378   br label %1
 379
 380 ; <label>:1
 381   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]          ; remaining iteration count
 382   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]          ; destination cursor, advanced by 2 bytes per iteration
 383   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]            ; cursor over the i8 index array
 384   %2 = getelementptr inbounds i8* %.0, i64 1
 385   %3 = load i8* %.0, align 1
 386   %4 = sext i8 %3 to i64                          ; index is sign-extended once, before any address arithmetic
 387   %5 = getelementptr inbounds i8* %c, i64 %4
 388   %6 = load i8* %5, align 1
 389   %7 = add i64 %4, 1                              ; +1 is applied after the sext, so %8 is the byte right after %5
 390   %8 = getelementptr inbounds i8* %c, i64 %7
 391   %9 = load i8* %8, align 1
 392   store i8 %6, i8* %.08, align 1
 393   %10 = getelementptr inbounds i8* %.08, i64 1
 394   store i8 %9, i8* %10, align 1
 395   %11 = getelementptr inbounds i8* %.08, i64 2
 396   %12 = add nsw i32 %.09, -1
 397   %13 = icmp eq i32 %12, 0
 398   br i1 %13, label %14, label %1
 399
 400 ; <label>:14
 401   ret void
 402 }
 403
 404 ; However, we can only ignore sign extensions when they are on all memory
 405 ; computations. The patterns below bind no FileCheck variable, so they do not depend on an earlier match.
 406 ; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
 407 ; CHECK-NOT: movw    (%{{.*}},%{{.*}}), %{{[a-z]+}}
 408 ; CHECK-NOT: movw    %{{[a-z]+}}, (%{{.*}})
 409 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
 410   br label %1
 411
 412 ; <label>:1
 413   %.09 = phi i32 [ %n, %0 ], [ %12, %1 ]          ; remaining iteration count
 414   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]          ; destination cursor, advanced by 2 bytes per iteration
 415   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]            ; cursor over the i8 index array
 416   %2 = getelementptr inbounds i8* %.0, i64 1
 417   %3 = load i8* %.0, align 1
 418   %4 = sext i8 %3 to i64
 419   %5 = getelementptr inbounds i8* %c, i64 %4
 420   %6 = load i8* %5, align 1
 421   %7 = add i8 %3, 1                               ; +1 is done in i8 before the sext, so it can wrap
 422   %wrap.4 = sext i8 %7 to i64                     ; hence %8 is not guaranteed to be the byte after %5
 423   %8 = getelementptr inbounds i8* %c, i64 %wrap.4
 424   %9 = load i8* %8, align 1
 425   store i8 %6, i8* %.08, align 1
 426   %10 = getelementptr inbounds i8* %.08, i64 1
 427   store i8 %9, i8* %10, align 1
 428   %11 = getelementptr inbounds i8* %.08, i64 2
 429   %12 = add nsw i32 %.09, -1
 430   %13 = icmp eq i32 %12, 0
 431   br i1 %13, label %14, label %1
 432
 433 ; <label>:14
 434   ret void
 435 }