1 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
2 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE
3 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
4 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE
5 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
6 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE
7 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
8 ; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE
11 ; Note: Lots of tests use inline asm instead of regular calls.
12 ; This gives better control over what the allocation will do.
13 ; Otherwise, we may have a spill right in the entry block, defeating
14 ; shrink-wrapping. Moreover, some of the inline asm statements (nop)
15 ; are here to ensure that the related paths do not end up as critical
17 ; Also disable the late if-converter as it makes it harder to reason on
20 ; Initial motivating example: Simple diamond with a call just on one side.
23 ; Compare the arguments and jump to exit.
26 ; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
29 ; CHECK: push {r7, lr}
30 ; CHECK-NEXT: mov r7, sp
32 ; Compare the arguments and jump to exit.
33 ; After the prologue is set.
36 ; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
38 ; Store %a in the alloca.
39 ; ARM-ENABLE: push {r0}
40 ; THUMB-ENABLE: str r0, [sp, #-4]
41 ; DISABLE: str r0, [sp]
42 ; Set the alloca address in the second argument.
43 ; CHECK-NEXT: mov r1, sp
44 ; Set the first argument to zero.
45 ; CHECK-NEXT: mov{{s?}} r0, #0
46 ; CHECK-NEXT: bl{{x?}} _doSomething
48 ; With shrink-wrapping, epilogue is just after the call.
49 ; ARM-ENABLE-NEXT: mov sp, r7
50 ; THUMB-ENABLE-NEXT: add sp, #4
51 ; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr}
53 ; CHECK: [[EXIT_LABEL]]:
55 ; Without shrink-wrapping, epilogue is in the exit block.
56 ; Epilogue code. (What we pop does not matter.)
57 ; ARM-DISABLE: mov sp, r7
58 ; THUMB-DISABLE: add sp,
59 ; DISABLE-NEXT: pop {r7, pc}
; @foo compares its arguments (signed %a < %b) and branches to %true or %false.
; On the %true path, %a is stored into the stack slot %tmp and the slot's
; address is passed to @doSomething. The final phi yields the call result
; when coming from %true, or %a when coming straight from the entry block (%0).
62 define i32 @foo(i32 %a, i32 %b) {
63 %tmp = alloca i32, align 4
64 %tmp2 = icmp slt i32 %a, %b
65 br i1 %tmp2, label %true, label %false
68 store i32 %a, i32* %tmp, align 4
69 %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
73 %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
77 ; Function Attrs: optsize
78 declare i32 @doSomething(i32, i32*)
81 ; Check that we do not perform the restore inside the loop whereas the save
83 ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
85 ; Shrink-wrapping allows skipping the prologue in the else case.
86 ; ARM-ENABLE: cmp r0, #0
87 ; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
88 ; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
91 ; Make sure we save the CSR used in the inline asm: r4.
92 ; CHECK: push {r4, r7, lr}
93 ; CHECK-NEXT: add r7, sp, #4
95 ; ARM-DISABLE: cmp r0, #0
96 ; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
97 ; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
99 ; SUM is in r0 because it is coalesced with the second
100 ; argument on the else path.
101 ; CHECK: mov{{s?}} [[SUM:r0]], #0
102 ; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
105 ; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
106 ; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
107 ; ARM: subs [[IV]], [[IV]], #1
108 ; THUMB: subs [[IV]], #1
109 ; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
110 ; THUMB-NEXT: add [[SUM]], [[TMP]]
111 ; CHECK-NEXT: bne [[LOOP]]
115 ; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
116 ; ENABLE-NEXT: pop {r4, r7, pc}
118 ; Duplicated epilogue.
119 ; DISABLE: pop {r4, r7, pc}
121 ; CHECK: [[ELSE_LABEL]]: @ %if.else
122 ; Shift second argument by one and store into returned register.
123 ; CHECK: lsl{{s?}} r0, r1, #1
124 ; DISABLE-NEXT: pop {r4, r7, pc}
; @freqSaveAndRestoreOutsideLoop: when %cond is zero, control goes to %if.else,
; which computes %N << 1. Otherwise a loop runs until %inc reaches 10; each
; iteration executes an inline asm that produces a value and lists r4 as
; clobbered, and adds that value to the running sum. After the loop the sum is
; shifted left by 3. %if.end merges the two results.
127 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
129 %tobool = icmp eq i32 %cond, 0
130 br i1 %tobool, label %if.else, label %for.preheader
133 tail call void asm "nop", ""()
136 for.body: ; preds = %for.preheader, %for.body
137 %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
138 %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
; The "~{r4}" clobber is the only use of r4 in this loop body.
139 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
140 %add = add nsw i32 %call, %sum.04
141 %inc = add nuw nsw i32 %i.05, 1
142 %exitcond = icmp eq i32 %inc, 10
143 br i1 %exitcond, label %for.end, label %for.body
145 for.end: ; preds = %for.body
146 %shl = shl i32 %add, 3
149 if.else: ; preds = %entry
150 %mul = shl nsw i32 %N, 1
153 if.end: ; preds = %if.else, %for.end
154 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
158 declare i32 @something(...)
160 ; Check that we do not perform the shrink-wrapping inside the loop even
161 ; though that would be legal. The cost model must prevent that.
162 ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
164 ; Make sure we save the CSR used in the inline asm: r4.
166 ; CHECK: mov{{s?}} [[SUM:r0]], #0
167 ; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
170 ; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
171 ; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
172 ; ARM: subs [[IV]], [[IV]], #1
173 ; THUMB: subs [[IV]], #1
174 ; ARM: add [[SUM]], [[TMP]], [[SUM]]
175 ; THUMB: add [[SUM]], [[TMP]]
176 ; CHECK-NEXT: bne [[LOOP_LABEL]]
; @freqSaveAndRestoreOutsideLoop2: branches unconditionally to %for.preheader,
; then runs a loop until %inc reaches 10. Each iteration executes an inline asm
; that produces a value and lists r4 as clobbered, and accumulates that value.
; The loop exits to %for.exit.
181 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
183 br label %for.preheader
186 tail call void asm "nop", ""()
189 for.body: ; preds = %for.body, %for.preheader
190 %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
191 %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
192 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
193 %add = add nsw i32 %call, %sum.03
194 %inc = add nuw nsw i32 %i.04, 1
195 %exitcond = icmp eq i32 %inc, 10
196 br i1 %exitcond, label %for.exit, label %for.body
199 tail call void asm "nop", ""()
202 for.end: ; preds = %for.exit -- NOTE(review): inferred, %for.body branches to %for.exit rather than here; confirm
206 ; Check with a more complex case that we do not have save within the loop and
208 ; CHECK-LABEL: loopInfoSaveOutsideLoop:
210 ; ARM-ENABLE: cmp r0, #0
211 ; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
212 ; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
215 ; Make sure we save the CSR used in the inline asm: r4.
216 ; CHECK: push {r4, r7, lr}
217 ; CHECK-NEXT: add r7, sp, #4
219 ; ARM-DISABLE: cmp r0, #0
220 ; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
221 ; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
223 ; SUM is in r0 because it is coalesced with the second
224 ; argument on the else path.
225 ; CHECK: mov{{s?}} [[SUM:r0]], #0
226 ; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
229 ; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
230 ; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
231 ; ARM: subs [[IV]], [[IV]], #1
232 ; THUMB: subs [[IV]], #1
233 ; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
234 ; THUMB-NEXT: add [[SUM]], [[TMP]]
235 ; CHECK-NEXT: bne [[LOOP]]
239 ; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
240 ; ENABLE: pop {r4, r7, pc}
242 ; Duplicated epilogue.
243 ; DISABLE: pop {r4, r7, pc}
245 ; CHECK: [[ELSE_LABEL]]: @ %if.else
246 ; Shift second argument by one and store into returned register.
247 ; CHECK: lsl{{s?}} r0, r1, #1
248 ; DISABLE-NEXT: pop {r4, r7, pc}
; @loopInfoSaveOutsideLoop: when %cond is zero, control goes to %if.else, which
; computes %N << 1. Otherwise a loop runs until %inc reaches 10, accumulating
; the result of an inline asm that lists r4 as clobbered. %for.end contains a
; further inline asm that also clobbers r4, so r4 is touched both inside and
; after the loop. %if.end merges the two results.
251 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
253 %tobool = icmp eq i32 %cond, 0
254 br i1 %tobool, label %if.else, label %for.preheader
257 tail call void asm "nop", ""()
260 for.body: ; preds = %for.preheader, %for.body
261 %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
262 %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
263 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
264 %add = add nsw i32 %call, %sum.04
265 %inc = add nuw nsw i32 %i.05, 1
266 %exitcond = icmp eq i32 %inc, 10
267 br i1 %exitcond, label %for.end, label %for.body
269 for.end: ; preds = %for.body
; r4 is clobbered again after the loop has finished.
270 tail call void asm "nop", "~{r4}"()
271 %shl = shl i32 %add, 3
274 if.else: ; preds = %entry
275 %mul = shl nsw i32 %N, 1
278 if.end: ; preds = %if.else, %for.end
279 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
283 declare void @somethingElse(...)
285 ; Check with a more complex case that we do not have restore within the loop and
287 ; CHECK-LABEL: loopInfoRestoreOutsideLoop:
289 ; ARM-ENABLE: cmp r0, #0
290 ; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
291 ; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
294 ; Make sure we save the CSR used in the inline asm: r4.
295 ; CHECK: push {r4, r7, lr}
296 ; CHECK-NEXT: add r7, sp, #4
298 ; ARM-DISABLE: cmp r0, #0
299 ; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
300 ; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
302 ; SUM is in r0 because it is coalesced with the second
303 ; argument on the else path.
304 ; CHECK: mov{{s?}} [[SUM:r0]], #0
305 ; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
308 ; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
309 ; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
310 ; ARM: subs [[IV]], [[IV]], #1
311 ; THUMB: subs [[IV]], #1
312 ; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
313 ; THUMB-NEXT: add [[SUM]], [[TMP]]
314 ; CHECK-NEXT: bne [[LOOP]]
318 ; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
319 ; ENABLE-NEXT: pop {r4, r7, pc}
321 ; Duplicated epilogue.
322 ; DISABLE: pop {r4, r7, pc}
324 ; CHECK: [[ELSE_LABEL]]: @ %if.else
325 ; Shift second argument by one and store into returned register.
326 ; CHECK: lsl{{s?}} r0, r1, #1
327 ; DISABLE-NEXT: pop {r4, r7, pc}
; @loopInfoRestoreOutsideLoop: when %cond is zero, control goes to %if.else,
; which computes %N << 1. Otherwise %if.then executes an inline asm that lists
; r4 as clobbered before entering a loop that runs until %inc reaches 10; the
; loop body clobbers r4 as well while accumulating the asm result. After the
; loop the sum is shifted left by 3. %if.end merges the two results.
330 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
332 %tobool = icmp eq i32 %cond, 0
333 br i1 %tobool, label %if.else, label %if.then
335 if.then: ; preds = %entry
; r4 is clobbered before the loop is entered.
336 tail call void asm "nop", "~{r4}"()
339 for.body: ; preds = %for.body, %if.then
340 %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
341 %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
342 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
343 %add = add nsw i32 %call, %sum.04
344 %inc = add nuw nsw i32 %i.05, 1
345 %exitcond = icmp eq i32 %inc, 10
346 br i1 %exitcond, label %for.end, label %for.body
348 for.end: ; preds = %for.body
349 %shl = shl i32 %add, 3
352 if.else: ; preds = %entry
353 %mul = shl nsw i32 %N, 1
356 if.end: ; preds = %if.else, %for.end
357 %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
361 ; Check that we handle function with no frame information correctly.
362 ; CHECK-LABEL: emptyFrame:
364 ; CHECK-NEXT: mov{{s?}} r0, #0
366 define i32 @emptyFrame() {
371 ; Check that we handle inline asm correctly.
372 ; CHECK-LABEL: inlineAsm:
374 ; ARM-ENABLE: cmp r0, #0
375 ; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
376 ; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
379 ; Make sure we save the CSR used in the inline asm: r4.
380 ; CHECK: push {r4, r7, lr}
381 ; CHECK-NEXT: add r7, sp, #4
383 ; ARM-DISABLE: cmp r0, #0
384 ; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
385 ; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
387 ; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10
390 ; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
391 ; ARM: subs [[IV]], [[IV]], #1
392 ; THUMB: subs [[IV]], #1
393 ; CHECK: add{{(\.w)?}} r4, r4, #1
394 ; CHECK: bne [[LOOP]]
397 ; CHECK: mov{{s?}} r0, #0
399 ; Duplicated epilogue.
400 ; DISABLE: pop {r4, r7, pc}
402 ; CHECK: [[ELSE_LABEL]]: @ %if.else
403 ; Shift second argument by one and store into returned register.
404 ; CHECK: lsl{{s?}} r0, r1, #1
405 ; DISABLE-NEXT: pop {r4, r7, pc}
; @inlineAsm: when %cond is zero, control goes to %if.else, which computes
; %N << 1. Otherwise a loop runs until %inc reaches 10; each iteration executes
; an inline asm whose text names r4 directly and which lists r4 as clobbered.
; The loop exits to %for.exit, and %if.end yields 0 on that path or %mul on the
; else path.
408 define i32 @inlineAsm(i32 %cond, i32 %N) {
410 %tobool = icmp eq i32 %cond, 0
411 br i1 %tobool, label %if.else, label %for.preheader
414 tail call void asm "nop", ""()
417 for.body: ; preds = %for.preheader, %for.body
418 %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
419 tail call void asm sideeffect "add r4, #1", "~{r4}"()
420 %inc = add nuw nsw i32 %i.03, 1
421 %exitcond = icmp eq i32 %inc, 10
422 br i1 %exitcond, label %for.exit, label %for.body
425 tail call void asm "nop", ""()
428 if.else: ; preds = %entry
429 %mul = shl nsw i32 %N, 1
432 if.end: ; preds = %for.exit, %if.else
433 %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
437 ; Check that we handle calls to variadic functions correctly.
438 ; CHECK-LABEL: callVariadicFunc:
440 ; ARM-ENABLE: cmp r0, #0
441 ; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
442 ; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
445 ; CHECK: push {r7, lr}
446 ; CHECK-NEXT: mov r7, sp
447 ; CHECK-NEXT: sub sp, {{(sp, )?}}#12
449 ; ARM-DISABLE: cmp r0, #0
450 ; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
451 ; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
453 ; Setup of the varargs.
455 ; CHECK-NEXT: mov r2, r1
456 ; CHECK-NEXT: mov r3, r1
457 ; ARM-NEXT: str r1, [sp]
458 ; ARM-NEXT: str r1, [sp, #4]
459 ; THUMB-NEXT: strd r1, r1, [sp]
460 ; CHECK-NEXT: str r1, [sp, #8]
461 ; CHECK-NEXT: bl{{x?}} _someVariadicFunc
462 ; CHECK-NEXT: lsl{{s?}} r0, r0, #3
463 ; ARM-NEXT: mov sp, r7
464 ; THUMB-NEXT: add sp, #12
465 ; CHECK-NEXT: pop {r7, pc}
467 ; CHECK: [[ELSE_LABEL]]: @ %if.else
468 ; Shift second argument by one and store into returned register.
469 ; CHECK: lsl{{s?}} r0, r1, #1
474 ; ARM-DISABLE-NEXT: mov sp, r7
475 ; THUMB-DISABLE-NEXT: add sp, #12
476 ; DISABLE-NEXT: pop {r7, pc}
; @callVariadicFunc: when %cond is zero, control goes to %if.else, which
; computes %N << 1. Otherwise %if.then calls the variadic @someVariadicFunc
; with %N passed seven times and shifts the returned value left by 3.
; %if.end merges the two results.
477 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
479 %tobool = icmp eq i32 %cond, 0
480 br i1 %tobool, label %if.else, label %if.then
482 if.then: ; preds = %entry
483 %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
484 %shl = shl i32 %call, 3
487 if.else: ; preds = %entry
488 %mul = shl nsw i32 %N, 1
491 if.end: ; preds = %if.else, %if.then
492 %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
496 declare i32 @someVariadicFunc(i32, ...)
498 ; Make sure we do not insert unreachable code after noreturn function.
499 ; Although it is not incorrect to insert such code, it is useless
500 ; and it hurts the binary size.
502 ; CHECK-LABEL: noreturn:
505 ; CHECK: tst{{(\.w)?}} r0, #255
506 ; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]]
508 ; CHECK: mov{{s?}} r0, #42
514 ; CHECK: [[ABORT]]: @ %if.abort
518 ; CHECK: bl{{x?}} _abort
; @noreturn: when %bad_thing is zero, control goes to %if.end; otherwise it goes
; to %if.abort. The inline asm below lists r4 as clobbered and is followed by a
; call to @abort, which carries attribute group #0 (noreturn nounwind).
; NOTE(review): the label line for these two instructions is not visible here;
; they presumably form %if.abort -- confirm.
520 define i32 @noreturn(i8 signext %bad_thing) {
522 %tobool = icmp eq i8 %bad_thing, 0
523 br i1 %tobool, label %if.end, label %if.abort
526 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
527 tail call void @abort() #0
534 declare void @abort() #0
536 attributes #0 = { noreturn nounwind }
538 ; Make sure that we handle infinite loops properly. When checking that the Save
539 ; and Restore blocks are control flow equivalent, the loop searches for the
540 ; immediate (post) dominator for the (restore) save blocks. When either the Save
541 ; or Restore block is located in an infinite loop the only immediate (post)
542 ; dominator is itself. In this case, we cannot perform shrink wrapping, but we
543 ; should return gracefully and continue compilation.
544 ; The only condition for this test is the compilation finishes correctly.
545 ; CHECK-LABEL: infiniteloop
; @infiniteloop: branches on an undef condition to %if.then or %if.end.
; %for.body is entered from %if.then and loops back to itself (per its phi);
; each iteration executes an inline asm that lists r4 as clobbered, accumulates
; the asm result, and stores the sum through %ptr.
547 define void @infiniteloop() {
549 br i1 undef, label %if.then, label %if.end
552 %ptr = alloca i32, i32 4
555 for.body: ; preds = %for.body, %if.then
556 %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
557 %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
558 %add = add nsw i32 %call, %sum.03
559 store i32 %add, i32* %ptr
566 ; Another infinite loop test, this time with a body bigger than just one block.
567 ; CHECK-LABEL: infiniteloop2
; @infiniteloop2: branches on an undef condition to %if.then or %if.end.
; %for.body is entered from %if.then and re-entered from %body1 and %body2
; (per its phi). It executes an inline asm that lists r4 as clobbered, stores
; the accumulated sum through %ptr, then branches on undef to %body1 or %body2.
; The two trailing inline asm statements also clobber r4.
569 define void @infiniteloop2() {
571 br i1 undef, label %if.then, label %if.end
574 %ptr = alloca i32, i32 4
577 for.body: ; preds = %body1, %body2, %if.then
578 %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
579 %call = tail call i32 asm "mov $0, #0", "=r,~{r4}"()
580 %add = add nsw i32 %call, %sum.03
581 store i32 %add, i32* %ptr
582 br i1 undef, label %body1, label %body2
585 tail call void asm sideeffect "nop", "~{r4}"()
589 tail call void asm sideeffect "nop", "~{r4}"()
596 ; Another infinite loop test, this time with two nested infinite loops.
597 ; CHECK-LABEL: infiniteloop3
599 define void @infiniteloop3() {
601 br i1 undef, label %loop2a, label %body
603 body: ; preds = %entry
604 br i1 undef, label %loop2a, label %end
606 loop1: ; preds = %loop2a, %loop2b
607 %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
608 %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
609 %0 = icmp eq i32* %var, null
610 %next.load = load i32*, i32** undef
611 br i1 %0, label %loop2a, label %loop2b
613 loop2a: ; preds = %loop1, %body, %entry
614 %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
615 %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
618 loop2b: ; preds = %loop1
619 %gep1 = bitcast i32* %var.phi to i32*
620 %next.ptr = bitcast i32* %gep1 to i32**
621 store i32* %next.phi, i32** %next.ptr