test/CodeGen/X86/avx-vzeroupper.ll

   1 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
   2
   3 declare i32 @foo()                          ; called in test03's while.cond loop
   4 declare <4 x float> @do_sse(<4 x float>)    ; external callee taking and returning a <4 x float>
   5 declare <8 x float> @do_avx(<8 x float>)    ; external callee taking and returning an <8 x float>
   6 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone ; used by test02 and test03 to get a <4 x float> out of an <8 x float>
   7 @x = common global <4 x float> zeroinitializer, align 16 ; loaded and stored by test01
   8 @g = common global <8 x float> zeroinitializer, align 32 ; loaded by test03
   9
  10 ;; Basic checking - no vzeroupper instruction should be emitted.
     ;; test00 only works on <4 x float> values: it adds %a and %b and passes
     ;; the sum to do_sse. The directives below require that no vzeroupper
     ;; appears between the match of _test00 and the ret.
  11
  12 ; CHECK: _test00
  13 define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
  14 entry:
  15   ; CHECK-NOT: vzeroupper
  16   %add.i = fadd <4 x float> %a, %b
  17   %call3 = call <4 x float> @do_sse(<4 x float> %add.i) nounwind
  18   ; CHECK: ret
  19   ret <4 x float> %call3
  20 }
  21
  22 ;; Check 256-bit parameter passing.
     ;; test01 receives %c as an <8 x float> argument and returns it unchanged,
     ;; calling do_sse twice in between (each call result is stored back to @x).
     ;; The directives expect a vzeroupper on the line directly before the first
     ;; call and no further vzeroupper between the first and the second call.
  23
  24 ; CHECK: _test01
  25 define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
  26 entry:
  27   %tmp = load <4 x float>, <4 x float>* @x, align 16
  28   ; CHECK: vzeroupper
  29   ; CHECK-NEXT: callq _do_sse
  30   %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
  31   store <4 x float> %call, <4 x float>* @x, align 16
  32   ; CHECK-NOT: vzeroupper
  33   ; CHECK: callq _do_sse
  34   %call2 = tail call <4 x float> @do_sse(<4 x float> %call) nounwind
  35   store <4 x float> %call2, <4 x float>* @x, align 16
  36   ; CHECK: ret
  37   ret <8 x float> %c
  38 }
  39
  40 ;; Check that vzeroupper is emitted for tail calls.
     ;; test02 adds two <8 x float> values, extracts a <4 x float> from the sum
     ;; with the vextractf128 intrinsic (immediate 0) and tail-calls do_sse with
     ;; it. The call is expected to be emitted as a jmp, with a vzeroupper
     ;; somewhere before it.
  41
  42 ; CHECK: _test02
  43 define <4 x float> @test02(<8 x float> %a, <8 x float> %b) nounwind uwtable ssp {
  44 entry:
  45   %add.i = fadd <8 x float> %a, %b
  46   %add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
  47   ; CHECK: vzeroupper
  48   ; CHECK: jmp _do_sse
  49   %call3 = tail call <4 x float> @do_sse(<4 x float> %add.low) nounwind
  50   ret <4 x float> %call3
  51 }
  52
  53 ;; Test that the pass converges and that vzeroupper is only issued when
  54 ;; necessary; for this function it should be issued only once.
     ;; while.cond keeps calling foo() until it returns 0. for.body then runs
     ;; four iterations (%1 counts up to 4), each making three calls to do_sse.
     ;; Only the third call is preceded by <8 x float> work (a load from @g and
     ;; a vextractf128), and that is where the single vzeroupper is expected.
     ;; NOTE(review): nothing is checked negatively before the first LBB match
     ;; or after the last callq, so an extra vzeroupper in those regions would
     ;; not be caught by this test - confirm whether that is intended.
  55
  56 ; CHECK: _test03
  57 define <4 x float> @test03(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
  58 entry:
  59   %add.i = fadd <4 x float> %a, %b
  60   br label %while.cond
  61
  62 while.cond:
  63   %call = tail call i32 @foo()
  64   %tobool = icmp eq i32 %call, 0
  65   br i1 %tobool, label %for.body, label %while.cond
  66
  67 for.body:
  68   ; CHECK: LBB
  69   ; CHECK-NOT: vzeroupper
  70   %i.018 = phi i32 [ 0, %while.cond ], [ %1, %for.body ]
  71   %c.017 = phi <4 x float> [ %add.i, %while.cond ], [ %call14, %for.body ]
       ; The first two calls only pass <4 x float> values along; they are
       ; expected on consecutive output lines with no vzeroupper before them.
  72   ; CHECK: callq _do_sse
  73   %call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
  74   ; CHECK-NEXT: callq _do_sse
  75   %call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
       ; The load below is the only <8 x float> operation in the loop body.
  76   %tmp11 = load <8 x float>, <8 x float>* @g, align 32
  77   %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
  78   ; CHECK: vzeroupper
  79   ; CHECK-NEXT: callq _do_sse
  80   %call14 = tail call <4 x float> @do_sse(<4 x float> %0) nounwind
  81   %1 = add nsw i32 %i.018, 1
  82   %exitcond = icmp eq i32 %1, 4
  83   br i1 %exitcond, label %for.end, label %for.body
  84
  85 for.end:
  86   ret <4 x float> %call14
  87 }
  88
  89 ;; Check that we also perform vzeroupper when we return from a function.
     ;; test04 concatenates %a and %b into an <8 x float>, passes it to do_avx
     ;; (no vzeroupper is expected before that call) and returns the first four
     ;; elements of the result; a vzeroupper is expected before the ret.
  90
  91 ; CHECK: _test04
  92 define <4 x float> @test04(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
  93 entry:
  94   %shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  95   ; CHECK-NOT: vzeroupper
  96   ; CHECK: call
  97   %call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
  98   %shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  99   ; CHECK: vzeroupper
 100   ; CHECK: ret
 101   ret <4 x float> %shuf2
 102 }