test/CodeGen/X86/lower-bitcast.ll

   1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
   2 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
   3
   4
   5 define double @test1(double %A) {  ; double -> <2 x i32> -> add constant vector -> double
   6   %1 = bitcast double %A to <2 x i32>  ; reinterpret the scalar argument as two i32 lanes
   7   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of the constant <3, 5>
   8   %2 = bitcast <2 x i32> %add to double  ; reinterpret the sum as the scalar return type
   9   ret double %2
  10 }
  11 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
  12 ; single paddd instruction. At the moment we produce the sequence
  13 ; pshufd+paddd+pshufd. This is fixed with the widening legalization.
  14 ;
  15 ; CHECK-LABEL: test1
  16 ; CHECK-NOT: movsd
  17 ; CHECK: pshufd
  18 ; CHECK-NEXT: paddd
  19 ; CHECK-NEXT: pshufd
  20 ; CHECK-NEXT: ret
  21 ;
  22 ; CHECK-WIDE-LABEL: test1
  23 ; CHECK-WIDE-NOT: movsd
  24 ; CHECK-WIDE: paddd
  25 ; CHECK-WIDE-NEXT: ret
  26
  27
  28 define double @test2(double %A, double %B) {  ; adds two doubles as <2 x i32> vectors, returns a double
  29   %1 = bitcast double %A to <2 x i32>  ; reinterpret the first argument as two i32 lanes
  30   %2 = bitcast double %B to <2 x i32>  ; reinterpret the second argument the same way
  31   %add = add <2 x i32> %1, %2  ; lane-wise integer add of the two arguments
  32   %3 = bitcast <2 x i32> %add to double  ; reinterpret the sum as the scalar return type
  33   ret double %3
  34 }
  35 ; CHECK-LABEL: test2
  36 ; CHECK-NOT: movsd
  37 ; CHECK: paddd
  38 ; CHECK-NEXT: ret
  39 ;
  40 ; CHECK-WIDE-LABEL: test2
  41 ; CHECK-WIDE-NOT: movsd
  42 ; CHECK-WIDE: paddd
  43 ; CHECK-WIDE-NEXT: ret
  44
  45
  46 define i64 @test3(i64 %A) {  ; i64 -> <2 x float> -> fadd constant vector -> i64
  47   %1 = bitcast i64 %A to <2 x float>  ; reinterpret the integer argument as two float lanes
  48   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of the constant <3.0, 5.0>
  49   %2 = bitcast <2 x float> %add to i64  ; reinterpret the sum as the integer return type
  50   ret i64 %2
  51 }
  52 ; CHECK-LABEL: test3
  53 ; CHECK-NOT: pshufd
  54 ; CHECK: addps
  55 ; CHECK-NOT: pshufd
  56 ; CHECK: ret
  57 ;
  58 ; CHECK-WIDE-LABEL: test3
  59 ; CHECK-WIDE-NOT: pshufd
  60 ; CHECK-WIDE: addps
  61 ; CHECK-WIDE-NOT: pshufd
  62 ; CHECK-WIDE: ret
  63
  64
  65 define i64 @test4(i64 %A) {  ; i64 -> <2 x i32> -> add constant vector -> i64
  66   %1 = bitcast i64 %A to <2 x i32>  ; reinterpret the integer argument as two i32 lanes
  67   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of the constant <3, 5>
  68   %2 = bitcast <2 x i32> %add to i64  ; reinterpret the sum as the integer return type
  69   ret i64 %2
  70 }
  71 ; FIXME: At the moment we still produce the sequence paddd+pshufd.
  72 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
  73 ; the widening legalization.
  74 ;
  75 ; CHECK-LABEL: test4
  76 ; CHECK: movd
  77 ; CHECK-NOT: pshufd
  78 ; CHECK-NEXT: paddd
  79 ; CHECK-NEXT: pshufd
  80 ; CHECK: ret
  81 ;
  82 ; CHECK-WIDE-LABEL: test4
  83 ; CHECK-WIDE: movd %{{rdi|rcx}},
  84 ; CHECK-WIDE-NEXT: paddd
  85 ; CHECK-WIDE-NEXT: movd {{.*}}, %rax
  86 ; CHECK-WIDE: ret
  87
  88
  89 define double @test5(double %A) {  ; double -> <2 x float> -> fadd constant vector -> double
  90   %1 = bitcast double %A to <2 x float>  ; reinterpret the scalar argument as two float lanes
  91   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of the constant <3.0, 5.0>
  92   %2 = bitcast <2 x float> %add to double  ; reinterpret the sum as the scalar return type
  93   ret double %2
  94 }
  95 ; CHECK-LABEL: test5
  96 ; CHECK: addps
  97 ; CHECK-NEXT: ret
  98 ;
  99 ; CHECK-WIDE-LABEL: test5
 100 ; CHECK-WIDE: addps
 101 ; CHECK-WIDE-NEXT: ret
 102
 103
 104 define double @test6(double %A) {  ; double -> <4 x i16> -> add constant vector -> double
 105   %1 = bitcast double %A to <4 x i16>  ; reinterpret the scalar argument as four i16 lanes
 106   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>  ; lane-wise integer add of the constant <3, 4, 5, 6>
 107   %2 = bitcast <4 x i16> %add to double  ; reinterpret the sum as the scalar return type
 108   ret double %2
 109 }
 110 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
 111 ; single paddw instruction. This is fixed with the widening legalization.
 112 ;
 113 ; CHECK-LABEL: test6
 114 ; CHECK-NOT: movsd
 115 ; CHECK: punpcklwd
 116 ; CHECK-NEXT: paddw
 117 ; CHECK-NEXT: pshufb
 118 ; CHECK-NEXT: ret
 119 ;
 120 ; CHECK-WIDE-LABEL: test6
 121 ; CHECK-WIDE-NOT: mov
 122 ; CHECK-WIDE-NOT: punpcklwd
 123 ; CHECK-WIDE: paddw
 124 ; CHECK-WIDE-NEXT: ret
 125
 126
 127 define double @test7(double %A, double %B) {  ; adds two doubles as <4 x i16> vectors, returns a double
 128   %1 = bitcast double %A to <4 x i16>  ; reinterpret the first argument as four i16 lanes
 129   %2 = bitcast double %B to <4 x i16>  ; reinterpret the second argument the same way
 130   %add = add <4 x i16> %1, %2  ; lane-wise integer add of the two arguments
 131   %3 = bitcast <4 x i16> %add to double  ; reinterpret the sum as the scalar return type
 132   ret double %3
 133 }
 134 ; CHECK-LABEL: test7
 135 ; CHECK-NOT: movsd
 136 ; CHECK-NOT: punpcklwd
 137 ; CHECK: paddw
 138 ; CHECK-NEXT: ret
 139 ;
 140 ; CHECK-WIDE-LABEL: test7
 141 ; CHECK-WIDE-NOT: movsd
 142 ; CHECK-WIDE-NOT: punpcklwd
 143 ; CHECK-WIDE: paddw
 144 ; CHECK-WIDE-NEXT: ret
 145
 146
 147 define double @test8(double %A) {  ; double -> <8 x i8> -> add constant vector -> double
 148   %1 = bitcast double %A to <8 x i8>  ; reinterpret the scalar argument as eight i8 lanes
 149   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>  ; lane-wise integer add of the constant <3 .. 10>
 150   %2 = bitcast <8 x i8> %add to double  ; reinterpret the sum as the scalar return type
 151   ret double %2
 152 }
 153 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
 154 ; single paddb instruction. At the moment we produce the sequence
 155 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
 156 ;
 157 ; CHECK-LABEL: test8
 158 ; CHECK-NOT: movsd
 159 ; CHECK: punpcklbw
 160 ; CHECK-NEXT: paddb
 161 ; CHECK-NEXT: pshufb
 162 ; CHECK-NEXT: ret
 163 ;
 164 ; CHECK-WIDE-LABEL: test8
 165 ; CHECK-WIDE-NOT: movsd
 166 ; CHECK-WIDE-NOT: punpcklbw
 167 ; CHECK-WIDE: paddb
 168 ; CHECK-WIDE-NEXT: ret
 169
 170
 171 define double @test9(double %A, double %B) {  ; adds two doubles as <8 x i8> vectors, returns a double
 172   %1 = bitcast double %A to <8 x i8>  ; reinterpret the first argument as eight i8 lanes
 173   %2 = bitcast double %B to <8 x i8>  ; reinterpret the second argument the same way
 174   %add = add <8 x i8> %1, %2  ; lane-wise integer add of the two arguments
 175   %3 = bitcast <8 x i8> %add to double  ; reinterpret the sum as the scalar return type
 176   ret double %3
 177 }
 178 ; CHECK-LABEL: test9
 179 ; CHECK-NOT: movsd
 180 ; CHECK-NOT: punpcklbw
 181 ; CHECK: paddb
 182 ; CHECK-NEXT: ret
 183 ;
 184 ; CHECK-WIDE-LABEL: test9
 185 ; CHECK-WIDE-NOT: movsd
 186 ; CHECK-WIDE-NOT: punpcklbw
 187 ; CHECK-WIDE: paddb
 188 ; CHECK-WIDE-NEXT: ret
 189