test/CodeGen/X86/lower-bitcast.ll

   1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
   2 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
   3
   4
   5 define double @test1(double %A) {  ; integer add with a constant on a double viewed as <2 x i32>
   6   %1 = bitcast double %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
   7   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise add of the constant vector <3, 5>
   8   %2 = bitcast <2 x i32> %add to double  ; reinterpret the sum as a double again
   9   ret double %2
  10 }
  11 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
  12 ; single paddd instruction. At the moment we produce the sequence
  13 ; pshufd+paddd+pshufd. This is fixed with the widening legalization.
  14 ;
  15 ; CHECK-LABEL: test1
  16 ; CHECK-NOT: movsd
  17 ; CHECK: pshufd
  18 ; CHECK-NEXT: paddd
  19 ; CHECK-NEXT: pshufd
  20 ; CHECK-NEXT: ret
  21 ;
  22 ; CHECK-WIDE-LABEL: test1
  23 ; CHECK-WIDE-NOT: movsd
  24 ; CHECK-WIDE: paddd
  25 ; CHECK-WIDE-NEXT: ret
  26
  27
  28 define double @test2(double %A, double %B) {  ; integer add of two doubles, each viewed as <2 x i32>
  29   %1 = bitcast double %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
  30   %2 = bitcast double %B to <2 x i32>  ; same reinterpretation for %B
  31   %add = add <2 x i32> %1, %2  ; lane-wise add; no constant operand in this variant
  32   %3 = bitcast <2 x i32> %add to double  ; reinterpret the sum as a double again
  33   ret double %3
  34 }
  35 ; CHECK-LABEL: test2
  36 ; CHECK-NOT: movsd
  37 ; CHECK: paddd
  38 ; CHECK-NEXT: ret
  39 ;
  40 ; CHECK-WIDE-LABEL: test2
  41 ; CHECK-WIDE-NOT: movsd
  42 ; CHECK-WIDE: paddd
  43 ; CHECK-WIDE-NEXT: ret
  44
  45
  46 define i64 @test3(i64 %A) {  ; floating-point add with a constant on an i64 viewed as <2 x float>
  47   %1 = bitcast i64 %A to <2 x float>  ; reinterpret the bits of %A as two float lanes
  48   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise fadd of the constant vector <3.0, 5.0>
  49   %2 = bitcast <2 x float> %add to i64  ; reinterpret the sum as an i64 again
  50   ret i64 %2
  51 }
  52 ; CHECK-LABEL: test3
  53 ; CHECK-NOT: pshufd
  54 ; CHECK: addps
  55 ; CHECK-NOT: pshufd
  56 ; CHECK: ret
  57 ;
  58 ; CHECK-WIDE-LABEL: test3
  59 ; CHECK-WIDE-NOT: pshufd
  60 ; CHECK-WIDE: addps
  61 ; CHECK-WIDE-NOT: pshufd
  62 ; CHECK-WIDE: ret
  63
  64
  65 define i64 @test4(i64 %A) {  ; integer add with a constant on an i64 viewed as <2 x i32>
  66   %1 = bitcast i64 %A to <2 x i32>  ; reinterpret the bits of %A as two i32 lanes
  67   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise add of the constant vector <3, 5>
  68   %2 = bitcast <2 x i32> %add to i64  ; reinterpret the sum as an i64 again
  69   ret i64 %2
  70 }
  71 ; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
  72 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
  73 ; the widening legalization.
  74 ;
  75 ; CHECK-LABEL: test4
  76 ; CHECK: pshufd
  77 ; CHECK-NEXT: paddd
  78 ; CHECK-NEXT: pshufd
  79 ; CHECK: ret
  80 ;
  81 ; CHECK-WIDE-LABEL: test4
  82 ; CHECK-WIDE: movd %{{rdi|rcx}},
  83 ; CHECK-WIDE-NEXT: paddd
  84 ; CHECK-WIDE-NEXT: movd {{.*}}, %rax
  85 ; CHECK-WIDE: ret
  86
  87
  88 define double @test5(double %A) {  ; floating-point add with a constant on a double viewed as <2 x float>
  89   %1 = bitcast double %A to <2 x float>  ; reinterpret the bits of %A as two float lanes
  90   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise fadd of the constant vector <3.0, 5.0>
  91   %2 = bitcast <2 x float> %add to double  ; reinterpret the sum as a double again
  92   ret double %2
  93 }
  94 ; CHECK-LABEL: test5
  95 ; CHECK: addps
  96 ; CHECK-NEXT: ret
  97 ;
  98 ; CHECK-WIDE-LABEL: test5
  99 ; CHECK-WIDE: addps
 100 ; CHECK-WIDE-NEXT: ret
 101
 102
 103 define double @test6(double %A) {  ; integer add with a constant on a double viewed as <4 x i16>
 104   %1 = bitcast double %A to <4 x i16>  ; reinterpret the bits of %A as four i16 lanes
 105   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>  ; lane-wise add of the constant vector <3, 4, 5, 6>
 106   %2 = bitcast <4 x i16> %add to double  ; reinterpret the sum as a double again
 107   ret double %2
 108 }
 109 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
 110 ; single paddw instruction. This is fixed with the widening legalization.
 111 ;
 112 ; CHECK-LABEL: test6
 113 ; CHECK-NOT: movsd
 114 ; CHECK: punpcklwd
 115 ; CHECK-NEXT: paddw
 116 ; CHECK-NEXT: pshufb
 117 ; CHECK-NEXT: ret
 118 ;
 119 ; CHECK-WIDE-LABEL: test6
 120 ; CHECK-WIDE-NOT: mov
 121 ; CHECK-WIDE-NOT: punpcklwd
 122 ; CHECK-WIDE: paddw
 123 ; CHECK-WIDE-NEXT: ret
 124
 125
 126 define double @test7(double %A, double %B) {  ; integer add of two doubles, each viewed as <4 x i16>
 127   %1 = bitcast double %A to <4 x i16>  ; reinterpret the bits of %A as four i16 lanes
 128   %2 = bitcast double %B to <4 x i16>  ; same reinterpretation for %B
 129   %add = add <4 x i16> %1, %2  ; lane-wise add; no constant operand in this variant
 130   %3 = bitcast <4 x i16> %add to double  ; reinterpret the sum as a double again
 131   ret double %3
 132 }
 133 ; CHECK-LABEL: test7
 134 ; CHECK-NOT: movsd
 135 ; CHECK-NOT: punpcklwd
 136 ; CHECK: paddw
 137 ; CHECK-NEXT: ret
 138 ;
 139 ; CHECK-WIDE-LABEL: test7
 140 ; CHECK-WIDE-NOT: movsd
 141 ; CHECK-WIDE-NOT: punpcklwd
 142 ; CHECK-WIDE: paddw
 143 ; CHECK-WIDE-NEXT: ret
 144
 145
 146 define double @test8(double %A) {  ; integer add with a constant on a double viewed as <8 x i8>
 147   %1 = bitcast double %A to <8 x i8>  ; reinterpret the bits of %A as eight i8 lanes
 148   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>  ; lane-wise add of the constants 3 through 10
 149   %2 = bitcast <8 x i8> %add to double  ; reinterpret the sum as a double again
 150   ret double %2
 151 }
 152 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
 153 ; single paddb instruction. At the moment we produce the sequence
 154 ; punpcklbw+paddb+pshufb. This is fixed with the widening legalization.
 155 ;
 156 ; CHECK-LABEL: test8
 157 ; CHECK-NOT: movsd
 158 ; CHECK: punpcklbw
 159 ; CHECK-NEXT: paddb
 160 ; CHECK-NEXT: pshufb
 161 ; CHECK-NEXT: ret
 162 ;
 163 ; CHECK-WIDE-LABEL: test8
 164 ; CHECK-WIDE-NOT: movsd
 165 ; CHECK-WIDE-NOT: punpcklbw
 166 ; CHECK-WIDE: paddb
 167 ; CHECK-WIDE-NEXT: ret
 168
 169
 170 define double @test9(double %A, double %B) {  ; integer add of two doubles, each viewed as <8 x i8>
 171   %1 = bitcast double %A to <8 x i8>  ; reinterpret the bits of %A as eight i8 lanes
 172   %2 = bitcast double %B to <8 x i8>  ; same reinterpretation for %B
 173   %add = add <8 x i8> %1, %2  ; lane-wise add; no constant operand in this variant
 174   %3 = bitcast <8 x i8> %add to double  ; reinterpret the sum as a double again
 175   ret double %3
 176 }
 177 ; CHECK-LABEL: test9
 178 ; CHECK-NOT: movsd
 179 ; CHECK-NOT: punpcklbw
 180 ; CHECK: paddb
 181 ; CHECK-NEXT: ret
 182 ;
 183 ; CHECK-WIDE-LABEL: test9
 184 ; CHECK-WIDE-NOT: movsd
 185 ; CHECK-WIDE-NOT: punpcklbw
 186 ; CHECK-WIDE: paddb
 187 ; CHECK-WIDE-NEXT: ret
 188