test/CodeGen/X86/lower-bitcast.ll

   1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
   2
   3
   4 define double @test1(double %A) {  ; add <3, 5> to the bits of %A viewed as <2 x i32>
   5   %1 = bitcast double %A to <2 x i32>  ; reinterpret the 64-bit double as two i32 lanes
   6   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of a constant vector
   7   %2 = bitcast <2 x i32> %add to double  ; reinterpret the two-lane sum back as a double
   8   ret double %2
   9 }
  10 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
  11 ; single paddd instruction. At the moment we produce the sequence
  12 ; pshufd+paddq+pshufd.
  13
  14 ; CHECK-LABEL: test1
  15 ; CHECK-NOT: movsd
  16 ; CHECK: pshufd
  17 ; CHECK-NEXT: paddq
  18 ; CHECK-NEXT: pshufd
  19 ; CHECK-NEXT: ret
  20
  21
  22 define double @test2(double %A, double %B) {  ; add %A and %B lane-wise as <2 x i32>
  23   %1 = bitcast double %A to <2 x i32>  ; view the first double as two i32 lanes
  24   %2 = bitcast double %B to <2 x i32>  ; view the second double as two i32 lanes
  25   %add = add <2 x i32> %1, %2  ; lane-wise integer add of the two operands
  26   %3 = bitcast <2 x i32> %add to double  ; reinterpret the two-lane sum back as a double
  27   ret double %3
  28 }
  29 ; FIXME: Ideally we should be able to fold the entire body of @test2 into a
  30 ; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
  31 ; sequence pshufd+pshufd+paddq+pshufd.
  32
  33 ; CHECK-LABEL: test2
  34 ; CHECK-NOT: movsd
  35 ; CHECK: pshufd
  36 ; CHECK-NEXT: pshufd
  37 ; CHECK-NEXT: paddq
  38 ; CHECK-NEXT: pshufd
  39 ; CHECK-NEXT: ret
  40
  41
  42 define i64 @test3(i64 %A) {  ; fadd <3.0, 5.0> to the bits of %A viewed as <2 x float>
  43   %1 = bitcast i64 %A to <2 x float>  ; reinterpret the 64-bit integer as two float lanes
  44   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of a constant vector
  45   %2 = bitcast <2 x float> %add to i64  ; reinterpret the two-lane sum back as an i64
  46   ret i64 %2
  47 }
  48 ; CHECK-LABEL: test3
  49 ; CHECK-NOT: pshufd
  50 ; CHECK: addps
  51 ; CHECK-NOT: pshufd
  52 ; CHECK: ret
  53
  54
  55 define i64 @test4(i64 %A) {  ; add <3, 5> to the bits of %A viewed as <2 x i32>
  56   %1 = bitcast i64 %A to <2 x i32>  ; reinterpret the 64-bit integer as two i32 lanes
  57   %add = add <2 x i32> %1, <i32 3, i32 5>  ; lane-wise integer add of a constant vector
  58   %2 = bitcast <2 x i32> %add to i64  ; reinterpret the two-lane sum back as an i64
  59   ret i64 %2
  60 }
  61 ; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
  62 ; Ideally, we should fold that sequence into a single paddd.
  63
  64 ; CHECK-LABEL: test4
  65 ; CHECK: pshufd
  66 ; CHECK-NEXT: paddq
  67 ; CHECK-NEXT: pshufd
  68 ; CHECK: ret
  69
  70
  71 define double @test5(double %A) {  ; fadd <3.0, 5.0> to the bits of %A viewed as <2 x float>
  72   %1 = bitcast double %A to <2 x float>  ; reinterpret the 64-bit double as two float lanes
  73   %add = fadd <2 x float> %1, <float 3.0, float 5.0>  ; lane-wise floating-point add of a constant vector
  74   %2 = bitcast <2 x float> %add to double  ; reinterpret the two-lane sum back as a double
  75   ret double %2
  76 }
  77 ; CHECK-LABEL: test5
  78 ; CHECK: addps
  79 ; CHECK-NEXT: ret
  80
  81
  82 define double @test6(double %A) {  ; add <3, 4, 5, 6> to the bits of %A viewed as <4 x i16>
  83   %1 = bitcast double %A to <4 x i16>  ; reinterpret the 64-bit double as four i16 lanes
  84   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6>  ; lane-wise integer add of a constant vector
  85   %2 = bitcast <4 x i16> %add to double  ; reinterpret the four-lane sum back as a double
  86   ret double %2
  87 }
  88 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
  89 ; single paddw instruction. At the moment we produce punpcklwd+paddd+pshufb.
  90
  91 ; CHECK-LABEL: test6
  92 ; CHECK-NOT: movsd
  93 ; CHECK: punpcklwd
  94 ; CHECK-NEXT: paddd
  95 ; CHECK-NEXT: pshufb
  96 ; CHECK-NEXT: ret
  97
  98
  99 define double @test7(double %A, double %B) {  ; add %A and %B lane-wise as <4 x i16>
 100   %1 = bitcast double %A to <4 x i16>  ; view the first double as four i16 lanes
 101   %2 = bitcast double %B to <4 x i16>  ; view the second double as four i16 lanes
 102   %add = add <4 x i16> %1, %2  ; lane-wise integer add of the two operands
 103   %3 = bitcast <4 x i16> %add to double  ; reinterpret the four-lane sum back as a double
 104   ret double %3
 105 }
 106 ; FIXME: Ideally we should be able to fold the entire body of @test7 into a
 107 ; single 'paddw %xmm1, %xmm0' instruction. At the moment we produce the
 108 ; sequence punpcklwd+punpcklwd+paddd+pshufb.
 109
 110 ; CHECK-LABEL: test7
 111 ; CHECK-NOT: movsd
 112 ; CHECK: punpcklwd
 113 ; CHECK-NEXT: punpcklwd
 114 ; CHECK-NEXT: paddd
 115 ; CHECK-NEXT: pshufb
 116 ; CHECK-NEXT: ret
 117
 118
 119 define double @test8(double %A) {  ; add <3..10> to the bits of %A viewed as <8 x i8>
 120   %1 = bitcast double %A to <8 x i8>  ; reinterpret the 64-bit double as eight i8 lanes
 121   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10>  ; lane-wise integer add of a constant vector
 122   %2 = bitcast <8 x i8> %add to double  ; reinterpret the eight-lane sum back as a double
 123   ret double %2
 124 }
 125 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
 126 ; single paddb instruction. At the moment we produce the sequence
 127 ; punpcklbw+paddw+pshufb.
 128
 129 ; CHECK-LABEL: test8
 130 ; CHECK-NOT: movsd
 131 ; CHECK: punpcklbw
 132 ; CHECK-NEXT: paddw
 133 ; CHECK-NEXT: pshufb
 134 ; CHECK-NEXT: ret
 135
 136
 137 define double @test9(double %A, double %B) {  ; add %A and %B lane-wise as <8 x i8>
 138   %1 = bitcast double %A to <8 x i8>  ; view the first double as eight i8 lanes
 139   %2 = bitcast double %B to <8 x i8>  ; view the second double as eight i8 lanes
 140   %add = add <8 x i8> %1, %2  ; lane-wise integer add of the two operands
 141   %3 = bitcast <8 x i8> %add to double  ; reinterpret the eight-lane sum back as a double
 142   ret double %3
 143 }
 144 ; FIXME: Ideally we should be able to fold the entire body of @test9 into a
 145 ; single 'paddb %xmm1, %xmm0' instruction. At the moment we produce the
 146 ; sequence punpcklbw+punpcklbw+paddw+pshufb.
 147
 148 ; CHECK-LABEL: test9
 149 ; CHECK-NOT: movsd
 150 ; CHECK: punpcklbw
 151 ; CHECK-NEXT: punpcklbw
 152 ; CHECK-NEXT: paddw
 153 ; CHECK-NEXT: pshufb
 154 ; CHECK-NEXT: ret
 155