test/CodeGen/X86/exedepsfix-broadcast.ll

   1 ; RUN: llc -O3 -mtriple=x86_64-apple-macosx -o - < %s -mattr=+avx2 -enable-unsafe-fp-math -mcpu=core2 | FileCheck %s
   2 ; Check that the ExeDepsFix pass correctly fixes the domain for broadcast instructions.
   3 ; <rdar://problem/16354675>
   4
   5 ; CHECK-LABEL: ExeDepsFix_broadcastss
   6 ; CHECK: broadcastss
   7 ; CHECK: vandps
   8 ; CHECK: vmaxps
   9 ; CHECK: ret
  10 define <4 x float> @ExeDepsFix_broadcastss(<4 x float> %arg, <4 x float> %arg2) { ; 128-bit float case; the mask is a constant splat
  11   %arg_bits = bitcast <4 x float> %arg to <4 x i32> ; reinterpret the float lanes as i32 so an integer 'and' can be applied
  12   %masked = and <4 x i32> %arg_bits, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; 2147483647 = 0x7FFFFFFF in every lane
  13   %masked_fp = bitcast <4 x i32> %masked to <4 x float> ; back to float lanes for the comparison
  14   %is_ge = fcmp oge <4 x float> %masked_fp, %arg2 ; ordered >= per lane
  15   %picked = select <4 x i1> %is_ge, <4 x float> %masked_fp, <4 x float> %arg2 ; the test expects this compare+select pair to become vmaxps
  16   ret <4 x float> %picked
  17 }
  18
  19 ; CHECK-LABEL: ExeDepsFix_broadcastss256
  20 ; CHECK: broadcastss
  21 ; CHECK: vandps
  22 ; CHECK: vmaxps
  23 ; CHECK: ret
  24 define <8 x float> @ExeDepsFix_broadcastss256(<8 x float> %arg, <8 x float> %arg2) { ; 256-bit float case; the mask is a constant splat
  25   %arg_bits = bitcast <8 x float> %arg to <8 x i32> ; reinterpret the float lanes as i32 so an integer 'and' can be applied
  26   %masked = and <8 x i32> %arg_bits, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; 2147483647 = 0x7FFFFFFF in every lane
  27   %masked_fp = bitcast <8 x i32> %masked to <8 x float> ; back to float lanes for the comparison
  28   %is_ge = fcmp oge <8 x float> %masked_fp, %arg2 ; ordered >= per lane
  29   %picked = select <8 x i1> %is_ge, <8 x float> %masked_fp, <8 x float> %arg2 ; the test expects this compare+select pair to become vmaxps
  30   ret <8 x float> %picked
  31 }
  32
  33
  34 ; CHECK-LABEL: ExeDepsFix_broadcastss_inreg
  35 ; CHECK: broadcastss
  36 ; CHECK: vandps
  37 ; CHECK: vmaxps
  38 ; CHECK: ret
  39 define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %arg2, i32 %broadcastvalue) { ; 128-bit float case; the mask is splatted from a scalar argument
  40   %arg_bits = bitcast <4 x float> %arg to <4 x i32> ; reinterpret the float lanes as i32 so an integer 'and' can be applied
  41   %lane0 = insertelement <4 x i32> undef, i32 %broadcastvalue, i32 0 ; place the scalar in element 0, other elements undef
  42   %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates element 0 into every lane
  43   %masked = and <4 x i32> %arg_bits, %splat
  44   %masked_fp = bitcast <4 x i32> %masked to <4 x float> ; back to float lanes for the comparison
  45   %is_ge = fcmp oge <4 x float> %masked_fp, %arg2 ; ordered >= per lane
  46   %picked = select <4 x i1> %is_ge, <4 x float> %masked_fp, <4 x float> %arg2 ; the test expects this compare+select pair to become vmaxps
  47   ret <4 x float> %picked
  48 }
  49
  50 ; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg
  51 ; CHECK: broadcastss
  52 ; CHECK: vandps
  53 ; CHECK: vmaxps
  54 ; CHECK: ret
  55 define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float> %arg2, i32 %broadcastvalue) { ; 256-bit float case; the mask is splatted from a scalar argument
  56   %arg_bits = bitcast <8 x float> %arg to <8 x i32> ; reinterpret the float lanes as i32 so an integer 'and' can be applied
  57   %lane0 = insertelement <8 x i32> undef, i32 %broadcastvalue, i32 0 ; place the scalar in element 0, other elements undef
  58   %splat = shufflevector <8 x i32> %lane0, <8 x i32> undef, <8 x i32> zeroinitializer ; all-zero shuffle mask replicates element 0 into every lane
  59   %masked = and <8 x i32> %arg_bits, %splat
  60   %masked_fp = bitcast <8 x i32> %masked to <8 x float> ; back to float lanes for the comparison
  61   %is_ge = fcmp oge <8 x float> %masked_fp, %arg2 ; ordered >= per lane
  62   %picked = select <8 x i1> %is_ge, <8 x float> %masked_fp, <8 x float> %arg2 ; the test expects this compare+select pair to become vmaxps
  63   ret <8 x float> %picked
  64 }
  65
  66 ; CHECK-LABEL: ExeDepsFix_broadcastsd
  67 ; In this case the broadcast is folded directly into the vandpd.
  68 ; CHECK: vandpd
  69 ; CHECK: vmaxpd
  70 ; CHECK: ret
  71 define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) { ; 128-bit double case; the mask is a constant splat
  72   %arg_bits = bitcast <2 x double> %arg to <2 x i64> ; reinterpret the double lanes as i64 so an integer 'and' can be applied
  73   %masked = and <2 x i64> %arg_bits, <i64 2147483647, i64 2147483647> ; 2147483647 = 0x7FFFFFFF in every i64 lane
  74   %masked_fp = bitcast <2 x i64> %masked to <2 x double> ; back to double lanes for the comparison
  75   %is_ge = fcmp oge <2 x double> %masked_fp, %arg2 ; ordered >= per lane
  76   %picked = select <2 x i1> %is_ge, <2 x double> %masked_fp, <2 x double> %arg2 ; the test expects this compare+select pair to become vmaxpd
  77   ret <2 x double> %picked
  78 }
  79
  80 ; CHECK-LABEL: ExeDepsFix_broadcastsd256
  81 ; CHECK: broadcastsd
  82 ; CHECK: vandpd
  83 ; CHECK: vmaxpd
  84 ; CHECK: ret
  85 define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %arg2) { ; 256-bit double case; the mask is a constant splat
  86   %arg_bits = bitcast <4 x double> %arg to <4 x i64> ; reinterpret the double lanes as i64 so an integer 'and' can be applied
  87   %masked = and <4 x i64> %arg_bits, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> ; 2147483647 = 0x7FFFFFFF in every i64 lane
  88   %masked_fp = bitcast <4 x i64> %masked to <4 x double> ; back to double lanes for the comparison
  89   %is_ge = fcmp oge <4 x double> %masked_fp, %arg2 ; ordered >= per lane
  90   %picked = select <4 x i1> %is_ge, <4 x double> %masked_fp, <4 x double> %arg2 ; the test expects this compare+select pair to become vmaxpd
  91   ret <4 x double> %picked
  92 }
  93
  94
  95 ; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg
  96 ; ExeDepsFix works top down, so it coalesces the vmovlhps domain with
  97 ; vandps, and nothing more can be done afterwards to match vmaxpd.
  98 ; CHECK: vmovlhps
  99 ; CHECK: vandps
 100 ; CHECK: vmaxpd
 101 ; CHECK: ret
 102 define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) { ; 128-bit double case; the mask is splatted from a scalar argument
 103   %arg_bits = bitcast <2 x double> %arg to <2 x i64> ; reinterpret the double lanes as i64 so an integer 'and' can be applied
 104   %lane0 = insertelement <2 x i64> undef, i64 %broadcastvalue, i32 0 ; place the scalar in element 0, other element undef
 105   %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> zeroinitializer ; all-zero shuffle mask replicates element 0 into every lane
 106   %masked = and <2 x i64> %arg_bits, %splat
 107   %masked_fp = bitcast <2 x i64> %masked to <2 x double> ; back to double lanes for the comparison
 108   %is_ge = fcmp oge <2 x double> %masked_fp, %arg2 ; ordered >= per lane
 109   %picked = select <2 x i1> %is_ge, <2 x double> %masked_fp, <2 x double> %arg2 ; the test expects this compare+select pair to become vmaxpd
 110   ret <2 x double> %picked
 111 }
 112
 113 ; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg
 114 ; CHECK: broadcastsd
 115 ; CHECK: vandpd
 116 ; CHECK: vmaxpd
 117 ; CHECK: ret
 118 define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x double> %arg2, i64 %broadcastvalue) { ; 256-bit double case; the mask is splatted from a scalar argument
 119   %arg_bits = bitcast <4 x double> %arg to <4 x i64> ; reinterpret the double lanes as i64 so an integer 'and' can be applied
 120   %lane0 = insertelement <4 x i64> undef, i64 %broadcastvalue, i32 0 ; place the scalar in element 0, other elements undef
 121   %splat = shufflevector <4 x i64> %lane0, <4 x i64> undef, <4 x i32> zeroinitializer ; all-zero shuffle mask replicates element 0 into every lane
 122   %masked = and <4 x i64> %arg_bits, %splat
 123   %masked_fp = bitcast <4 x i64> %masked to <4 x double> ; back to double lanes for the comparison
 124   %is_ge = fcmp oge <4 x double> %masked_fp, %arg2 ; ordered >= per lane
 125   %picked = select <4 x i1> %is_ge, <4 x double> %masked_fp, <4 x double> %arg2 ; the test expects this compare+select pair to become vmaxpd
 126   ret <4 x double> %picked
 127 }
 128