test/CodeGen/AArch64/neon-mul-div.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
   2
   3
   4 define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { ; <8 x i8> integer mul: must select NEON "mul" on .8b lanes; any v-register is accepted
   5 ; CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   6         %tmp3 = mul <8 x i8> %A, %B
   7         ret <8 x i8> %tmp3
   8 }
   9
  10 define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { ; <16 x i8> integer mul: must select NEON "mul" on .16b lanes; any v-register is accepted
  11 ; CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
  12         %tmp3 = mul <16 x i8> %A, %B
  13         ret <16 x i8> %tmp3
  14 }
  15
  16 define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { ; <4 x i16> integer mul: must select NEON "mul" on .4h lanes; any v-register is accepted
  17 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
  18         %tmp3 = mul <4 x i16> %A, %B
  19         ret <4 x i16> %tmp3
  20 }
  21
  22 define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { ; <8 x i16> integer mul: must select NEON "mul" on .8h lanes; any v-register is accepted
  23 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
  24         %tmp3 = mul <8 x i16> %A, %B
  25         ret <8 x i16> %tmp3
  26 }
  27
  28 define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { ; <2 x i32> integer mul: must select NEON "mul" on .2s lanes; any v-register is accepted
  29 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  30         %tmp3 = mul <2 x i32> %A, %B
  31         ret <2 x i32> %tmp3
  32 }
  33
  34 define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { ; <4 x i32> integer mul: must select NEON "mul" on .4s lanes; any v-register is accepted
  35 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  36         %tmp3 = mul <4 x i32> %A, %B
  37         ret <4 x i32> %tmp3
  38 }
  39
  40 define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { ; <2 x float> fmul: must select NEON "fmul" on .2s lanes; any v-register is accepted
  41 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  42         %tmp3 = fmul <2 x float> %A, %B
  43         ret <2 x float> %tmp3
  44 }
  45
  46 define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { ; <4 x float> fmul: must select NEON "fmul" on .4s lanes; any v-register is accepted
  47 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  48         %tmp3 = fmul <4 x float> %A, %B
  49         ret <4 x float> %tmp3
  50 }
  51 define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { ; <2 x double> fmul: must select NEON "fmul" on .2d lanes; any v-register is accepted
  52 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  53         %tmp3 = fmul <2 x double> %A, %B
  54         ret <2 x double> %tmp3
  55 }
  56
  57
  58 define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { ; <2 x float> fdiv: must select NEON "fdiv" on .2s lanes; any v-register is accepted
  59 ; CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  60         %tmp3 = fdiv <2 x float> %A, %B
  61         ret <2 x float> %tmp3
  62 }
  63
  64 define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { ; <4 x float> fdiv: must select NEON "fdiv" on .4s lanes; any v-register is accepted
  65 ; CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  66         %tmp3 = fdiv <4 x float> %A, %B
  67         ret <4 x float> %tmp3
  68 }
  69 define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { ; <2 x double> fdiv: must select NEON "fdiv" on .2d lanes; any v-register is accepted
  70 ; CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  71         %tmp3 = fdiv <2 x double> %A, %B
  72         ret <2 x double> %tmp3
  73 }
  74
  75 declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) ; called by @poly_mulv8i8, which expects it to lower to "pmul" on .8b
  76 declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) ; called by @poly_mulv16i8, which expects it to lower to "pmul" on .16b
  77
  78 define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; vmulp.v8i8 intrinsic must select "pmul" on .8b; registers are pinned to v0/v1
  79 ; CHECK-LABEL: poly_mulv8i8:
  80    %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  81 ; CHECK: pmul v0.8b, v0.8b, v1.8b
  82    ret <8 x i8> %prod
  83 }
  84
  85 define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; vmulp.v16i8 intrinsic must select "pmul" on .16b; registers are pinned to v0/v1
  86 ; CHECK-LABEL: poly_mulv16i8:
  87    %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  88 ; CHECK: pmul v0.16b, v0.16b, v1.16b
  89    ret <16 x i8> %prod
  90 }
  91
  92 declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) ; called by @test_sqdmulh_v4i16, which expects "sqdmulh" on .4h
  93 declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) ; called by @test_sqdmulh_v8i16, which expects "sqdmulh" on .8h
  94 declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) ; called by @test_sqdmulh_v2i32, which expects "sqdmulh" on .2s
  95 declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) ; called by @test_sqdmulh_v4i32, which expects "sqdmulh" on .4s
  96
  97 define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqdmulh.v4i16 intrinsic must select "sqdmulh" on .4h; registers are pinned to v0/v1
  98 ; CHECK-LABEL: test_sqdmulh_v4i16:
  99    %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 100 ; CHECK: sqdmulh v0.4h, v0.4h, v1.4h
 101    ret <4 x i16> %prod
 102 }
 103
 104 define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqdmulh.v8i16 intrinsic must select "sqdmulh" on .8h; registers are pinned to v0/v1
 105 ; CHECK-LABEL: test_sqdmulh_v8i16:
 106    %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 107 ; CHECK: sqdmulh v0.8h, v0.8h, v1.8h
 108    ret <8 x i16> %prod
 109 }
 110
 111 define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqdmulh.v2i32 intrinsic must select "sqdmulh" on .2s; registers are pinned to v0/v1
 112 ; CHECK-LABEL: test_sqdmulh_v2i32:
 113    %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 114 ; CHECK: sqdmulh v0.2s, v0.2s, v1.2s
 115    ret <2 x i32> %prod
 116 }
 117
 118 define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqdmulh.v4i32 intrinsic must select "sqdmulh" on .4s; registers are pinned to v0/v1
 119 ; CHECK-LABEL: test_sqdmulh_v4i32:
 120    %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 121 ; CHECK: sqdmulh v0.4s, v0.4s, v1.4s
 122    ret <4 x i32> %prod
 123 }
 124
 125 declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) ; called by @test_sqrdmulh_v4i16, which expects "sqrdmulh" on .4h
 126 declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) ; called by @test_sqrdmulh_v8i16, which expects "sqrdmulh" on .8h
 127 declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) ; called by @test_sqrdmulh_v2i32, which expects "sqrdmulh" on .2s
 128 declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) ; called by @test_sqrdmulh_v4i32, which expects "sqrdmulh" on .4s
 129
 130 define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; vqrdmulh.v4i16 intrinsic must select "sqrdmulh" on .4h; registers are pinned to v0/v1
 131 ; CHECK-LABEL: test_sqrdmulh_v4i16:
 132    %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
 133 ; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h
 134    ret <4 x i16> %prod
 135 }
 136
 137 define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; vqrdmulh.v8i16 intrinsic must select "sqrdmulh" on .8h; registers are pinned to v0/v1
 138 ; CHECK-LABEL: test_sqrdmulh_v8i16:
 139    %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
 140 ; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h
 141    ret <8 x i16> %prod
 142 }
 143
 144 define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; vqrdmulh.v2i32 intrinsic must select "sqrdmulh" on .2s; registers are pinned to v0/v1
 145 ; CHECK-LABEL: test_sqrdmulh_v2i32:
 146    %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
 147 ; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s
 148    ret <2 x i32> %prod
 149 }
 150
 151 define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; vqrdmulh.v4i32 intrinsic must select "sqrdmulh" on .4s; registers are pinned to v0/v1
 152 ; CHECK-LABEL: test_sqrdmulh_v4i32:
 153    %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
 154 ; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s
 155    ret <4 x i32> %prod
 156 }
 157
 158 declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) ; called by @fmulx_v2f32, which expects "fmulx" on .2s
 159 declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) ; called by @fmulx_v4f32, which expects "fmulx" on .4s
 160 declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) ; called by @fmulx_v2f64, which expects "fmulx" on .2d
 161
 162 define <2 x float> @fmulx_v2f32(<2 x float> %a, <2 x float> %b) {
 163 ; The check pins v0 and v1; other registers would be legal, but unexpected.
 164 ; CHECK: fmulx v0.2s, v0.2s, v1.2s
 165         %mulx = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %b)
 166         ret <2 x float> %mulx
 167 }
 168
 169 define <4 x float> @fmulx_v4f32(<4 x float> %a, <4 x float> %b) {
 170 ; The check pins v0 and v1; other registers would be legal, but unexpected.
 171 ; CHECK: fmulx v0.4s, v0.4s, v1.4s
 172         %mulx = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %b)
 173         ret <4 x float> %mulx
 174 }
 175
 176 define <2 x double> @fmulx_v2f64(<2 x double> %a, <2 x double> %b) {
 177 ; The check pins v0 and v1; other registers would be legal, but unexpected.
 178 ; CHECK: fmulx v0.2d, v0.2d, v1.2d
 179         %mulx = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %b)
 180         ret <2 x double> %mulx
 181 }