test/CodeGen/X86/sse-scalar-fp-arith.ll

   1 ; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   2 ; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   3 ; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
   4
   5 target triple = "x86_64-unknown-unknown"
   6
   7 ; Ensure that the backend no longer emits unnecessary vector insert
   8 ; instructions immediately after SSE scalar fp instructions
   9 ; like addss or mulss.
  10
  11 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] + b[0] as a scalar fadd, and
     ; inserts the sum back into lane 0 of %a. The check lines below expect a
     ; single addss (vaddss under AVX) with no additional insert instruction.
  12 ; SSE-LABEL: test_add_ss:
  13 ; SSE:       # BB#0:
  14 ; SSE-NEXT:    addss %xmm1, %xmm0
  15 ; SSE-NEXT:    retq
  16 ;
  17 ; AVX-LABEL: test_add_ss:
  18 ; AVX:       # BB#0:
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %1 = extractelement <4 x float> %b, i32 0
  22   %2 = extractelement <4 x float> %a, i32 0
  23   %add = fadd float %2, %1
  24   %3 = insertelement <4 x float> %a, float %add, i32 0
  25   ret <4 x float> %3
  26 }
  27
  28 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] - b[0] as a scalar fsub, and
     ; inserts the difference back into lane 0 of %a. The check lines below
     ; expect a single subss (vsubss under AVX) with no additional insert.
  29 ; SSE-LABEL: test_sub_ss:
  30 ; SSE:       # BB#0:
  31 ; SSE-NEXT:    subss %xmm1, %xmm0
  32 ; SSE-NEXT:    retq
  33 ;
  34 ; AVX-LABEL: test_sub_ss:
  35 ; AVX:       # BB#0:
  36 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
  37 ; AVX-NEXT:    retq
  38   %1 = extractelement <4 x float> %b, i32 0
  39   %2 = extractelement <4 x float> %a, i32 0
  40   %sub = fsub float %2, %1
  41   %3 = insertelement <4 x float> %a, float %sub, i32 0
  42   ret <4 x float> %3
  43 }
  44
  45 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] * b[0] as a scalar fmul, and
     ; inserts the product back into lane 0 of %a. The check lines below expect
     ; a single mulss (vmulss under AVX) with no additional insert.
  46 ; SSE-LABEL: test_mul_ss:
  47 ; SSE:       # BB#0:
  48 ; SSE-NEXT:    mulss %xmm1, %xmm0
  49 ; SSE-NEXT:    retq
  50 ;
  51 ; AVX-LABEL: test_mul_ss:
  52 ; AVX:       # BB#0:
  53 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
  54 ; AVX-NEXT:    retq
  55   %1 = extractelement <4 x float> %b, i32 0
  56   %2 = extractelement <4 x float> %a, i32 0
  57   %mul = fmul float %2, %1
  58   %3 = insertelement <4 x float> %a, float %mul, i32 0
  59   ret <4 x float> %3
  60 }
  61
  62 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] / b[0] as a scalar fdiv, and
     ; inserts the quotient back into lane 0 of %a. The check lines below expect
     ; a single divss (vdivss under AVX) with no additional insert.
  63 ; SSE-LABEL: test_div_ss:
  64 ; SSE:       # BB#0:
  65 ; SSE-NEXT:    divss %xmm1, %xmm0
  66 ; SSE-NEXT:    retq
  67 ;
  68 ; AVX-LABEL: test_div_ss:
  69 ; AVX:       # BB#0:
  70 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  71 ; AVX-NEXT:    retq
  72   %1 = extractelement <4 x float> %b, i32 0
  73   %2 = extractelement <4 x float> %a, i32 0
  74   %div = fdiv float %2, %1
  75   %3 = insertelement <4 x float> %a, float %div, i32 0
  76   ret <4 x float> %3
  77 }
  78
  79 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] + b[0] as a scalar double
     ; fadd, and inserts the sum back into lane 0 of %a. The check lines below
     ; expect a single addsd (vaddsd under AVX) with no additional insert.
  80 ; SSE-LABEL: test_add_sd:
  81 ; SSE:       # BB#0:
  82 ; SSE-NEXT:    addsd %xmm1, %xmm0
  83 ; SSE-NEXT:    retq
  84 ;
  85 ; AVX-LABEL: test_add_sd:
  86 ; AVX:       # BB#0:
  87 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
  88 ; AVX-NEXT:    retq
  89   %1 = extractelement <2 x double> %b, i32 0
  90   %2 = extractelement <2 x double> %a, i32 0
  91   %add = fadd double %2, %1
  92   %3 = insertelement <2 x double> %a, double %add, i32 0
  93   ret <2 x double> %3
  94 }
  95
  96 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] - b[0] as a scalar double
     ; fsub, and inserts the difference back into lane 0 of %a. The check lines
     ; below expect a single subsd (vsubsd under AVX) with no additional insert.
  97 ; SSE-LABEL: test_sub_sd:
  98 ; SSE:       # BB#0:
  99 ; SSE-NEXT:    subsd %xmm1, %xmm0
 100 ; SSE-NEXT:    retq
 101 ;
 102 ; AVX-LABEL: test_sub_sd:
 103 ; AVX:       # BB#0:
 104 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 105 ; AVX-NEXT:    retq
 106   %1 = extractelement <2 x double> %b, i32 0
 107   %2 = extractelement <2 x double> %a, i32 0
 108   %sub = fsub double %2, %1
 109   %3 = insertelement <2 x double> %a, double %sub, i32 0
 110   ret <2 x double> %3
 111 }
 112
 113 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] * b[0] as a scalar double
     ; fmul, and inserts the product back into lane 0 of %a. The check lines
     ; below expect a single mulsd (vmulsd under AVX) with no additional insert.
 114 ; SSE-LABEL: test_mul_sd:
 115 ; SSE:       # BB#0:
 116 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 117 ; SSE-NEXT:    retq
 118 ;
 119 ; AVX-LABEL: test_mul_sd:
 120 ; AVX:       # BB#0:
 121 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 122 ; AVX-NEXT:    retq
 123   %1 = extractelement <2 x double> %b, i32 0
 124   %2 = extractelement <2 x double> %a, i32 0
 125   %mul = fmul double %2, %1
 126   %3 = insertelement <2 x double> %a, double %mul, i32 0
 127   ret <2 x double> %3
 128 }
 129
 130 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %b and %a, computes a[0] / b[0] as a scalar double
     ; fdiv, and inserts the quotient back into lane 0 of %a. The check lines
     ; below expect a single divsd (vdivsd under AVX) with no additional insert.
 131 ; SSE-LABEL: test_div_sd:
 132 ; SSE:       # BB#0:
 133 ; SSE-NEXT:    divsd %xmm1, %xmm0
 134 ; SSE-NEXT:    retq
 135 ;
 136 ; AVX-LABEL: test_div_sd:
 137 ; AVX:       # BB#0:
 138 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 139 ; AVX-NEXT:    retq
 140   %1 = extractelement <2 x double> %b, i32 0
 141   %2 = extractelement <2 x double> %a, i32 0
 142   %div = fdiv double %2, %1
 143   %3 = insertelement <2 x double> %a, double %div, i32 0
 144   ret <2 x double> %3
 145 }
 146
 147 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %a and %b, computes a[0] + b[0], and inserts the sum
     ; into lane 0 of %b (the second argument). The SSE check lines expect the
     ; addss to write %xmm1 followed by a movaps into %xmm0; the AVX check lines
     ; expect one three-operand vaddss.
 148 ; SSE-LABEL: test2_add_ss:
 149 ; SSE:       # BB#0:
 150 ; SSE-NEXT:    addss %xmm0, %xmm1
 151 ; SSE-NEXT:    movaps %xmm1, %xmm0
 152 ; SSE-NEXT:    retq
 153 ;
 154 ; AVX-LABEL: test2_add_ss:
 155 ; AVX:       # BB#0:
 156 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 157 ; AVX-NEXT:    retq
 158   %1 = extractelement <4 x float> %a, i32 0
 159   %2 = extractelement <4 x float> %b, i32 0
 160   %add = fadd float %1, %2
 161   %3 = insertelement <4 x float> %b, float %add, i32 0
 162   ret <4 x float> %3
 163 }
 164
 165 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %a and %b, computes b[0] - a[0] (note the operand
     ; order), and inserts the difference into lane 0 of %b. The SSE check lines
     ; expect the subss to write %xmm1 followed by a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vsubss.
 166 ; SSE-LABEL: test2_sub_ss:
 167 ; SSE:       # BB#0:
 168 ; SSE-NEXT:    subss %xmm0, %xmm1
 169 ; SSE-NEXT:    movaps %xmm1, %xmm0
 170 ; SSE-NEXT:    retq
 171 ;
 172 ; AVX-LABEL: test2_sub_ss:
 173 ; AVX:       # BB#0:
 174 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 175 ; AVX-NEXT:    retq
 176   %1 = extractelement <4 x float> %a, i32 0
 177   %2 = extractelement <4 x float> %b, i32 0
 178   %sub = fsub float %2, %1
 179   %3 = insertelement <4 x float> %b, float %sub, i32 0
 180   ret <4 x float> %3
 181 }
 182
 183 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %a and %b, computes a[0] * b[0], and inserts the
     ; product into lane 0 of %b (the second argument). The SSE check lines
     ; expect the mulss to write %xmm1 followed by a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vmulss.
 184 ; SSE-LABEL: test2_mul_ss:
 185 ; SSE:       # BB#0:
 186 ; SSE-NEXT:    mulss %xmm0, %xmm1
 187 ; SSE-NEXT:    movaps %xmm1, %xmm0
 188 ; SSE-NEXT:    retq
 189 ;
 190 ; AVX-LABEL: test2_mul_ss:
 191 ; AVX:       # BB#0:
 192 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 193 ; AVX-NEXT:    retq
 194   %1 = extractelement <4 x float> %a, i32 0
 195   %2 = extractelement <4 x float> %b, i32 0
 196   %mul = fmul float %1, %2
 197   %3 = insertelement <4 x float> %b, float %mul, i32 0
 198   ret <4 x float> %3
 199 }
 200
 201 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Extracts lane 0 of %a and %b, computes b[0] / a[0] (note the operand
     ; order), and inserts the quotient into lane 0 of %b. The SSE check lines
     ; expect the divss to write %xmm1 followed by a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vdivss.
 202 ; SSE-LABEL: test2_div_ss:
 203 ; SSE:       # BB#0:
 204 ; SSE-NEXT:    divss %xmm0, %xmm1
 205 ; SSE-NEXT:    movaps %xmm1, %xmm0
 206 ; SSE-NEXT:    retq
 207 ;
 208 ; AVX-LABEL: test2_div_ss:
 209 ; AVX:       # BB#0:
 210 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 211 ; AVX-NEXT:    retq
 212   %1 = extractelement <4 x float> %a, i32 0
 213   %2 = extractelement <4 x float> %b, i32 0
 214   %div = fdiv float %2, %1
 215   %3 = insertelement <4 x float> %b, float %div, i32 0
 216   ret <4 x float> %3
 217 }
 218
 219 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %a and %b, computes a[0] + b[0] in double precision,
     ; and inserts the sum into lane 0 of %b (the second argument). The SSE
     ; check lines expect the addsd to write %xmm1 followed by a movaps into
     ; %xmm0; the AVX check lines expect one three-operand vaddsd.
 220 ; SSE-LABEL: test2_add_sd:
 221 ; SSE:       # BB#0:
 222 ; SSE-NEXT:    addsd %xmm0, %xmm1
 223 ; SSE-NEXT:    movaps %xmm1, %xmm0
 224 ; SSE-NEXT:    retq
 225 ;
 226 ; AVX-LABEL: test2_add_sd:
 227 ; AVX:       # BB#0:
 228 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 229 ; AVX-NEXT:    retq
 230   %1 = extractelement <2 x double> %a, i32 0
 231   %2 = extractelement <2 x double> %b, i32 0
 232   %add = fadd double %1, %2
 233   %3 = insertelement <2 x double> %b, double %add, i32 0
 234   ret <2 x double> %3
 235 }
 236
 237 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %a and %b, computes b[0] - a[0] (note the operand
     ; order) in double precision, and inserts the difference into lane 0 of
     ; %b. The SSE check lines expect the subsd to write %xmm1 followed by a
     ; movaps into %xmm0; the AVX check lines expect one three-operand vsubsd.
 238 ; SSE-LABEL: test2_sub_sd:
 239 ; SSE:       # BB#0:
 240 ; SSE-NEXT:    subsd %xmm0, %xmm1
 241 ; SSE-NEXT:    movaps %xmm1, %xmm0
 242 ; SSE-NEXT:    retq
 243 ;
 244 ; AVX-LABEL: test2_sub_sd:
 245 ; AVX:       # BB#0:
 246 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 247 ; AVX-NEXT:    retq
 248   %1 = extractelement <2 x double> %a, i32 0
 249   %2 = extractelement <2 x double> %b, i32 0
 250   %sub = fsub double %2, %1
 251   %3 = insertelement <2 x double> %b, double %sub, i32 0
 252   ret <2 x double> %3
 253 }
 254
 255 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %a and %b, computes a[0] * b[0] in double precision,
     ; and inserts the product into lane 0 of %b (the second argument). The SSE
     ; check lines expect the mulsd to write %xmm1 followed by a movaps into
     ; %xmm0; the AVX check lines expect one three-operand vmulsd.
 256 ; SSE-LABEL: test2_mul_sd:
 257 ; SSE:       # BB#0:
 258 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 259 ; SSE-NEXT:    movaps %xmm1, %xmm0
 260 ; SSE-NEXT:    retq
 261 ;
 262 ; AVX-LABEL: test2_mul_sd:
 263 ; AVX:       # BB#0:
 264 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 265 ; AVX-NEXT:    retq
 266   %1 = extractelement <2 x double> %a, i32 0
 267   %2 = extractelement <2 x double> %b, i32 0
 268   %mul = fmul double %1, %2
 269   %3 = insertelement <2 x double> %b, double %mul, i32 0
 270   ret <2 x double> %3
 271 }
 272
 273 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Extracts lane 0 of %a and %b, computes b[0] / a[0] (note the operand
     ; order) in double precision, and inserts the quotient into lane 0 of %b.
     ; The SSE check lines expect the divsd to write %xmm1 followed by a movaps
     ; into %xmm0; the AVX check lines expect one three-operand vdivsd.
 274 ; SSE-LABEL: test2_div_sd:
 275 ; SSE:       # BB#0:
 276 ; SSE-NEXT:    divsd %xmm0, %xmm1
 277 ; SSE-NEXT:    movaps %xmm1, %xmm0
 278 ; SSE-NEXT:    retq
 279 ;
 280 ; AVX-LABEL: test2_div_sd:
 281 ; AVX:       # BB#0:
 282 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 283 ; AVX-NEXT:    retq
 284   %1 = extractelement <2 x double> %a, i32 0
 285   %2 = extractelement <2 x double> %b, i32 0
 286   %div = fdiv double %2, %1
 287   %3 = insertelement <2 x double> %b, double %div, i32 0
 288   ret <2 x double> %3
 289 }
 290
 291 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar adds on lane 0: t = a[0] + b[0], then a[0] + t, and
     ; inserts the final value into lane 0 of %a. The check lines below expect
     ; exactly two addss (vaddss under AVX) instructions and no insert.
 292 ; SSE-LABEL: test_multiple_add_ss:
 293 ; SSE:       # BB#0:
 294 ; SSE-NEXT:    addss %xmm0, %xmm1
 295 ; SSE-NEXT:    addss %xmm1, %xmm0
 296 ; SSE-NEXT:    retq
 297 ;
 298 ; AVX-LABEL: test_multiple_add_ss:
 299 ; AVX:       # BB#0:
 300 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 301 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 302 ; AVX-NEXT:    retq
 303   %1 = extractelement <4 x float> %b, i32 0
 304   %2 = extractelement <4 x float> %a, i32 0
 305   %add = fadd float %2, %1
 306   %add2 = fadd float %2, %add
 307   %3 = insertelement <4 x float> %a, float %add2, i32 0
 308   ret <4 x float> %3
 309 }
 310
 311 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar subtractions on lane 0: t = a[0] - b[0], then a[0] - t,
     ; and inserts the final value into lane 0 of %a. The SSE check lines expect
     ; a movaps copy of %xmm0 into %xmm2 followed by two subss; the AVX check
     ; lines expect two vsubss. Neither sequence contains an insert.
 312 ; SSE-LABEL: test_multiple_sub_ss:
 313 ; SSE:       # BB#0:
 314 ; SSE-NEXT:    movaps %xmm0, %xmm2
 315 ; SSE-NEXT:    subss %xmm1, %xmm2
 316 ; SSE-NEXT:    subss %xmm2, %xmm0
 317 ; SSE-NEXT:    retq
 318 ;
 319 ; AVX-LABEL: test_multiple_sub_ss:
 320 ; AVX:       # BB#0:
 321 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 322 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 323 ; AVX-NEXT:    retq
 324   %1 = extractelement <4 x float> %b, i32 0
 325   %2 = extractelement <4 x float> %a, i32 0
 326   %sub = fsub float %2, %1
 327   %sub2 = fsub float %2, %sub
 328   %3 = insertelement <4 x float> %a, float %sub2, i32 0
 329   ret <4 x float> %3
 330 }
 331
 332 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar multiplies on lane 0: t = a[0] * b[0], then a[0] * t,
     ; and inserts the final value into lane 0 of %a. The check lines below
     ; expect exactly two mulss (vmulss under AVX) instructions and no insert.
 333 ; SSE-LABEL: test_multiple_mul_ss:
 334 ; SSE:       # BB#0:
 335 ; SSE-NEXT:    mulss %xmm0, %xmm1
 336 ; SSE-NEXT:    mulss %xmm1, %xmm0
 337 ; SSE-NEXT:    retq
 338 ;
 339 ; AVX-LABEL: test_multiple_mul_ss:
 340 ; AVX:       # BB#0:
 341 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
 342 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 343 ; AVX-NEXT:    retq
 344   %1 = extractelement <4 x float> %b, i32 0
 345   %2 = extractelement <4 x float> %a, i32 0
 346   %mul = fmul float %2, %1
 347   %mul2 = fmul float %2, %mul
 348   %3 = insertelement <4 x float> %a, float %mul2, i32 0
 349   ret <4 x float> %3
 350 }
 351
 352 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Chains two scalar divisions on lane 0: t = a[0] / b[0], then a[0] / t,
     ; and inserts the final value into lane 0 of %a. The SSE check lines expect
     ; a movaps copy of %xmm0 into %xmm2 followed by two divss; the AVX check
     ; lines expect two vdivss. Neither sequence contains an insert.
 353 ; SSE-LABEL: test_multiple_div_ss:
 354 ; SSE:       # BB#0:
 355 ; SSE-NEXT:    movaps %xmm0, %xmm2
 356 ; SSE-NEXT:    divss %xmm1, %xmm2
 357 ; SSE-NEXT:    divss %xmm2, %xmm0
 358 ; SSE-NEXT:    retq
 359 ;
 360 ; AVX-LABEL: test_multiple_div_ss:
 361 ; AVX:       # BB#0:
 362 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
 363 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 364 ; AVX-NEXT:    retq
 365   %1 = extractelement <4 x float> %b, i32 0
 366   %2 = extractelement <4 x float> %a, i32 0
 367   %div = fdiv float %2, %1
 368   %div2 = fdiv float %2, %div
 369   %3 = insertelement <4 x float> %a, float %div2, i32 0
 370   ret <4 x float> %3
 371 }
 372
 373 ; With SSE4.1 or greater, the shuffles in the following tests may
 374 ; be lowered to X86Blendi nodes.
 375
 376 define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
     ; %b is a scalar float here. Computes b + a[0], places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,5,6,7) takes lane 0 from that
     ; vector and lanes 1-3 from %a. The check lines below expect a single addss
     ; (vaddss under AVX).
 377 ; SSE-LABEL: blend_add_ss:
 378 ; SSE:       # BB#0:
 379 ; SSE-NEXT:    addss %xmm1, %xmm0
 380 ; SSE-NEXT:    retq
 381 ;
 382 ; AVX-LABEL: blend_add_ss:
 383 ; AVX:       # BB#0:
 384 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 385 ; AVX-NEXT:    retq
 386
 387   %ext = extractelement <4 x float> %a, i32 0
 388   %op = fadd float %b, %ext
 389   %ins = insertelement <4 x float> undef, float %op, i32 0
 390   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 391   ret <4 x float> %shuf
 392 }
 393
 394 define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
     ; %b is a scalar float here. Computes a[0] - b, places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,5,6,7) takes lane 0 from that
     ; vector and lanes 1-3 from %a. The check lines below expect a single subss
     ; (vsubss under AVX).
 395 ; SSE-LABEL: blend_sub_ss:
 396 ; SSE:       # BB#0:
 397 ; SSE-NEXT:    subss %xmm1, %xmm0
 398 ; SSE-NEXT:    retq
 399 ;
 400 ; AVX-LABEL: blend_sub_ss:
 401 ; AVX:       # BB#0:
 402 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 403 ; AVX-NEXT:    retq
 404
 405   %ext = extractelement <4 x float> %a, i32 0
 406   %op = fsub float %ext, %b
 407   %ins = insertelement <4 x float> undef, float %op, i32 0
 408   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 409   ret <4 x float> %shuf
 410 }
 411
 412 define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
     ; %b is a scalar float here. Computes b * a[0], places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,5,6,7) takes lane 0 from that
     ; vector and lanes 1-3 from %a. The check lines below expect a single mulss
     ; (vmulss under AVX).
 413 ; SSE-LABEL: blend_mul_ss:
 414 ; SSE:       # BB#0:
 415 ; SSE-NEXT:    mulss %xmm1, %xmm0
 416 ; SSE-NEXT:    retq
 417 ;
 418 ; AVX-LABEL: blend_mul_ss:
 419 ; AVX:       # BB#0:
 420 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 421 ; AVX-NEXT:    retq
 422
 423   %ext = extractelement <4 x float> %a, i32 0
 424   %op = fmul float %b, %ext
 425   %ins = insertelement <4 x float> undef, float %op, i32 0
 426   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 427   ret <4 x float> %shuf
 428 }
 429
 430 define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
     ; %b is a scalar float here. Computes a[0] / b, places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,5,6,7) takes lane 0 from that
     ; vector and lanes 1-3 from %a. The check lines below expect a single divss
     ; (vdivss under AVX).
 431 ; SSE-LABEL: blend_div_ss:
 432 ; SSE:       # BB#0:
 433 ; SSE-NEXT:    divss %xmm1, %xmm0
 434 ; SSE-NEXT:    retq
 435 ;
 436 ; AVX-LABEL: blend_div_ss:
 437 ; AVX:       # BB#0:
 438 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 439 ; AVX-NEXT:    retq
 440
 441   %ext = extractelement <4 x float> %a, i32 0
 442   %op = fdiv float %ext, %b
 443   %ins = insertelement <4 x float> undef, float %op, i32 0
 444   %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 445   ret <4 x float> %shuf
 446 }
 447
 448 define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
     ; %b is a scalar double here. Computes b + a[0], places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,3) takes lane 0 from that
     ; vector and lane 1 from %a. The check lines below expect a single addsd
     ; (vaddsd under AVX).
 449 ; SSE-LABEL: blend_add_sd:
 450 ; SSE:       # BB#0:
 451 ; SSE-NEXT:    addsd %xmm1, %xmm0
 452 ; SSE-NEXT:    retq
 453 ;
 454 ; AVX-LABEL: blend_add_sd:
 455 ; AVX:       # BB#0:
 456 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 457 ; AVX-NEXT:    retq
 458
 459   %ext = extractelement <2 x double> %a, i32 0
 460   %op = fadd double %b, %ext
 461   %ins = insertelement <2 x double> undef, double %op, i32 0
 462   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 463   ret <2 x double> %shuf
 464 }
 465
 466 define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
     ; %b is a scalar double here. Computes a[0] - b, places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,3) takes lane 0 from that
     ; vector and lane 1 from %a. The check lines below expect a single subsd
     ; (vsubsd under AVX).
 467 ; SSE-LABEL: blend_sub_sd:
 468 ; SSE:       # BB#0:
 469 ; SSE-NEXT:    subsd %xmm1, %xmm0
 470 ; SSE-NEXT:    retq
 471 ;
 472 ; AVX-LABEL: blend_sub_sd:
 473 ; AVX:       # BB#0:
 474 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 475 ; AVX-NEXT:    retq
 476
 477   %ext = extractelement <2 x double> %a, i32 0
 478   %op = fsub double %ext, %b
 479   %ins = insertelement <2 x double> undef, double %op, i32 0
 480   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 481   ret <2 x double> %shuf
 482 }
 483
 484 define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
     ; %b is a scalar double here. Computes b * a[0], places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,3) takes lane 0 from that
     ; vector and lane 1 from %a. The check lines below expect a single mulsd
     ; (vmulsd under AVX).
 485 ; SSE-LABEL: blend_mul_sd:
 486 ; SSE:       # BB#0:
 487 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 488 ; SSE-NEXT:    retq
 489 ;
 490 ; AVX-LABEL: blend_mul_sd:
 491 ; AVX:       # BB#0:
 492 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 493 ; AVX-NEXT:    retq
 494
 495   %ext = extractelement <2 x double> %a, i32 0
 496   %op = fmul double %b, %ext
 497   %ins = insertelement <2 x double> undef, double %op, i32 0
 498   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 499   ret <2 x double> %shuf
 500 }
 501
 502 define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
     ; %b is a scalar double here. Computes a[0] / b, places it in lane 0 of an
     ; undef vector, then a shufflevector (mask 0,3) takes lane 0 from that
     ; vector and lane 1 from %a. The check lines below expect a single divsd
     ; (vdivsd under AVX).
 503 ; SSE-LABEL: blend_div_sd:
 504 ; SSE:       # BB#0:
 505 ; SSE-NEXT:    divsd %xmm1, %xmm0
 506 ; SSE-NEXT:    retq
 507 ;
 508 ; AVX-LABEL: blend_div_sd:
 509 ; AVX:       # BB#0:
 510 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 511 ; AVX-NEXT:    retq
 512
 513   %ext = extractelement <2 x double> %a, i32 0
 514   %op = fdiv double %ext, %b
 515   %ins = insertelement <2 x double> undef, double %op, i32 0
 516   %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 517   ret <2 x double> %shuf
 518 }
 519
 520 ; Ensure that the backend selects SSE/AVX scalar fp instructions
 521 ; from a packed fp instruction plus a vector insert.
 522
 523 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b followed by a shufflevector (mask 0,5,6,7) that
     ; keeps only lane 0 of the sum and takes lanes 1-3 from %a. The check lines
     ; below expect this to be selected as a single scalar addss (vaddss under
     ; AVX).
 524 ; SSE-LABEL: insert_test_add_ss:
 525 ; SSE:       # BB#0:
 526 ; SSE-NEXT:    addss %xmm1, %xmm0
 527 ; SSE-NEXT:    retq
 528 ;
 529 ; AVX-LABEL: insert_test_add_ss:
 530 ; AVX:       # BB#0:
 531 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 532 ; AVX-NEXT:    retq
 533   %1 = fadd <4 x float> %a, %b
 534   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 535   ret <4 x float> %2
 536 }
 537
 538 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b) followed by a shufflevector (mask 0,5,6,7) that
     ; keeps only lane 0 of the difference and takes lanes 1-3 from %a. The
     ; check lines below expect this to be selected as a single scalar subss
     ; (vsubss under AVX).
 539 ; SSE-LABEL: insert_test_sub_ss:
 540 ; SSE:       # BB#0:
 541 ; SSE-NEXT:    subss %xmm1, %xmm0
 542 ; SSE-NEXT:    retq
 543 ;
 544 ; AVX-LABEL: insert_test_sub_ss:
 545 ; AVX:       # BB#0:
 546 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 547 ; AVX-NEXT:    retq
 548   %1 = fsub <4 x float> %a, %b
 549   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 550   ret <4 x float> %2
 551 }
 552
 553 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b followed by a shufflevector (mask 0,5,6,7) that
     ; keeps only lane 0 of the product and takes lanes 1-3 from %a. The check
     ; lines below expect this to be selected as a single scalar mulss (vmulss
     ; under AVX).
 554 ; SSE-LABEL: insert_test_mul_ss:
 555 ; SSE:       # BB#0:
 556 ; SSE-NEXT:    mulss %xmm1, %xmm0
 557 ; SSE-NEXT:    retq
 558 ;
 559 ; AVX-LABEL: insert_test_mul_ss:
 560 ; AVX:       # BB#0:
 561 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 562 ; AVX-NEXT:    retq
 563   %1 = fmul <4 x float> %a, %b
 564   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 565   ret <4 x float> %2
 566 }
 567
 568 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b) followed by a shufflevector (mask 0,5,6,7) that
     ; keeps only lane 0 of the quotient and takes lanes 1-3 from %a. The check
     ; lines below expect this to be selected as a single scalar divss (vdivss
     ; under AVX).
 569 ; SSE-LABEL: insert_test_div_ss:
 570 ; SSE:       # BB#0:
 571 ; SSE-NEXT:    divss %xmm1, %xmm0
 572 ; SSE-NEXT:    retq
 573 ;
 574 ; AVX-LABEL: insert_test_div_ss:
 575 ; AVX:       # BB#0:
 576 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 577 ; AVX-NEXT:    retq
 578   %1 = fdiv <4 x float> %a, %b
 579   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 580   ret <4 x float> %2
 581 }
 582
 583 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b followed by a shufflevector (mask 0,3)
     ; that keeps only lane 0 of the sum and takes lane 1 from %a. The check
     ; lines below expect this to be selected as a single scalar addsd (vaddsd
     ; under AVX).
 584 ; SSE-LABEL: insert_test_add_sd:
 585 ; SSE:       # BB#0:
 586 ; SSE-NEXT:    addsd %xmm1, %xmm0
 587 ; SSE-NEXT:    retq
 588 ;
 589 ; AVX-LABEL: insert_test_add_sd:
 590 ; AVX:       # BB#0:
 591 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 592 ; AVX-NEXT:    retq
 593   %1 = fadd <2 x double> %a, %b
 594   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 595   ret <2 x double> %2
 596 }
 597
 598 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b) followed by a shufflevector (mask 0,3) that
     ; keeps only lane 0 of the difference and takes lane 1 from %a. The check
     ; lines below expect this to be selected as a single scalar subsd (vsubsd
     ; under AVX).
 599 ; SSE-LABEL: insert_test_sub_sd:
 600 ; SSE:       # BB#0:
 601 ; SSE-NEXT:    subsd %xmm1, %xmm0
 602 ; SSE-NEXT:    retq
 603 ;
 604 ; AVX-LABEL: insert_test_sub_sd:
 605 ; AVX:       # BB#0:
 606 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 607 ; AVX-NEXT:    retq
 608   %1 = fsub <2 x double> %a, %b
 609   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 610   ret <2 x double> %2
 611 }
 612
 613 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b followed by a shufflevector (mask 0,3)
     ; that keeps only lane 0 of the product and takes lane 1 from %a. The check
     ; lines below expect this to be selected as a single scalar mulsd (vmulsd
     ; under AVX).
 614 ; SSE-LABEL: insert_test_mul_sd:
 615 ; SSE:       # BB#0:
 616 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 617 ; SSE-NEXT:    retq
 618 ;
 619 ; AVX-LABEL: insert_test_mul_sd:
 620 ; AVX:       # BB#0:
 621 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 622 ; AVX-NEXT:    retq
 623   %1 = fmul <2 x double> %a, %b
 624   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 625   ret <2 x double> %2
 626 }
 627
 628 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b) followed by a shufflevector (mask 0,3) that
     ; keeps only lane 0 of the quotient and takes lane 1 from %a. The check
     ; lines below expect this to be selected as a single scalar divsd (vdivsd
     ; under AVX).
 629 ; SSE-LABEL: insert_test_div_sd:
 630 ; SSE:       # BB#0:
 631 ; SSE-NEXT:    divsd %xmm1, %xmm0
 632 ; SSE-NEXT:    retq
 633 ;
 634 ; AVX-LABEL: insert_test_div_sd:
 635 ; AVX:       # BB#0:
 636 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 637 ; AVX-NEXT:    retq
 638   %1 = fdiv <2 x double> %a, %b
 639   %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
 640   ret <2 x double> %2
 641 }
 642
 643 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd with %b as the first operand, followed by a shufflevector
     ; (mask 0,5,6,7) that keeps lane 0 of the sum and takes lanes 1-3 from %b.
     ; The SSE check lines expect addss writing %xmm1 and then a movaps into
     ; %xmm0; the AVX check lines expect one three-operand vaddss.
 644 ; SSE-LABEL: insert_test2_add_ss:
 645 ; SSE:       # BB#0:
 646 ; SSE-NEXT:    addss %xmm0, %xmm1
 647 ; SSE-NEXT:    movaps %xmm1, %xmm0
 648 ; SSE-NEXT:    retq
 649 ;
 650 ; AVX-LABEL: insert_test2_add_ss:
 651 ; AVX:       # BB#0:
 652 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 653 ; AVX-NEXT:    retq
 654   %1 = fadd <4 x float> %b, %a
 655   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 656   ret <4 x float> %2
 657 }
 658
 659 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%b - %a) followed by a shufflevector (mask 0,5,6,7) that
     ; keeps lane 0 of the difference and takes lanes 1-3 from %b. The SSE check
     ; lines expect subss writing %xmm1 and then a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vsubss.
 660 ; SSE-LABEL: insert_test2_sub_ss:
 661 ; SSE:       # BB#0:
 662 ; SSE-NEXT:    subss %xmm0, %xmm1
 663 ; SSE-NEXT:    movaps %xmm1, %xmm0
 664 ; SSE-NEXT:    retq
 665 ;
 666 ; AVX-LABEL: insert_test2_sub_ss:
 667 ; AVX:       # BB#0:
 668 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 669 ; AVX-NEXT:    retq
 670   %1 = fsub <4 x float> %b, %a
 671   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 672   ret <4 x float> %2
 673 }
 674
 675 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul with %b as the first operand, followed by a shufflevector
     ; (mask 0,5,6,7) that keeps lane 0 of the product and takes lanes 1-3 from
     ; %b. The SSE check lines expect mulss writing %xmm1 and then a movaps into
     ; %xmm0; the AVX check lines expect one three-operand vmulss.
 676 ; SSE-LABEL: insert_test2_mul_ss:
 677 ; SSE:       # BB#0:
 678 ; SSE-NEXT:    mulss %xmm0, %xmm1
 679 ; SSE-NEXT:    movaps %xmm1, %xmm0
 680 ; SSE-NEXT:    retq
 681 ;
 682 ; AVX-LABEL: insert_test2_mul_ss:
 683 ; AVX:       # BB#0:
 684 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 685 ; AVX-NEXT:    retq
 686   %1 = fmul <4 x float> %b, %a
 687   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 688   ret <4 x float> %2
 689 }
 690
 691 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%b / %a) followed by a shufflevector (mask 0,5,6,7) that
     ; keeps lane 0 of the quotient and takes lanes 1-3 from %b. The SSE check
     ; lines expect divss writing %xmm1 and then a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vdivss.
 692 ; SSE-LABEL: insert_test2_div_ss:
 693 ; SSE:       # BB#0:
 694 ; SSE-NEXT:    divss %xmm0, %xmm1
 695 ; SSE-NEXT:    movaps %xmm1, %xmm0
 696 ; SSE-NEXT:    retq
 697 ;
 698 ; AVX-LABEL: insert_test2_div_ss:
 699 ; AVX:       # BB#0:
 700 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 701 ; AVX-NEXT:    retq
 702   %1 = fdiv <4 x float> %b, %a
 703   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 704   ret <4 x float> %2
 705 }
 706
 707 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd with %b as the first operand, followed by a
     ; shufflevector (mask 0,3) that keeps lane 0 of the sum and takes lane 1
     ; from %b. The SSE check lines expect addsd writing %xmm1 and then a movaps
     ; into %xmm0; the AVX check lines expect one three-operand vaddsd.
 708 ; SSE-LABEL: insert_test2_add_sd:
 709 ; SSE:       # BB#0:
 710 ; SSE-NEXT:    addsd %xmm0, %xmm1
 711 ; SSE-NEXT:    movaps %xmm1, %xmm0
 712 ; SSE-NEXT:    retq
 713 ;
 714 ; AVX-LABEL: insert_test2_add_sd:
 715 ; AVX:       # BB#0:
 716 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 717 ; AVX-NEXT:    retq
 718   %1 = fadd <2 x double> %b, %a
 719   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 720   ret <2 x double> %2
 721 }
 722
 723 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%b - %a) followed by a shufflevector (mask 0,3) that
     ; keeps lane 0 of the difference and takes lane 1 from %b. The SSE check
     ; lines expect subsd writing %xmm1 and then a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vsubsd.
 724 ; SSE-LABEL: insert_test2_sub_sd:
 725 ; SSE:       # BB#0:
 726 ; SSE-NEXT:    subsd %xmm0, %xmm1
 727 ; SSE-NEXT:    movaps %xmm1, %xmm0
 728 ; SSE-NEXT:    retq
 729 ;
 730 ; AVX-LABEL: insert_test2_sub_sd:
 731 ; AVX:       # BB#0:
 732 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 733 ; AVX-NEXT:    retq
 734   %1 = fsub <2 x double> %b, %a
 735   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 736   ret <2 x double> %2
 737 }
 738
 739 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul with %b as the first operand, followed by a
     ; shufflevector (mask 0,3) that keeps lane 0 of the product and takes lane
     ; 1 from %b. The SSE check lines expect mulsd writing %xmm1 and then a
     ; movaps into %xmm0; the AVX check lines expect one three-operand vmulsd.
 740 ; SSE-LABEL: insert_test2_mul_sd:
 741 ; SSE:       # BB#0:
 742 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 743 ; SSE-NEXT:    movaps %xmm1, %xmm0
 744 ; SSE-NEXT:    retq
 745 ;
 746 ; AVX-LABEL: insert_test2_mul_sd:
 747 ; AVX:       # BB#0:
 748 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 749 ; AVX-NEXT:    retq
 750   %1 = fmul <2 x double> %b, %a
 751   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 752   ret <2 x double> %2
 753 }
 754
 755 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%b / %a) followed by a shufflevector (mask 0,3) that
     ; keeps lane 0 of the quotient and takes lane 1 from %b. The SSE check
     ; lines expect divsd writing %xmm1 and then a movaps into %xmm0; the AVX
     ; check lines expect one three-operand vdivsd.
 756 ; SSE-LABEL: insert_test2_div_sd:
 757 ; SSE:       # BB#0:
 758 ; SSE-NEXT:    divsd %xmm0, %xmm1
 759 ; SSE-NEXT:    movaps %xmm1, %xmm0
 760 ; SSE-NEXT:    retq
 761 ;
 762 ; AVX-LABEL: insert_test2_div_sd:
 763 ; AVX:       # BB#0:
 764 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 765 ; AVX-NEXT:    retq
 766   %1 = fdiv <2 x double> %b, %a
 767   %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 768   ret <2 x double> %2
 769 }
 770
 771 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd of %a and %b followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %a and lane 0 comes
     ; from the sum. The check lines below expect a single scalar addss (vaddss
     ; under AVX).
 772 ; SSE-LABEL: insert_test3_add_ss:
 773 ; SSE:       # BB#0:
 774 ; SSE-NEXT:    addss %xmm1, %xmm0
 775 ; SSE-NEXT:    retq
 776 ;
 777 ; AVX-LABEL: insert_test3_add_ss:
 778 ; AVX:       # BB#0:
 779 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 780 ; AVX-NEXT:    retq
 781   %1 = fadd <4 x float> %a, %b
 782   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 783   ret <4 x float> %2
 784 }
 785
 786 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%a - %b) followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %a and lane 0 comes
     ; from the difference. The check lines below expect a single scalar subss
     ; (vsubss under AVX).
 787 ; SSE-LABEL: insert_test3_sub_ss:
 788 ; SSE:       # BB#0:
 789 ; SSE-NEXT:    subss %xmm1, %xmm0
 790 ; SSE-NEXT:    retq
 791 ;
 792 ; AVX-LABEL: insert_test3_sub_ss:
 793 ; AVX:       # BB#0:
 794 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 795 ; AVX-NEXT:    retq
 796   %1 = fsub <4 x float> %a, %b
 797   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 798   ret <4 x float> %2
 799 }
 800
 801 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul of %a and %b followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %a and lane 0 comes
     ; from the product. The check lines below expect a single scalar mulss
     ; (vmulss under AVX).
 802 ; SSE-LABEL: insert_test3_mul_ss:
 803 ; SSE:       # BB#0:
 804 ; SSE-NEXT:    mulss %xmm1, %xmm0
 805 ; SSE-NEXT:    retq
 806 ;
 807 ; AVX-LABEL: insert_test3_mul_ss:
 808 ; AVX:       # BB#0:
 809 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 810 ; AVX-NEXT:    retq
 811   %1 = fmul <4 x float> %a, %b
 812   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 813   ret <4 x float> %2
 814 }
 815
 816 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%a / %b) followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %a and lane 0 comes
     ; from the quotient. The check lines below expect a single scalar divss
     ; (vdivss under AVX).
 817 ; SSE-LABEL: insert_test3_div_ss:
 818 ; SSE:       # BB#0:
 819 ; SSE-NEXT:    divss %xmm1, %xmm0
 820 ; SSE-NEXT:    retq
 821 ;
 822 ; AVX-LABEL: insert_test3_div_ss:
 823 ; AVX:       # BB#0:
 824 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 825 ; AVX-NEXT:    retq
 826   %1 = fdiv <4 x float> %a, %b
 827   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
 828   ret <4 x float> %2
 829 }
 830
 831 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd of %a and %b followed by a select with the constant
     ; mask <false, true>, so lane 1 comes from %a and lane 0 comes from the
     ; sum. The check lines below expect a single scalar addsd (vaddsd under
     ; AVX).
 832 ; SSE-LABEL: insert_test3_add_sd:
 833 ; SSE:       # BB#0:
 834 ; SSE-NEXT:    addsd %xmm1, %xmm0
 835 ; SSE-NEXT:    retq
 836 ;
 837 ; AVX-LABEL: insert_test3_add_sd:
 838 ; AVX:       # BB#0:
 839 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 840 ; AVX-NEXT:    retq
 841   %1 = fadd <2 x double> %a, %b
 842   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 843   ret <2 x double> %2
 844 }
 845
 846 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%a - %b) followed by a select with the constant mask
     ; <false, true>, so lane 1 comes from %a and lane 0 comes from the
     ; difference. The check lines below expect a single scalar subsd (vsubsd
     ; under AVX).
 847 ; SSE-LABEL: insert_test3_sub_sd:
 848 ; SSE:       # BB#0:
 849 ; SSE-NEXT:    subsd %xmm1, %xmm0
 850 ; SSE-NEXT:    retq
 851 ;
 852 ; AVX-LABEL: insert_test3_sub_sd:
 853 ; AVX:       # BB#0:
 854 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 855 ; AVX-NEXT:    retq
 856   %1 = fsub <2 x double> %a, %b
 857   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 858   ret <2 x double> %2
 859 }
 860
 861 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul of %a and %b followed by a select with the constant
     ; mask <false, true>, so lane 1 comes from %a and lane 0 comes from the
     ; product. The check lines below expect a single scalar mulsd (vmulsd under
     ; AVX).
 862 ; SSE-LABEL: insert_test3_mul_sd:
 863 ; SSE:       # BB#0:
 864 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 865 ; SSE-NEXT:    retq
 866 ;
 867 ; AVX-LABEL: insert_test3_mul_sd:
 868 ; AVX:       # BB#0:
 869 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 870 ; AVX-NEXT:    retq
 871   %1 = fmul <2 x double> %a, %b
 872   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 873   ret <2 x double> %2
 874 }
 875
 876 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%a / %b) followed by a select with the constant mask
     ; <false, true>, so lane 1 comes from %a and lane 0 comes from the
     ; quotient. The check lines below expect a single scalar divsd (vdivsd
     ; under AVX).
 877 ; SSE-LABEL: insert_test3_div_sd:
 878 ; SSE:       # BB#0:
 879 ; SSE-NEXT:    divsd %xmm1, %xmm0
 880 ; SSE-NEXT:    retq
 881 ;
 882 ; AVX-LABEL: insert_test3_div_sd:
 883 ; AVX:       # BB#0:
 884 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 885 ; AVX-NEXT:    retq
 886   %1 = fdiv <2 x double> %a, %b
 887   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
 888   ret <2 x double> %2
 889 }
 890
 891 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fadd with %b as the first operand, followed by a select with the
     ; constant mask <false, true, true, true>, so lanes 1-3 come from %b and
     ; lane 0 comes from the sum. The SSE check lines expect addss writing %xmm1
     ; and then a movaps into %xmm0; the AVX check lines expect one vaddss.
 892 ; SSE-LABEL: insert_test4_add_ss:
 893 ; SSE:       # BB#0:
 894 ; SSE-NEXT:    addss %xmm0, %xmm1
 895 ; SSE-NEXT:    movaps %xmm1, %xmm0
 896 ; SSE-NEXT:    retq
 897 ;
 898 ; AVX-LABEL: insert_test4_add_ss:
 899 ; AVX:       # BB#0:
 900 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 901 ; AVX-NEXT:    retq
 902   %1 = fadd <4 x float> %b, %a
 903   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 904   ret <4 x float> %2
 905 }
 906
 907 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fsub (%b - %a) followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %b and lane 0 comes
     ; from the difference. The SSE check lines expect subss writing %xmm1 and
     ; then a movaps into %xmm0; the AVX check lines expect one vsubss.
 908 ; SSE-LABEL: insert_test4_sub_ss:
 909 ; SSE:       # BB#0:
 910 ; SSE-NEXT:    subss %xmm0, %xmm1
 911 ; SSE-NEXT:    movaps %xmm1, %xmm0
 912 ; SSE-NEXT:    retq
 913 ;
 914 ; AVX-LABEL: insert_test4_sub_ss:
 915 ; AVX:       # BB#0:
 916 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 917 ; AVX-NEXT:    retq
 918   %1 = fsub <4 x float> %b, %a
 919   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 920   ret <4 x float> %2
 921 }
 922
 923 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fmul with %b as the first operand, followed by a select with the
     ; constant mask <false, true, true, true>, so lanes 1-3 come from %b and
     ; lane 0 comes from the product. The SSE check lines expect mulss writing
     ; %xmm1 and then a movaps into %xmm0; the AVX check lines expect one vmulss.
 924 ; SSE-LABEL: insert_test4_mul_ss:
 925 ; SSE:       # BB#0:
 926 ; SSE-NEXT:    mulss %xmm0, %xmm1
 927 ; SSE-NEXT:    movaps %xmm1, %xmm0
 928 ; SSE-NEXT:    retq
 929 ;
 930 ; AVX-LABEL: insert_test4_mul_ss:
 931 ; AVX:       # BB#0:
 932 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 933 ; AVX-NEXT:    retq
 934   %1 = fmul <4 x float> %b, %a
 935   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 936   ret <4 x float> %2
 937 }
 938
 939 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
     ; Packed fdiv (%b / %a) followed by a select with the constant mask
     ; <false, true, true, true>, so lanes 1-3 come from %b and lane 0 comes
     ; from the quotient. The SSE check lines expect divss writing %xmm1 and
     ; then a movaps into %xmm0; the AVX check lines expect one vdivss.
 940 ; SSE-LABEL: insert_test4_div_ss:
 941 ; SSE:       # BB#0:
 942 ; SSE-NEXT:    divss %xmm0, %xmm1
 943 ; SSE-NEXT:    movaps %xmm1, %xmm0
 944 ; SSE-NEXT:    retq
 945 ;
 946 ; AVX-LABEL: insert_test4_div_ss:
 947 ; AVX:       # BB#0:
 948 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 949 ; AVX-NEXT:    retq
 950   %1 = fdiv <4 x float> %b, %a
 951   %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
 952   ret <4 x float> %2
 953 }
 954
 955 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fadd with %b as the first operand, followed by a select
     ; with the constant mask <false, true>, so lane 1 comes from %b and lane 0
     ; comes from the sum. The SSE check lines expect addsd writing %xmm1 and
     ; then a movaps into %xmm0; the AVX check lines expect one vaddsd.
 956 ; SSE-LABEL: insert_test4_add_sd:
 957 ; SSE:       # BB#0:
 958 ; SSE-NEXT:    addsd %xmm0, %xmm1
 959 ; SSE-NEXT:    movaps %xmm1, %xmm0
 960 ; SSE-NEXT:    retq
 961 ;
 962 ; AVX-LABEL: insert_test4_add_sd:
 963 ; AVX:       # BB#0:
 964 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 965 ; AVX-NEXT:    retq
 966   %1 = fadd <2 x double> %b, %a
 967   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 968   ret <2 x double> %2
 969 }
 970
 971 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fsub (%b - %a) followed by a select with the constant mask
     ; <false, true>, so lane 1 comes from %b and lane 0 comes from the
     ; difference. The SSE check lines expect subsd writing %xmm1 and then a
     ; movaps into %xmm0; the AVX check lines expect one vsubsd.
 972 ; SSE-LABEL: insert_test4_sub_sd:
 973 ; SSE:       # BB#0:
 974 ; SSE-NEXT:    subsd %xmm0, %xmm1
 975 ; SSE-NEXT:    movaps %xmm1, %xmm0
 976 ; SSE-NEXT:    retq
 977 ;
 978 ; AVX-LABEL: insert_test4_sub_sd:
 979 ; AVX:       # BB#0:
 980 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 981 ; AVX-NEXT:    retq
 982   %1 = fsub <2 x double> %b, %a
 983   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
 984   ret <2 x double> %2
 985 }
 986
 987 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fmul with %b as the first operand, followed by a select
     ; with the constant mask <false, true>, so lane 1 comes from %b and lane 0
     ; comes from the product. The SSE check lines expect mulsd writing %xmm1
     ; and then a movaps into %xmm0; the AVX check lines expect one vmulsd.
 988 ; SSE-LABEL: insert_test4_mul_sd:
 989 ; SSE:       # BB#0:
 990 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 991 ; SSE-NEXT:    movaps %xmm1, %xmm0
 992 ; SSE-NEXT:    retq
 993 ;
 994 ; AVX-LABEL: insert_test4_mul_sd:
 995 ; AVX:       # BB#0:
 996 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 997 ; AVX-NEXT:    retq
 998   %1 = fmul <2 x double> %b, %a
 999   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
1000   ret <2 x double> %2
1001 }
1002
1003 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
     ; Packed double fdiv (%b / %a) followed by a select with the constant mask
     ; <false, true>, so lane 1 comes from %b and lane 0 comes from the
     ; quotient. The SSE check lines expect divsd writing %xmm1 and then a
     ; movaps into %xmm0; the AVX check lines expect one vdivsd.
1004 ; SSE-LABEL: insert_test4_div_sd:
1005 ; SSE:       # BB#0:
1006 ; SSE-NEXT:    divsd %xmm0, %xmm1
1007 ; SSE-NEXT:    movaps %xmm1, %xmm0
1008 ; SSE-NEXT:    retq
1009 ;
1010 ; AVX-LABEL: insert_test4_div_sd:
1011 ; AVX:       # BB#0:
1012 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
1013 ; AVX-NEXT:    retq
1014   %1 = fdiv <2 x double> %b, %a
1015   %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
1016   ret <2 x double> %2
1017 }