test/CodeGen/X86/sse-scalar-fp-arith.ll

   1 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
   2 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
   3 ; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck --check-prefix=AVX %s
   4
   5 ; Ensure that the backend no longer emits unnecessary vector insert
   6 ; instructions immediately after SSE scalar fp instructions like addss or
   7 ; mulss. Run with SSE4.1 available, with SSE4.1 disabled, and with AVX.
   8
   9 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  10 ; SSE-LABEL: test_add_ss:
  11 ; SSE:       # BB#0:
  12 ; SSE-NEXT:    addss %xmm1, %xmm0
  13 ; SSE-NEXT:    retq
  14 ;
  15 ; AVX-LABEL: test_add_ss:
  16 ; AVX:       # BB#0:
  17 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  18 ; AVX-NEXT:    retq
  19   %b0 = extractelement <4 x float> %b, i32 0
  20   %a0 = extractelement <4 x float> %a, i32 0
  21   %sum = fadd float %a0, %b0 ; a[0] + b[0]
  22   %res = insertelement <4 x float> %a, float %sum, i32 0 ; lanes 1-3 keep %a
  23   ret <4 x float> %res
  24 }
  25
  26 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  27 ; SSE-LABEL: test_sub_ss:
  28 ; SSE:       # BB#0:
  29 ; SSE-NEXT:    subss %xmm1, %xmm0
  30 ; SSE-NEXT:    retq
  31 ;
  32 ; AVX-LABEL: test_sub_ss:
  33 ; AVX:       # BB#0:
  34 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
  35 ; AVX-NEXT:    retq
  36   %b0 = extractelement <4 x float> %b, i32 0
  37   %a0 = extractelement <4 x float> %a, i32 0
  38   %diff = fsub float %a0, %b0 ; a[0] - b[0]
  39   %res = insertelement <4 x float> %a, float %diff, i32 0 ; lanes 1-3 keep %a
  40   ret <4 x float> %res
  41 }
  42
  43 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  44 ; SSE-LABEL: test_mul_ss:
  45 ; SSE:       # BB#0:
  46 ; SSE-NEXT:    mulss %xmm1, %xmm0
  47 ; SSE-NEXT:    retq
  48 ;
  49 ; AVX-LABEL: test_mul_ss:
  50 ; AVX:       # BB#0:
  51 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
  52 ; AVX-NEXT:    retq
  53   %b0 = extractelement <4 x float> %b, i32 0
  54   %a0 = extractelement <4 x float> %a, i32 0
  55   %prod = fmul float %a0, %b0 ; a[0] * b[0]
  56   %res = insertelement <4 x float> %a, float %prod, i32 0 ; lanes 1-3 keep %a
  57   ret <4 x float> %res
  58 }
  59
  60 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  61 ; SSE-LABEL: test_div_ss:
  62 ; SSE:       # BB#0:
  63 ; SSE-NEXT:    divss %xmm1, %xmm0
  64 ; SSE-NEXT:    retq
  65 ;
  66 ; AVX-LABEL: test_div_ss:
  67 ; AVX:       # BB#0:
  68 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  69 ; AVX-NEXT:    retq
  70   %b0 = extractelement <4 x float> %b, i32 0
  71   %a0 = extractelement <4 x float> %a, i32 0
  72   %quot = fdiv float %a0, %b0 ; a[0] / b[0]
  73   %res = insertelement <4 x float> %a, float %quot, i32 0 ; lanes 1-3 keep %a
  74   ret <4 x float> %res
  75 }
  76
  77 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  78 ; SSE-LABEL: test_add_sd:
  79 ; SSE:       # BB#0:
  80 ; SSE-NEXT:    addsd %xmm1, %xmm0
  81 ; SSE-NEXT:    retq
  82 ;
  83 ; AVX-LABEL: test_add_sd:
  84 ; AVX:       # BB#0:
  85 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
  86 ; AVX-NEXT:    retq
  87   %b0 = extractelement <2 x double> %b, i32 0
  88   %a0 = extractelement <2 x double> %a, i32 0
  89   %sum = fadd double %a0, %b0 ; a[0] + b[0]
  90   %res = insertelement <2 x double> %a, double %sum, i32 0 ; lane 1 keeps %a
  91   ret <2 x double> %res
  92 }
  93
  94 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  95 ; SSE-LABEL: test_sub_sd:
  96 ; SSE:       # BB#0:
  97 ; SSE-NEXT:    subsd %xmm1, %xmm0
  98 ; SSE-NEXT:    retq
  99 ;
 100 ; AVX-LABEL: test_sub_sd:
 101 ; AVX:       # BB#0:
 102 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 103 ; AVX-NEXT:    retq
 104   %b0 = extractelement <2 x double> %b, i32 0
 105   %a0 = extractelement <2 x double> %a, i32 0
 106   %diff = fsub double %a0, %b0 ; a[0] - b[0]
 107   %res = insertelement <2 x double> %a, double %diff, i32 0 ; lane 1 keeps %a
 108   ret <2 x double> %res
 109 }
 110
 111 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
 112 ; SSE-LABEL: test_mul_sd:
 113 ; SSE:       # BB#0:
 114 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 115 ; SSE-NEXT:    retq
 116 ;
 117 ; AVX-LABEL: test_mul_sd:
 118 ; AVX:       # BB#0:
 119 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 120 ; AVX-NEXT:    retq
 121   %b0 = extractelement <2 x double> %b, i32 0
 122   %a0 = extractelement <2 x double> %a, i32 0
 123   %prod = fmul double %a0, %b0 ; a[0] * b[0]
 124   %res = insertelement <2 x double> %a, double %prod, i32 0 ; lane 1 keeps %a
 125   ret <2 x double> %res
 126 }
 127
 128 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
 129 ; SSE-LABEL: test_div_sd:
 130 ; SSE:       # BB#0:
 131 ; SSE-NEXT:    divsd %xmm1, %xmm0
 132 ; SSE-NEXT:    retq
 133 ;
 134 ; AVX-LABEL: test_div_sd:
 135 ; AVX:       # BB#0:
 136 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 137 ; AVX-NEXT:    retq
 138   %b0 = extractelement <2 x double> %b, i32 0
 139   %a0 = extractelement <2 x double> %a, i32 0
 140   %quot = fdiv double %a0, %b0 ; a[0] / b[0]
 141   %res = insertelement <2 x double> %a, double %quot, i32 0 ; lane 1 keeps %a
 142   ret <2 x double> %res
 143 }
 144
 145 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
 146 ; SSE-LABEL: test2_add_ss:
 147 ; SSE:       # BB#0:
 148 ; SSE-NEXT:    addss %xmm0, %xmm1
 149 ; SSE-NEXT:    movaps %xmm1, %xmm0
 150 ; SSE-NEXT:    retq
 151 ;
 152 ; AVX-LABEL: test2_add_ss:
 153 ; AVX:       # BB#0:
 154 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 155 ; AVX-NEXT:    retq
 156   %a0 = extractelement <4 x float> %a, i32 0
 157   %b0 = extractelement <4 x float> %b, i32 0
 158   %sum = fadd float %a0, %b0 ; a[0] + b[0]
 159   %res = insertelement <4 x float> %b, float %sum, i32 0 ; lanes 1-3 keep %b
 160   ret <4 x float> %res
 161 }
 162
 163 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
 164 ; SSE-LABEL: test2_sub_ss:
 165 ; SSE:       # BB#0:
 166 ; SSE-NEXT:    subss %xmm0, %xmm1
 167 ; SSE-NEXT:    movaps %xmm1, %xmm0
 168 ; SSE-NEXT:    retq
 169 ;
 170 ; AVX-LABEL: test2_sub_ss:
 171 ; AVX:       # BB#0:
 172 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 173 ; AVX-NEXT:    retq
 174   %a0 = extractelement <4 x float> %a, i32 0
 175   %b0 = extractelement <4 x float> %b, i32 0
 176   %diff = fsub float %b0, %a0 ; b[0] - a[0]
 177   %res = insertelement <4 x float> %b, float %diff, i32 0 ; lanes 1-3 keep %b
 178   ret <4 x float> %res
 179 }
 180
 181 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
 182 ; SSE-LABEL: test2_mul_ss:
 183 ; SSE:       # BB#0:
 184 ; SSE-NEXT:    mulss %xmm0, %xmm1
 185 ; SSE-NEXT:    movaps %xmm1, %xmm0
 186 ; SSE-NEXT:    retq
 187 ;
 188 ; AVX-LABEL: test2_mul_ss:
 189 ; AVX:       # BB#0:
 190 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 191 ; AVX-NEXT:    retq
 192   %a0 = extractelement <4 x float> %a, i32 0
 193   %b0 = extractelement <4 x float> %b, i32 0
 194   %prod = fmul float %a0, %b0 ; a[0] * b[0]
 195   %res = insertelement <4 x float> %b, float %prod, i32 0 ; lanes 1-3 keep %b
 196   ret <4 x float> %res
 197 }
 198
 199 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
 200 ; SSE-LABEL: test2_div_ss:
 201 ; SSE:       # BB#0:
 202 ; SSE-NEXT:    divss %xmm0, %xmm1
 203 ; SSE-NEXT:    movaps %xmm1, %xmm0
 204 ; SSE-NEXT:    retq
 205 ;
 206 ; AVX-LABEL: test2_div_ss:
 207 ; AVX:       # BB#0:
 208 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 209 ; AVX-NEXT:    retq
 210   %a0 = extractelement <4 x float> %a, i32 0
 211   %b0 = extractelement <4 x float> %b, i32 0
 212   %quot = fdiv float %b0, %a0 ; b[0] / a[0]
 213   %res = insertelement <4 x float> %b, float %quot, i32 0 ; lanes 1-3 keep %b
 214   ret <4 x float> %res
 215 }
 216
 217 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
 218 ; SSE-LABEL: test2_add_sd:
 219 ; SSE:       # BB#0:
 220 ; SSE-NEXT:    addsd %xmm0, %xmm1
 221 ; SSE-NEXT:    movaps %xmm1, %xmm0
 222 ; SSE-NEXT:    retq
 223 ;
 224 ; AVX-LABEL: test2_add_sd:
 225 ; AVX:       # BB#0:
 226 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 227 ; AVX-NEXT:    retq
 228   %a0 = extractelement <2 x double> %a, i32 0
 229   %b0 = extractelement <2 x double> %b, i32 0
 230   %sum = fadd double %a0, %b0 ; a[0] + b[0]
 231   %res = insertelement <2 x double> %b, double %sum, i32 0 ; lane 1 keeps %b
 232   ret <2 x double> %res
 233 }
 234
 235 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
 236 ; SSE-LABEL: test2_sub_sd:
 237 ; SSE:       # BB#0:
 238 ; SSE-NEXT:    subsd %xmm0, %xmm1
 239 ; SSE-NEXT:    movaps %xmm1, %xmm0
 240 ; SSE-NEXT:    retq
 241 ;
 242 ; AVX-LABEL: test2_sub_sd:
 243 ; AVX:       # BB#0:
 244 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 245 ; AVX-NEXT:    retq
 246   %a0 = extractelement <2 x double> %a, i32 0
 247   %b0 = extractelement <2 x double> %b, i32 0
 248   %diff = fsub double %b0, %a0 ; b[0] - a[0]
 249   %res = insertelement <2 x double> %b, double %diff, i32 0 ; lane 1 keeps %b
 250   ret <2 x double> %res
 251 }
 252
 253 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
 254 ; SSE-LABEL: test2_mul_sd:
 255 ; SSE:       # BB#0:
 256 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 257 ; SSE-NEXT:    movaps %xmm1, %xmm0
 258 ; SSE-NEXT:    retq
 259 ;
 260 ; AVX-LABEL: test2_mul_sd:
 261 ; AVX:       # BB#0:
 262 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 263 ; AVX-NEXT:    retq
 264   %a0 = extractelement <2 x double> %a, i32 0
 265   %b0 = extractelement <2 x double> %b, i32 0
 266   %prod = fmul double %a0, %b0 ; a[0] * b[0]
 267   %res = insertelement <2 x double> %b, double %prod, i32 0 ; lane 1 keeps %b
 268   ret <2 x double> %res
 269 }
 270
 271 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
 272 ; SSE-LABEL: test2_div_sd:
 273 ; SSE:       # BB#0:
 274 ; SSE-NEXT:    divsd %xmm0, %xmm1
 275 ; SSE-NEXT:    movaps %xmm1, %xmm0
 276 ; SSE-NEXT:    retq
 277 ;
 278 ; AVX-LABEL: test2_div_sd:
 279 ; AVX:       # BB#0:
 280 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 281 ; AVX-NEXT:    retq
 282   %a0 = extractelement <2 x double> %a, i32 0
 283   %b0 = extractelement <2 x double> %b, i32 0
 284   %quot = fdiv double %b0, %a0 ; b[0] / a[0]
 285   %res = insertelement <2 x double> %b, double %quot, i32 0 ; lane 1 keeps %b
 286   ret <2 x double> %res
 287 }
 288
 289 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
 290 ; SSE-LABEL: test_multiple_add_ss:
 291 ; SSE:       # BB#0:
 292 ; SSE-NEXT:    addss %xmm0, %xmm1
 293 ; SSE-NEXT:    addss %xmm1, %xmm0
 294 ; SSE-NEXT:    retq
 295 ;
 296 ; AVX-LABEL: test_multiple_add_ss:
 297 ; AVX:       # BB#0:
 298 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 299 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 300 ; AVX-NEXT:    retq
 301   %b0 = extractelement <4 x float> %b, i32 0
 302   %a0 = extractelement <4 x float> %a, i32 0
 303   %sum = fadd float %a0, %b0 ; a[0] + b[0]
 304   %sum2 = fadd float %a0, %sum ; a[0] + (a[0] + b[0])
 305   %res = insertelement <4 x float> %a, float %sum2, i32 0 ; lanes 1-3 keep %a
 306   ret <4 x float> %res
 307 }
 308
 309 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
 310 ; SSE-LABEL: test_multiple_sub_ss:
 311 ; SSE:       # BB#0:
 312 ; SSE-NEXT:    movaps %xmm0, %xmm2
 313 ; SSE-NEXT:    subss %xmm1, %xmm2
 314 ; SSE-NEXT:    subss %xmm2, %xmm0
 315 ; SSE-NEXT:    retq
 316 ;
 317 ; AVX-LABEL: test_multiple_sub_ss:
 318 ; AVX:       # BB#0:
 319 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
 320 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 321 ; AVX-NEXT:    retq
 322   %b0 = extractelement <4 x float> %b, i32 0
 323   %a0 = extractelement <4 x float> %a, i32 0
 324   %diff = fsub float %a0, %b0 ; a[0] - b[0]
 325   %diff2 = fsub float %a0, %diff ; a[0] - (a[0] - b[0])
 326   %res = insertelement <4 x float> %a, float %diff2, i32 0 ; lanes 1-3 keep %a
 327   ret <4 x float> %res
 328 }
 329
 330 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
 331 ; SSE-LABEL: test_multiple_mul_ss:
 332 ; SSE:       # BB#0:
 333 ; SSE-NEXT:    mulss %xmm0, %xmm1
 334 ; SSE-NEXT:    mulss %xmm1, %xmm0
 335 ; SSE-NEXT:    retq
 336 ;
 337 ; AVX-LABEL: test_multiple_mul_ss:
 338 ; AVX:       # BB#0:
 339 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
 340 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 341 ; AVX-NEXT:    retq
 342   %b0 = extractelement <4 x float> %b, i32 0
 343   %a0 = extractelement <4 x float> %a, i32 0
 344   %prod = fmul float %a0, %b0 ; a[0] * b[0]
 345   %prod2 = fmul float %a0, %prod ; a[0] * (a[0] * b[0])
 346   %res = insertelement <4 x float> %a, float %prod2, i32 0 ; lanes 1-3 keep %a
 347   ret <4 x float> %res
 348 }
 349
 350 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
 351 ; SSE-LABEL: test_multiple_div_ss:
 352 ; SSE:       # BB#0:
 353 ; SSE-NEXT:    movaps %xmm0, %xmm2
 354 ; SSE-NEXT:    divss %xmm1, %xmm2
 355 ; SSE-NEXT:    divss %xmm2, %xmm0
 356 ; SSE-NEXT:    retq
 357 ;
 358 ; AVX-LABEL: test_multiple_div_ss:
 359 ; AVX:       # BB#0:
 360 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
 361 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 362 ; AVX-NEXT:    retq
 363   %b0 = extractelement <4 x float> %b, i32 0
 364   %a0 = extractelement <4 x float> %a, i32 0
 365   %quot = fdiv float %a0, %b0 ; a[0] / b[0]
 366   %quot2 = fdiv float %a0, %quot ; a[0] / (a[0] / b[0])
 367   %res = insertelement <4 x float> %a, float %quot2, i32 0 ; lanes 1-3 keep %a
 368   ret <4 x float> %res
 369 }
 370
 371 ; Ensure that the backend selects SSE/AVX scalar fp instructions
 372 ; from a packed fp instruction plus a vector insert.
 373
 374 define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
 375 ; SSE-LABEL: insert_test_add_ss:
 376 ; SSE:       # BB#0:
 377 ; SSE-NEXT:    addss %xmm1, %xmm0
 378 ; SSE-NEXT:    retq
 379 ;
 380 ; AVX-LABEL: insert_test_add_ss:
 381 ; AVX:       # BB#0:
 382 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 383 ; AVX-NEXT:    retq
 384   %vsum = fadd <4 x float> %a, %b ; packed a + b
 385   %res = shufflevector <4 x float> %vsum, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vsum, lanes 1-3 from %a
 386   ret <4 x float> %res
 387 }
 388
 389 define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
 390 ; SSE-LABEL: insert_test_sub_ss:
 391 ; SSE:       # BB#0:
 392 ; SSE-NEXT:    subss %xmm1, %xmm0
 393 ; SSE-NEXT:    retq
 394 ;
 395 ; AVX-LABEL: insert_test_sub_ss:
 396 ; AVX:       # BB#0:
 397 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 398 ; AVX-NEXT:    retq
 399   %vdiff = fsub <4 x float> %a, %b ; packed a - b
 400   %res = shufflevector <4 x float> %vdiff, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vdiff, lanes 1-3 from %a
 401   ret <4 x float> %res
 402 }
 403
 404 define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
 405 ; SSE-LABEL: insert_test_mul_ss:
 406 ; SSE:       # BB#0:
 407 ; SSE-NEXT:    mulss %xmm1, %xmm0
 408 ; SSE-NEXT:    retq
 409 ;
 410 ; AVX-LABEL: insert_test_mul_ss:
 411 ; AVX:       # BB#0:
 412 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 413 ; AVX-NEXT:    retq
 414   %vprod = fmul <4 x float> %a, %b ; packed a * b
 415   %res = shufflevector <4 x float> %vprod, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vprod, lanes 1-3 from %a
 416   ret <4 x float> %res
 417 }
 418
 419 define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
 420 ; SSE-LABEL: insert_test_div_ss:
 421 ; SSE:       # BB#0:
 422 ; SSE-NEXT:    divss %xmm1, %xmm0
 423 ; SSE-NEXT:    retq
 424 ;
 425 ; AVX-LABEL: insert_test_div_ss:
 426 ; AVX:       # BB#0:
 427 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 428 ; AVX-NEXT:    retq
 429   %vquot = fdiv <4 x float> %a, %b ; packed a / b
 430   %res = shufflevector <4 x float> %vquot, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vquot, lanes 1-3 from %a
 431   ret <4 x float> %res
 432 }
 433
 434 define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
 435 ; SSE-LABEL: insert_test_add_sd:
 436 ; SSE:       # BB#0:
 437 ; SSE-NEXT:    addsd %xmm1, %xmm0
 438 ; SSE-NEXT:    retq
 439 ;
 440 ; AVX-LABEL: insert_test_add_sd:
 441 ; AVX:       # BB#0:
 442 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 443 ; AVX-NEXT:    retq
 444   %vsum = fadd <2 x double> %a, %b ; packed a + b
 445   %res = shufflevector <2 x double> %vsum, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %vsum, lane 1 from %a
 446   ret <2 x double> %res
 447 }
 448
 449 define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
 450 ; SSE-LABEL: insert_test_sub_sd:
 451 ; SSE:       # BB#0:
 452 ; SSE-NEXT:    subsd %xmm1, %xmm0
 453 ; SSE-NEXT:    retq
 454 ;
 455 ; AVX-LABEL: insert_test_sub_sd:
 456 ; AVX:       # BB#0:
 457 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 458 ; AVX-NEXT:    retq
 459   %vdiff = fsub <2 x double> %a, %b ; packed a - b
 460   %res = shufflevector <2 x double> %vdiff, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %vdiff, lane 1 from %a
 461   ret <2 x double> %res
 462 }
 463
 464 define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
 465 ; SSE-LABEL: insert_test_mul_sd:
 466 ; SSE:       # BB#0:
 467 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 468 ; SSE-NEXT:    retq
 469 ;
 470 ; AVX-LABEL: insert_test_mul_sd:
 471 ; AVX:       # BB#0:
 472 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 473 ; AVX-NEXT:    retq
 474   %vprod = fmul <2 x double> %a, %b ; packed a * b
 475   %res = shufflevector <2 x double> %vprod, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %vprod, lane 1 from %a
 476   ret <2 x double> %res
 477 }
 478
 479 define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
 480 ; SSE-LABEL: insert_test_div_sd:
 481 ; SSE:       # BB#0:
 482 ; SSE-NEXT:    divsd %xmm1, %xmm0
 483 ; SSE-NEXT:    retq
 484 ;
 485 ; AVX-LABEL: insert_test_div_sd:
 486 ; AVX:       # BB#0:
 487 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 488 ; AVX-NEXT:    retq
 489   %vquot = fdiv <2 x double> %a, %b ; packed a / b
 490   %res = shufflevector <2 x double> %vquot, <2 x double> %a, <2 x i32> <i32 0, i32 3> ; lane 0 from %vquot, lane 1 from %a
 491   ret <2 x double> %res
 492 }
 493
 494 define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
 495 ; SSE-LABEL: insert_test2_add_ss:
 496 ; SSE:       # BB#0:
 497 ; SSE-NEXT:    addss %xmm0, %xmm1
 498 ; SSE-NEXT:    movaps %xmm1, %xmm0
 499 ; SSE-NEXT:    retq
 500 ;
 501 ; AVX-LABEL: insert_test2_add_ss:
 502 ; AVX:       # BB#0:
 503 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 504 ; AVX-NEXT:    retq
 505   %vsum = fadd <4 x float> %b, %a ; packed b + a
 506   %res = shufflevector <4 x float> %vsum, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vsum, lanes 1-3 from %b
 507   ret <4 x float> %res
 508 }
 509
 510 define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
 511 ; SSE-LABEL: insert_test2_sub_ss:
 512 ; SSE:       # BB#0:
 513 ; SSE-NEXT:    subss %xmm0, %xmm1
 514 ; SSE-NEXT:    movaps %xmm1, %xmm0
 515 ; SSE-NEXT:    retq
 516 ;
 517 ; AVX-LABEL: insert_test2_sub_ss:
 518 ; AVX:       # BB#0:
 519 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 520 ; AVX-NEXT:    retq
 521   %vdiff = fsub <4 x float> %b, %a ; packed b - a
 522   %res = shufflevector <4 x float> %vdiff, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vdiff, lanes 1-3 from %b
 523   ret <4 x float> %res
 524 }
 525
 526 define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
 527 ; SSE-LABEL: insert_test2_mul_ss:
 528 ; SSE:       # BB#0:
 529 ; SSE-NEXT:    mulss %xmm0, %xmm1
 530 ; SSE-NEXT:    movaps %xmm1, %xmm0
 531 ; SSE-NEXT:    retq
 532 ;
 533 ; AVX-LABEL: insert_test2_mul_ss:
 534 ; AVX:       # BB#0:
 535 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 536 ; AVX-NEXT:    retq
 537   %vprod = fmul <4 x float> %b, %a ; packed b * a
 538   %res = shufflevector <4 x float> %vprod, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vprod, lanes 1-3 from %b
 539   ret <4 x float> %res
 540 }
 541
 542 define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
 543 ; SSE-LABEL: insert_test2_div_ss:
 544 ; SSE:       # BB#0:
 545 ; SSE-NEXT:    divss %xmm0, %xmm1
 546 ; SSE-NEXT:    movaps %xmm1, %xmm0
 547 ; SSE-NEXT:    retq
 548 ;
 549 ; AVX-LABEL: insert_test2_div_ss:
 550 ; AVX:       # BB#0:
 551 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 552 ; AVX-NEXT:    retq
 553   %vquot = fdiv <4 x float> %b, %a ; packed b / a
 554   %res = shufflevector <4 x float> %vquot, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> ; lane 0 from %vquot, lanes 1-3 from %b
 555   ret <4 x float> %res
 556 }
 557
 558 define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
 559 ; SSE-LABEL: insert_test2_add_sd:
 560 ; SSE:       # BB#0:
 561 ; SSE-NEXT:    addsd %xmm0, %xmm1
 562 ; SSE-NEXT:    movaps %xmm1, %xmm0
 563 ; SSE-NEXT:    retq
 564 ;
 565 ; AVX-LABEL: insert_test2_add_sd:
 566 ; AVX:       # BB#0:
 567 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 568 ; AVX-NEXT:    retq
 569   %vsum = fadd <2 x double> %b, %a ; packed b + a
 570   %res = shufflevector <2 x double> %vsum, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %vsum, lane 1 from %b
 571   ret <2 x double> %res
 572 }
 573
 574 define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
 575 ; SSE-LABEL: insert_test2_sub_sd:
 576 ; SSE:       # BB#0:
 577 ; SSE-NEXT:    subsd %xmm0, %xmm1
 578 ; SSE-NEXT:    movaps %xmm1, %xmm0
 579 ; SSE-NEXT:    retq
 580 ;
 581 ; AVX-LABEL: insert_test2_sub_sd:
 582 ; AVX:       # BB#0:
 583 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 584 ; AVX-NEXT:    retq
 585   %vdiff = fsub <2 x double> %b, %a ; packed b - a
 586   %res = shufflevector <2 x double> %vdiff, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %vdiff, lane 1 from %b
 587   ret <2 x double> %res
 588 }
 589
 590 define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
 591 ; SSE-LABEL: insert_test2_mul_sd:
 592 ; SSE:       # BB#0:
 593 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 594 ; SSE-NEXT:    movaps %xmm1, %xmm0
 595 ; SSE-NEXT:    retq
 596 ;
 597 ; AVX-LABEL: insert_test2_mul_sd:
 598 ; AVX:       # BB#0:
 599 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 600 ; AVX-NEXT:    retq
 601   %vprod = fmul <2 x double> %b, %a ; packed b * a
 602   %res = shufflevector <2 x double> %vprod, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %vprod, lane 1 from %b
 603   ret <2 x double> %res
 604 }
 605
 606 define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
 607 ; SSE-LABEL: insert_test2_div_sd:
 608 ; SSE:       # BB#0:
 609 ; SSE-NEXT:    divsd %xmm0, %xmm1
 610 ; SSE-NEXT:    movaps %xmm1, %xmm0
 611 ; SSE-NEXT:    retq
 612 ;
 613 ; AVX-LABEL: insert_test2_div_sd:
 614 ; AVX:       # BB#0:
 615 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 616 ; AVX-NEXT:    retq
 617   %vquot = fdiv <2 x double> %b, %a ; packed b / a
 618   %res = shufflevector <2 x double> %vquot, <2 x double> %b, <2 x i32> <i32 0, i32 3> ; lane 0 from %vquot, lane 1 from %b
 619   ret <2 x double> %res
 620 }
 621
 622 define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
 623 ; SSE-LABEL: insert_test3_add_ss:
 624 ; SSE:       # BB#0:
 625 ; SSE-NEXT:    addss %xmm1, %xmm0
 626 ; SSE-NEXT:    retq
 627 ;
 628 ; AVX-LABEL: insert_test3_add_ss:
 629 ; AVX:       # BB#0:
 630 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 631 ; AVX-NEXT:    retq
 632   %vsum = fadd <4 x float> %a, %b ; packed a + b
 633   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %vsum ; lane 0 from %vsum, lanes 1-3 from %a
 634   ret <4 x float> %res
 635 }
 636
 637 define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
 638 ; SSE-LABEL: insert_test3_sub_ss:
 639 ; SSE:       # BB#0:
 640 ; SSE-NEXT:    subss %xmm1, %xmm0
 641 ; SSE-NEXT:    retq
 642 ;
 643 ; AVX-LABEL: insert_test3_sub_ss:
 644 ; AVX:       # BB#0:
 645 ; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 646 ; AVX-NEXT:    retq
 647   %vdiff = fsub <4 x float> %a, %b ; packed a - b
 648   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %vdiff ; lane 0 from %vdiff, lanes 1-3 from %a
 649   ret <4 x float> %res
 650 }
 651
 652 define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
 653 ; SSE-LABEL: insert_test3_mul_ss:
 654 ; SSE:       # BB#0:
 655 ; SSE-NEXT:    mulss %xmm1, %xmm0
 656 ; SSE-NEXT:    retq
 657 ;
 658 ; AVX-LABEL: insert_test3_mul_ss:
 659 ; AVX:       # BB#0:
 660 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 661 ; AVX-NEXT:    retq
 662   %vprod = fmul <4 x float> %a, %b ; packed a * b
 663   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %vprod ; lane 0 from %vprod, lanes 1-3 from %a
 664   ret <4 x float> %res
 665 }
 666
 667 define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
 668 ; SSE-LABEL: insert_test3_div_ss:
 669 ; SSE:       # BB#0:
 670 ; SSE-NEXT:    divss %xmm1, %xmm0
 671 ; SSE-NEXT:    retq
 672 ;
 673 ; AVX-LABEL: insert_test3_div_ss:
 674 ; AVX:       # BB#0:
 675 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 676 ; AVX-NEXT:    retq
 677   %vquot = fdiv <4 x float> %a, %b ; packed a / b
 678   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %vquot ; lane 0 from %vquot, lanes 1-3 from %a
 679   ret <4 x float> %res
 680 }
 681
 682 define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
 683 ; SSE-LABEL: insert_test3_add_sd:
 684 ; SSE:       # BB#0:
 685 ; SSE-NEXT:    addsd %xmm1, %xmm0
 686 ; SSE-NEXT:    retq
 687 ;
 688 ; AVX-LABEL: insert_test3_add_sd:
 689 ; AVX:       # BB#0:
 690 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 691 ; AVX-NEXT:    retq
 692   %vsum = fadd <2 x double> %a, %b ; packed a + b
 693   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %vsum ; lane 0 from %vsum, lane 1 from %a
 694   ret <2 x double> %res
 695 }
 696
 697 define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
 698 ; SSE-LABEL: insert_test3_sub_sd:
 699 ; SSE:       # BB#0:
 700 ; SSE-NEXT:    subsd %xmm1, %xmm0
 701 ; SSE-NEXT:    retq
 702 ;
 703 ; AVX-LABEL: insert_test3_sub_sd:
 704 ; AVX:       # BB#0:
 705 ; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
 706 ; AVX-NEXT:    retq
 707   %vdiff = fsub <2 x double> %a, %b ; packed a - b
 708   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %vdiff ; lane 0 from %vdiff, lane 1 from %a
 709   ret <2 x double> %res
 710 }
 711
 712 define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
 713 ; SSE-LABEL: insert_test3_mul_sd:
 714 ; SSE:       # BB#0:
 715 ; SSE-NEXT:    mulsd %xmm1, %xmm0
 716 ; SSE-NEXT:    retq
 717 ;
 718 ; AVX-LABEL: insert_test3_mul_sd:
 719 ; AVX:       # BB#0:
 720 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 721 ; AVX-NEXT:    retq
 722   %vprod = fmul <2 x double> %a, %b ; packed a * b
 723   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %vprod ; lane 0 from %vprod, lane 1 from %a
 724   ret <2 x double> %res
 725 }
 726
 727 define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
 728 ; SSE-LABEL: insert_test3_div_sd:
 729 ; SSE:       # BB#0:
 730 ; SSE-NEXT:    divsd %xmm1, %xmm0
 731 ; SSE-NEXT:    retq
 732 ;
 733 ; AVX-LABEL: insert_test3_div_sd:
 734 ; AVX:       # BB#0:
 735 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 736 ; AVX-NEXT:    retq
 737   %vquot = fdiv <2 x double> %a, %b ; packed a / b
 738   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %vquot ; lane 0 from %vquot, lane 1 from %a
 739   ret <2 x double> %res
 740 }
 741
 742 define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
 743 ; SSE-LABEL: insert_test4_add_ss:
 744 ; SSE:       # BB#0:
 745 ; SSE-NEXT:    addss %xmm0, %xmm1
 746 ; SSE-NEXT:    movaps %xmm1, %xmm0
 747 ; SSE-NEXT:    retq
 748 ;
 749 ; AVX-LABEL: insert_test4_add_ss:
 750 ; AVX:       # BB#0:
 751 ; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
 752 ; AVX-NEXT:    retq
 753   %vsum = fadd <4 x float> %b, %a ; packed b + a
 754   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %vsum ; lane 0 from %vsum, lanes 1-3 from %b
 755   ret <4 x float> %res
 756 }
 757
 758 define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
 759 ; SSE-LABEL: insert_test4_sub_ss:
 760 ; SSE:       # BB#0:
 761 ; SSE-NEXT:    subss %xmm0, %xmm1
 762 ; SSE-NEXT:    movaps %xmm1, %xmm0
 763 ; SSE-NEXT:    retq
 764 ;
 765 ; AVX-LABEL: insert_test4_sub_ss:
 766 ; AVX:       # BB#0:
 767 ; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
 768 ; AVX-NEXT:    retq
 769   %vdiff = fsub <4 x float> %b, %a ; packed b - a
 770   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %vdiff ; lane 0 from %vdiff, lanes 1-3 from %b
 771   ret <4 x float> %res
 772 }
 773
 774 define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
 775 ; SSE-LABEL: insert_test4_mul_ss:
 776 ; SSE:       # BB#0:
 777 ; SSE-NEXT:    mulss %xmm0, %xmm1
 778 ; SSE-NEXT:    movaps %xmm1, %xmm0
 779 ; SSE-NEXT:    retq
 780 ;
 781 ; AVX-LABEL: insert_test4_mul_ss:
 782 ; AVX:       # BB#0:
 783 ; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
 784 ; AVX-NEXT:    retq
 785   %vprod = fmul <4 x float> %b, %a ; packed b * a
 786   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %vprod ; lane 0 from %vprod, lanes 1-3 from %b
 787   ret <4 x float> %res
 788 }
 789
 790 define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
 791 ; SSE-LABEL: insert_test4_div_ss:
 792 ; SSE:       # BB#0:
 793 ; SSE-NEXT:    divss %xmm0, %xmm1
 794 ; SSE-NEXT:    movaps %xmm1, %xmm0
 795 ; SSE-NEXT:    retq
 796 ;
 797 ; AVX-LABEL: insert_test4_div_ss:
 798 ; AVX:       # BB#0:
 799 ; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
 800 ; AVX-NEXT:    retq
 801   %vquot = fdiv <4 x float> %b, %a ; packed b / a
 802   %res = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %vquot ; lane 0 from %vquot, lanes 1-3 from %b
 803   ret <4 x float> %res
 804 }
 805
 806 define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
 807 ; SSE-LABEL: insert_test4_add_sd:
 808 ; SSE:       # BB#0:
 809 ; SSE-NEXT:    addsd %xmm0, %xmm1
 810 ; SSE-NEXT:    movaps %xmm1, %xmm0
 811 ; SSE-NEXT:    retq
 812 ;
 813 ; AVX-LABEL: insert_test4_add_sd:
 814 ; AVX:       # BB#0:
 815 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 816 ; AVX-NEXT:    retq
 817   %vsum = fadd <2 x double> %b, %a ; packed b + a
 818   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %vsum ; lane 0 from %vsum, lane 1 from %b
 819   ret <2 x double> %res
 820 }
 821
 822 define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
 823 ; SSE-LABEL: insert_test4_sub_sd:
 824 ; SSE:       # BB#0:
 825 ; SSE-NEXT:    subsd %xmm0, %xmm1
 826 ; SSE-NEXT:    movaps %xmm1, %xmm0
 827 ; SSE-NEXT:    retq
 828 ;
 829 ; AVX-LABEL: insert_test4_sub_sd:
 830 ; AVX:       # BB#0:
 831 ; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
 832 ; AVX-NEXT:    retq
 833   %vdiff = fsub <2 x double> %b, %a ; packed b - a
 834   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %vdiff ; lane 0 from %vdiff, lane 1 from %b
 835   ret <2 x double> %res
 836 }
 837
 838 define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
 839 ; SSE-LABEL: insert_test4_mul_sd:
 840 ; SSE:       # BB#0:
 841 ; SSE-NEXT:    mulsd %xmm0, %xmm1
 842 ; SSE-NEXT:    movaps %xmm1, %xmm0
 843 ; SSE-NEXT:    retq
 844 ;
 845 ; AVX-LABEL: insert_test4_mul_sd:
 846 ; AVX:       # BB#0:
 847 ; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
 848 ; AVX-NEXT:    retq
 849   %vprod = fmul <2 x double> %b, %a ; packed b * a
 850   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %vprod ; lane 0 from %vprod, lane 1 from %b
 851   ret <2 x double> %res
 852 }
 853
 854 define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
 855 ; SSE-LABEL: insert_test4_div_sd:
 856 ; SSE:       # BB#0:
 857 ; SSE-NEXT:    divsd %xmm0, %xmm1
 858 ; SSE-NEXT:    movaps %xmm1, %xmm0
 859 ; SSE-NEXT:    retq
 860 ;
 861 ; AVX-LABEL: insert_test4_div_sd:
 862 ; AVX:       # BB#0:
 863 ; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
 864 ; AVX-NEXT:    retq
 865   %vquot = fdiv <2 x double> %b, %a ; packed b / a
 866   %res = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %vquot ; lane 0 from %vquot, lane 1 from %b
 867   ret <2 x double> %res
 868 }