From 29f50e978353954afd7db22ef842edbea0066518 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 13 Sep 2015 11:28:45 +0000 Subject: [PATCH] [X86][MMX] Added shuffle decodes for MMX/3DNow! shuffles. Added shuffle decodes for MMX PUNPCK + PSHUFW shuffles. Added shuffle decodes for 3DNow! PSWAPD shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247526 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/InstPrinter/X86InstComments.cpp | 61 ++++ lib/Target/X86/Utils/X86ShuffleDecode.cpp | 15 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 3 + test/CodeGen/X86/3dnow-intrinsics.ll | 4 +- test/CodeGen/X86/mmx-intrinsics.ll | 291 ++++++++++++------ 5 files changed, 273 insertions(+), 101 deletions(-) diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 91b144a4482..247b969cb07 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -438,6 +438,46 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, ShuffleMask); break; + case X86::MMX_PSHUFWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::MMX_PSHUFWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + if (MI->getOperand(MI->getNumOperands() - 1).isImm()) + DecodePSHUFMask(MVT::v4i16, + MI->getOperand(MI->getNumOperands() - 1).getImm(), + ShuffleMask); + break; + + case X86::PSWAPDrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. 
+ case X86::PSWAPDrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSWAPMask(MVT::v2i32, ShuffleMask); + break; + + case X86::MMX_PUNPCKHBWirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKHBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v8i8, ShuffleMask); + break; + case X86::MMX_PUNPCKHWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKHWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v4i16, ShuffleMask); + break; + case X86::MMX_PUNPCKHDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKHDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v2i32, ShuffleMask); + break; case X86::PUNPCKHBWrr: case X86::VPUNPCKHBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -527,6 +567,27 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeUNPCKHMask(MVT::v8i64, ShuffleMask); break; + case X86::MMX_PUNPCKLBWirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKLBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v8i8, ShuffleMask); + break; + case X86::MMX_PUNPCKLWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKLWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v4i16, ShuffleMask); + break; + case X86::MMX_PUNPCKLDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + case X86::MMX_PUNPCKLDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v2i32, ShuffleMask); + 
break; case X86::PUNPCKLBWrr: case X86::VPUNPCKLBWrr: Src2Name = getRegName(MI->getOperand(2).getReg()); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 17acdb3aa21..e66c0ff11eb 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -140,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, } } -/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. +/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; unsigned NewImm = Imm; @@ -191,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm, } } +void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumHalfElts = NumElts / 2; + + for (unsigned l = 0; l != NumHalfElts; ++l) + ShuffleMask.push_back(l + NumHalfElts); + for (unsigned h = 0; h != NumHalfElts; ++h) + ShuffleMask.push_back(h); +} + /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. @@ -222,7 +233,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate // independently on 128-bit lanes. 
unsigned NumLanes = VT.getSizeInBits() / 128; - if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + if (NumLanes == 0) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 47a335d8141..81f8c5dc9ab 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -54,6 +54,9 @@ void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// \brief Decodes a PSWAPD 3DNow! instruction. +void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); + /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll index 0b27bf2d185..fe8b95ec465 100644 --- a/test/CodeGen/X86/3dnow-intrinsics.ll +++ b/test/CodeGen/X86/3dnow-intrinsics.ll @@ -277,7 +277,7 @@ entry: declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone { -; CHECK: pswapd +; CHECK: pswapd {{.*#+}} mm0 = mem[1,0] entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) @@ -286,7 +286,7 @@ entry: } define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { -; CHECK: pswapd +; CHECK: pswapd {{.*#+}} mm0 = mem[1,0] entry: %0 = bitcast <2 x i32> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) diff --git a/test/CodeGen/X86/mmx-intrinsics.ll b/test/CodeGen/X86/mmx-intrinsics.ll index 39d481b16e7..d9bcdc4effd 100644 --- a/test/CodeGen/X86/mmx-intrinsics.ll +++ b/test/CodeGen/X86/mmx-intrinsics.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck 
%s -; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s -; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s -; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 +; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phaddw +; ALL-LABEL: @test1 +; ALL: phaddw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -22,7 +23,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pcmpgtd +; ALL-LABEL: @test88 +; ALL: pcmpgtd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -38,7 +40,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pcmpgtw +; ALL-LABEL: @test87 +; ALL: pcmpgtw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -54,7 +57,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pcmpgtb +; ALL-LABEL: @test86 +; ALL: pcmpgtb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -70,7 +74,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind 
readnone optsize ssp { -; CHECK: pcmpeqd +; ALL-LABEL: @test85 +; ALL: pcmpeqd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -86,7 +91,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pcmpeqw +; ALL-LABEL: @test84 +; ALL: pcmpeqw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -102,7 +108,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pcmpeqb +; ALL-LABEL: @test83 +; ALL: pcmpeqb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -118,7 +125,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpckldq +; ALL-LABEL: @test82 +; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0] +; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0] entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -134,7 +143,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpcklwd +; ALL-LABEL: @test81 +; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1] +; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1] entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -150,7 +161,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpcklbw +; ALL-LABEL: @test80 +; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] +; X64: punpcklbw {{.*#+}} mm0 = 
mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -166,7 +179,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpckhdq +; ALL-LABEL: @test79 +; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1] +; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1] entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -182,7 +197,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpckhwd +; ALL-LABEL: @test78 +; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3] +; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3] entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -198,7 +215,9 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: punpckhbw +; ALL-LABEL: @test77 +; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] +; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -214,7 +233,8 @@ entry: declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: packuswb +; ALL-LABEL: @test76 +; ALL: packuswb entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -230,7 +250,8 @@ entry: declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: packssdw +; ALL-LABEL: 
@test75 +; ALL: packssdw entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -246,7 +267,8 @@ entry: declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: packsswb +; ALL-LABEL: @test74 +; ALL: packsswb entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -262,7 +284,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psrad +; ALL-LABEL: @test73 +; ALL: psrad entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -276,7 +299,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psraw +; ALL-LABEL: @test72 +; ALL: psraw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -290,7 +314,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psrlq +; ALL-LABEL: @test71 +; ALL: psrlq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -302,7 +327,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psrld +; ALL-LABEL: @test70 +; ALL: psrld entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -316,7 +342,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psrlw +; ALL-LABEL: @test69 +; ALL: psrlw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -330,7 +357,8 @@ entry: declare x86_mmx 
@llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psllq +; ALL-LABEL: @test68 +; ALL: psllq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -342,7 +370,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pslld +; ALL-LABEL: @test67 +; ALL: pslld entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -356,7 +385,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: psllw +; ALL-LABEL: @test66 +; ALL: psllw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -370,7 +400,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psrad +; ALL-LABEL: @test65 +; ALL: psrad entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -386,7 +417,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psraw +; ALL-LABEL: @test64 +; ALL: psraw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -402,7 +434,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psrlq +; ALL-LABEL: @test63 +; ALL: psrlq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -416,7 +449,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { -; CHECK: psrld +; ALL-LABEL: @test62 +; ALL: psrld entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -432,7 +466,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psrlw +; ALL-LABEL: @test61 +; ALL: psrlw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -448,7 +483,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psllq +; ALL-LABEL: @test60 +; ALL: psllq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -462,7 +498,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pslld +; ALL-LABEL: @test59 +; ALL: pslld entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -478,7 +515,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psllw +; ALL-LABEL: @test58 +; ALL: psllw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -494,7 +532,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pxor +; ALL-LABEL: @test56 +; ALL: pxor entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -510,7 +549,8 @@ entry: declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: por +; ALL-LABEL: @test55 +; ALL: por entry: %0 = bitcast <1 
x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -526,7 +566,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pandn +; ALL-LABEL: @test54 +; ALL: pandn entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -542,7 +583,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pand +; ALL-LABEL: @test53 +; ALL: pand entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -558,7 +600,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmullw +; ALL-LABEL: @test52 +; ALL: pmullw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -572,7 +615,8 @@ entry: } define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmullw +; ALL-LABEL: @test51 +; ALL: pmullw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -588,7 +632,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmulhw +; ALL-LABEL: @test50 +; ALL: pmulhw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -604,7 +649,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmaddwd +; ALL-LABEL: @test49 +; ALL: pmaddwd entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -620,7 +666,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone 
define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubusw +; ALL-LABEL: @test48 +; ALL: psubusw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -636,7 +683,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubusb +; ALL-LABEL: @test47 +; ALL: psubusb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -652,7 +700,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubsw +; ALL-LABEL: @test46 +; ALL: psubsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -668,7 +717,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubsb +; ALL-LABEL: @test45 +; ALL: psubsb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -682,7 +732,8 @@ entry: } define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubq +; ALL-LABEL: @test44 +; ALL: psubq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -698,7 +749,8 @@ declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubd +; ALL-LABEL: @test43 +; ALL: psubd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -714,7 +766,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubw +; 
ALL-LABEL: @test42 +; ALL: psubw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -730,7 +783,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psubb +; ALL-LABEL: @test41 +; ALL: psubb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -746,7 +800,8 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddusw +; ALL-LABEL: @test40 +; ALL: paddusw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -762,7 +817,8 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddusb +; ALL-LABEL: @test39 +; ALL: paddusb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -778,7 +834,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddsw +; ALL-LABEL: @test38 +; ALL: paddsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -794,7 +851,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddsb +; ALL-LABEL: @test37 +; ALL: paddsb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -810,7 +868,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddq +; ALL-LABEL: @test36 +; ALL: paddq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = 
bitcast i64 %0 to x86_mmx @@ -824,7 +883,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddd +; ALL-LABEL: @test35 +; ALL: paddd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -840,7 +900,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddw +; ALL-LABEL: @test34 +; ALL: paddw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -856,7 +917,8 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: paddb +; ALL-LABEL: @test33 +; ALL: paddb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -872,7 +934,8 @@ entry: declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psadbw +; ALL-LABEL: @test32 +; ALL: psadbw entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -886,7 +949,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pminsw +; ALL-LABEL: @test31 +; ALL: pminsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -902,7 +966,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pminub +; ALL-LABEL: @test30 +; ALL: pminub entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -918,7 +983,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, 
x86_mmx) nounwind readnone define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmaxsw +; ALL-LABEL: @test29 +; ALL: pmaxsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -934,7 +1000,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmaxub +; ALL-LABEL: @test28 +; ALL: pmaxub entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -950,7 +1017,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pavgw +; ALL-LABEL: @test27 +; ALL: pavgw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -966,7 +1034,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pavgb +; ALL-LABEL: @test26 +; ALL: pavgb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -982,7 +1051,8 @@ entry: declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp { -; CHECK: movntq +; ALL-LABEL: @test25 +; ALL: movntq entry: %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx* %0 = extractelement <1 x i64> %a, i32 0 @@ -994,7 +1064,8 @@ entry: declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pmovmskb +; ALL-LABEL: @test24 +; ALL: pmovmskb entry: %0 = bitcast <1 x i64> %a to <8 x i8> %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx @@ -1005,7 +1076,8 @@ entry: declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp { -; CHECK: 
maskmovq +; ALL-LABEL: @test23 +; ALL: maskmovq entry: %0 = bitcast <1 x i64> %n to <8 x i8> %1 = bitcast <1 x i64> %d to <8 x i8> @@ -1018,7 +1090,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmulhuw +; ALL-LABEL: @test22 +; ALL: pmulhuw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1034,7 +1107,9 @@ entry: declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pshufw +; ALL-LABEL: @test21 +; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] +; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1046,9 +1121,10 @@ entry: } define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: test21_2 -; CHECK: pshufw -; CHECK: movd +; ALL-LABEL: @test21_2 +; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] +; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] +; ALL: movd entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1062,7 +1138,8 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmuludq +; ALL-LABEL: @test20 +; ALL: pmuludq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1076,7 +1153,8 @@ entry: declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: cvtpi2pd +; ALL-LABEL: @test19 +; ALL: cvtpi2pd entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -1087,7 +1165,8 @@ entry: declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { -; CHECK: 
cvttpd2pi +; ALL-LABEL: @test18 +; ALL: cvttpd2pi entry: %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -1099,7 +1178,8 @@ entry: declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { -; CHECK: cvtpd2pi +; ALL-LABEL: @test17 +; ALL: cvtpd2pi entry: %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -1111,7 +1191,8 @@ entry: declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: palignr +; ALL-LABEL: @test16 +; ALL: palignr entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -1125,7 +1206,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pabsd +; ALL-LABEL: @test15 +; ALL: pabsd entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -1139,7 +1221,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pabsw +; ALL-LABEL: @test14 +; ALL: pabsw entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1153,7 +1236,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { -; CHECK: pabsb +; ALL-LABEL: @test13 +; ALL: pabsb entry: %0 = bitcast <1 x i64> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to x86_mmx @@ -1167,7 +1251,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psignd +; ALL-LABEL: @test12 +; ALL: psignd entry: %0 = bitcast <1 
x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1183,7 +1268,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psignw +; ALL-LABEL: @test11 +; ALL: psignw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1199,7 +1285,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: psignb +; ALL-LABEL: @test10 +; ALL: psignb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1215,7 +1302,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pshufb +; ALL-LABEL: @test9 +; ALL: pshufb entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1231,7 +1319,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmulhrsw +; ALL-LABEL: @test8 +; ALL: pmulhrsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1247,7 +1336,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: pmaddubsw +; ALL-LABEL: @test7 +; ALL: pmaddubsw entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1263,7 +1353,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phsubsw +; ALL-LABEL: @test6 +; ALL: phsubsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 
x i16> @@ -1279,7 +1370,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phsubd +; ALL-LABEL: @test5 +; ALL: phsubd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1295,7 +1387,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phsubw +; ALL-LABEL: @test4 +; ALL: phsubw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1311,7 +1404,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phaddsw +; ALL-LABEL: @test3 +; ALL: phaddsw entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1327,7 +1421,8 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; CHECK: phaddd +; ALL-LABEL: @test2 +; ALL: phaddd entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1341,16 +1436,18 @@ entry: } define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { -; CHECK: cvtpi2ps +; ALL-LABEL: @test89 +; ALL: cvtpi2ps %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) ret <4 x float> %c } declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone -; CHECK-LABEL: test90 +; ALL-LABEL: test90 define void @test90() { -; CHECK: emms +; ALL-LABEL: @test90 +; ALL: emms call void @llvm.x86.mmx.emms() ret void } -- 2.34.1