From: Simon Pilgrim Date: Wed, 19 Aug 2015 21:11:58 +0000 (+0000) Subject: [DAGCombiner] Added SMAX/SMIN/UMAX/UMIN constant folding X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0898cdd5182c21090ab805fabb56136cef3ed01a;p=oota-llvm.git [DAGCombiner] Added SMAX/SMIN/UMAX/UMIN constant folding. We still need to add constant folding of vector comparisons to fold the tests for targets that don't support the respective min/max nodes. I needed to update 2011-12-06-AVXVectorExtractCombine to load a vector instead of using a constant vector to prevent it folding. Differential Revision: http://reviews.llvm.org/D12118 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245503 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index cbffe006bee..11249f05986 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -687,7 +687,7 @@ public: SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); - + // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs); @@ -1072,6 +1072,10 @@ public: // target info.
switch (Opcode) { case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::MUL: case ISD::MULHU: case ISD::MULHS: diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bb8bf334485..d58c33a499d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -245,6 +245,7 @@ namespace { SDValue visitUMULO(SDNode *N); SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); + SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); @@ -1341,6 +1342,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UMULO: return visitUMULO(N); case ISD::SDIVREM: return visitSDIVREM(N); case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -2624,6 +2629,30 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold operation with constant operands + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + + return SDValue(); +} + /// If this is a binary operator with two operands of the same opcode, try to /// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e3e83748a1..0ae52b2caf0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3182,6 +3182,10 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, case ISD::SRA: return std::make_pair(C1.ashr(C2), true); case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -3356,6 +3360,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::MUL: case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -5273,7 +5281,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = + MaskedGatherSDNode *N = new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, VTs, VT, MMO); CSEMap.InsertNode(N, IP); diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll index 2a1a5c9fb3e..e6ba7551421 100644 --- a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll +++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll @@ -4,13 +4,14 @@ define void @test(<4 x i32>* nocapture %p) nounwind { ; CHECK-LABEL: test: ; CHECK: vpxor %xmm0, 
%xmm0, %xmm0 - ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0 - ; CHECK-NEXT: vmovdqu %xmm0, (%rdi) + ; CHECK-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 + ; CHECK-NEXT: vmovdqu %xmm0, (%rdi) ; CHECK-NEXT: ret - %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> , <4 x i32> zeroinitializer) nounwind - %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> - %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> - store <4 x i32> %c, <4 x i32>* %p, align 1 + %a = load <4 x i32>, <4 x i32>* %p, align 1 + %b = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a, <4 x i32> zeroinitializer) nounwind + %c = shufflevector <4 x i32> %b, <4 x i32> undef, <8 x i32> + %d = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> + store <4 x i32> %d, <4 x i32>* %p, align 1 ret void } diff --git a/test/CodeGen/X86/vec_minmax_sint.ll b/test/CodeGen/X86/vec_minmax_sint.ll index 618c6a7f682..80ceb80663a 100644 --- a/test/CodeGen/X86/vec_minmax_sint.ll +++ b/test/CodeGen/X86/vec_minmax_sint.ll @@ -1807,20 +1807,17 @@ define <4 x i32> @max_gt_v4i32c() { ; ; SSE41-LABEL: max_gt_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_gt_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -1850,40 +1847,20 @@ define <8 x i32> @max_gt_v8i32c() { ; ; SSE41-LABEL: max_gt_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa 
{{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_gt_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_gt_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_gt_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_gt_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp sgt <8 x i32> %1, %2 @@ -1894,14 +1871,12 @@ define <8 x i32> @max_gt_v8i32c() { define <8 x i16> @max_gt_v8i16c() { ; SSE-LABEL: max_gt_v8i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = 
[65529,65531,65533,65535,1,3,5,7] -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE-NEXT: retq ; ; AVX-LABEL: max_gt_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -1913,32 +1888,14 @@ define <8 x i16> @max_gt_v8i16c() { define <16 x i16> @max_gt_v16i16c() { ; SSE-LABEL: max_gt_v16i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0 -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm1 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE-NEXT: retq ; -; AVX1-LABEL: max_gt_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_gt_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_gt_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_gt_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] +; AVX-NEXT: retq %1 = 
insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp sgt <16 x i16> %1, %2 @@ -1960,20 +1917,17 @@ define <16 x i8> @max_gt_v16i8c() { ; ; SSE41-LABEL: max_gt_v16i8c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE41-NEXT: pmaxsb {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v16i8c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE42-NEXT: pmaxsb {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_gt_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpmaxsb {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 @@ -2213,20 +2167,17 @@ define <4 x i32> @max_ge_v4i32c() { ; ; SSE41-LABEL: max_ge_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_ge_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -2260,40 
+2211,20 @@ define <8 x i32> @max_ge_v8i32c() { ; ; SSE41-LABEL: max_ge_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxsd {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxsd {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_ge_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpmaxsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_ge_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_ge_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpmaxsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_ge_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp sge <8 x i32> %1, %2 @@ -2304,14 +2235,12 @@ define <8 x i32> @max_ge_v8i32c() { define 
<8 x i16> @max_ge_v8i16c() { ; SSE-LABEL: max_ge_v8i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE-NEXT: retq ; ; AVX-LABEL: max_ge_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -2323,32 +2252,14 @@ define <8 x i16> @max_ge_v8i16c() { define <16 x i16> @max_ge_v16i16c() { ; SSE-LABEL: max_ge_v16i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm0 -; SSE-NEXT: pmaxsw {{.*}}(%rip), %xmm1 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE-NEXT: retq ; -; AVX1-LABEL: max_ge_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpmaxsw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_ge_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_ge_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpmaxsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_ge_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps 
{{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp sge <16 x i16> %1, %2 @@ -2372,20 +2283,17 @@ define <16 x i8> @max_ge_v16i8c() { ; ; SSE41-LABEL: max_ge_v16i8c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE41-NEXT: pmaxsb {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v16i8c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE42-NEXT: pmaxsb {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_ge_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpmaxsb {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 @@ -2601,20 +2509,17 @@ define <4 x i32> @min_lt_v4i32c() { ; ; SSE41-LABEL: min_lt_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_lt_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; AVX-NEXT: retq %1 
= insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -2644,40 +2549,20 @@ define <8 x i32> @min_lt_v8i32c() { ; ; SSE41-LABEL: min_lt_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_lt_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_lt_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_lt_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_lt_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 
0 %3 = icmp slt <8 x i32> %1, %2 @@ -2688,14 +2573,12 @@ define <8 x i32> @min_lt_v8i32c() { define <8 x i16> @min_lt_v8i16c() { ; SSE-LABEL: min_lt_v8i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; SSE-NEXT: retq ; ; AVX-LABEL: min_lt_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -2707,32 +2590,14 @@ define <8 x i16> @min_lt_v8i16c() { define <16 x i16> @min_lt_v16i16c() { ; SSE-LABEL: min_lt_v16i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: min_lt_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_lt_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_lt_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, 
%ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_lt_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp slt <16 x i16> %1, %2 @@ -2754,20 +2619,17 @@ define <16 x i8> @min_lt_v16i8c() { ; ; SSE41-LABEL: min_lt_v16i8c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v16i8c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_lt_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 @@ -3007,20 +2869,17 @@ define <4 x i32> @min_le_v4i32c() { ; ; SSE41-LABEL: min_le_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_le_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpminsd {{.*}}(%rip), 
%xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -3054,40 +2913,20 @@ define <8 x i32> @min_le_v8i32c() { ; ; SSE41-LABEL: min_le_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminsd {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminsd {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_le_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpminsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_le_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_le_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpminsd {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_le_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1] +; 
AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp sle <8 x i32> %1, %2 @@ -3098,14 +2937,12 @@ define <8 x i32> @min_le_v8i32c() { define <8 x i16> @min_le_v8i16c() { ; SSE-LABEL: min_le_v8i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; SSE-NEXT: retq ; ; AVX-LABEL: min_le_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -3117,32 +2954,14 @@ define <8 x i16> @min_le_v8i16c() { define <16 x i16> @min_le_v16i16c() { ; SSE-LABEL: min_le_v16i16c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSE-NEXT: pminsw {{.*}}(%rip), %xmm1 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0] +; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE-NEXT: retq ; -; AVX1-LABEL: min_le_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpminsw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_le_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_le_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = 
[65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpminsw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_le_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp sle <16 x i16> %1, %2 @@ -3166,20 +2985,17 @@ define <16 x i8> @min_le_v16i8c() { ; ; SSE41-LABEL: min_le_v16i8c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE41-NEXT: pminsb {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v16i8c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE42-NEXT: pminsb {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_le_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpminsb {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 diff --git a/test/CodeGen/X86/vec_minmax_uint.ll b/test/CodeGen/X86/vec_minmax_uint.ll index 25b672c7068..63afd777abb 100644 --- a/test/CodeGen/X86/vec_minmax_uint.ll +++ b/test/CodeGen/X86/vec_minmax_uint.ll @@ -1942,20 +1942,17 @@ define <4 x i32> @max_gt_v4i32c() { ; ; SSE41-LABEL: max_gt_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa 
{{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_gt_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -1983,40 +1980,20 @@ define <8 x i32> @max_gt_v8i32c() { ; ; SSE41-LABEL: max_gt_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_gt_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_gt_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_gt_v8i32c: -; AVX512: # 
BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_gt_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp ugt <8 x i32> %1, %2 @@ -2037,20 +2014,17 @@ define <8 x i16> @max_gt_v8i16c() { ; ; SSE41-LABEL: max_gt_v8i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v8i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_gt_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -2078,40 +2052,20 @@ define <16 x i16> @max_gt_v16i16c() { ; ; SSE41-LABEL: max_gt_v16i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_gt_v16i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = 
[65529,65530,65531,65532,65533,65534,65535,0] -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_gt_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_gt_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_gt_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_gt_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp ugt <16 x i16> %1, %2 @@ -2122,14 +2076,12 @@ define <16 x i16> @max_gt_v16i16c() { define <16 x i8> @max_gt_v16i8c() { ; SSE-LABEL: max_gt_v16i8c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE-NEXT: retq ; ; AVX-LABEL: max_gt_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 @@ -2364,20 +2316,17 @@ define <4 x i32> @max_ge_v4i32c() { ; ; SSE41-LABEL: max_ge_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_ge_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -2407,40 +2356,20 @@ define <8 x i32> @max_ge_v8i32c() { ; ; SSE41-LABEL: max_ge_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxud {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxud {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_ge_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpmaxud {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_ge_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_ge_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpmaxud {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_ge_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp uge <8 x i32> %1, %2 @@ -2464,20 +2393,17 @@ define <8 x i16> @max_ge_v8i16c() { ; ; SSE41-LABEL: max_ge_v8i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v8i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; SSE42-NEXT: retq ; ; AVX-LABEL: max_ge_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -2510,40 +2436,20 @@ define <16 x i16> @max_ge_v16i16c() { ; ; SSE41-LABEL: max_ge_v16i16c: ; SSE41: # BB#0: -; 
SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmaxuw {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE41-NEXT: retq ; ; SSE42-LABEL: max_ge_v16i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pmaxuw {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] ; SSE42-NEXT: retq ; -; AVX1-LABEL: max_ge_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpmaxuw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: max_ge_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: max_ge_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpmaxuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: max_ge_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp uge <16 x i16> %1, %2 @@ -2554,14 +2460,12 @@ define <16 x i16> @max_ge_v16i16c() { define <16 x i8> 
@max_ge_v16i8c() { ; SSE-LABEL: max_ge_v16i8c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE-NEXT: pmaxub {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; SSE-NEXT: retq ; ; AVX-LABEL: max_ge_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpmaxub {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0 @@ -2773,20 +2677,17 @@ define <4 x i32> @min_lt_v4i32c() { ; ; SSE41-LABEL: min_lt_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_lt_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x i32> , i32 -1, i32 0 @@ -2814,40 +2715,20 @@ define <8 x i32> @min_lt_v8i32c() { ; ; SSE41-LABEL: min_lt_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE41-NEXT: 
retq ; ; SSE42-LABEL: min_lt_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_lt_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_lt_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_lt_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_lt_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp ult <8 x i32> %1, %2 @@ -2870,20 +2751,17 @@ define <8 x i16> @min_lt_v8i16c() { ; ; SSE41-LABEL: min_lt_v8i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v8i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = 
[1,65531,65531,65529,1,3,3,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_lt_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 1, i32 0 @@ -2911,40 +2789,20 @@ define <16 x i16> @min_lt_v16i16c() { ; ; SSE41-LABEL: min_lt_v16i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_lt_v16i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_lt_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_lt_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_lt_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = 
[65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_lt_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 1, i32 0 %3 = icmp ult <16 x i16> %1, %2 @@ -2955,14 +2813,12 @@ define <16 x i16> @min_lt_v16i16c() { define <16 x i8> @min_lt_v16i8c() { ; SSE-LABEL: min_lt_v16i8c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE-NEXT: pminub {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE-NEXT: retq ; ; AVX-LABEL: min_lt_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 1, i32 0 @@ -3197,20 +3053,17 @@ define <4 x i32> @min_le_v4i32c() { ; ; SSE41-LABEL: min_le_v4i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v4i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_le_v4i32c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967295,1,7] -; AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1] ; AVX-NEXT: retq %1 = insertelement <4 x i32> , i32 -7, i32 0 %2 = insertelement <4 x 
i32> , i32 -1, i32 0 @@ -3240,40 +3093,20 @@ define <8 x i32> @min_le_v8i32c() { ; ; SSE41-LABEL: min_le_v8i32c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminud {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v8i32c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,3,5,7] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminud {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_le_v8i32c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967289,4294967291,4294967293,4294967295] -; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,3,5,7] -; AVX1-NEXT: vpminud {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_le_v8i32c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX2-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_le_v8i32c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [4294967289,4294967291,4294967293,4294967295,1,3,5,7] -; AVX512-NEXT: vpminud {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_le_v8i32c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1] +; AVX-NEXT: retq %1 = insertelement <8 x i32> , i32 -7, i32 0 %2 = insertelement <8 x i32> , i32 -1, i32 0 %3 = icmp ule <8 x i32> %1, %2 @@ -3297,20 +3130,17 @@ define <8 
x i16> @min_le_v8i16c() { ; ; SSE41-LABEL: min_le_v8i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v8i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; SSE42-NEXT: retq ; ; AVX-LABEL: min_le_v8i16c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65531,65533,65535,1,3,5,7] -; AVX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1] ; AVX-NEXT: retq %1 = insertelement <8 x i16> , i16 -7, i32 0 %2 = insertelement <8 x i16> , i16 -1, i32 0 @@ -3343,40 +3173,20 @@ define <16 x i16> @min_le_v16i16c() { ; ; SSE41-LABEL: min_le_v16i16c: ; SSE41: # BB#0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pminuw {{.*}}(%rip), %xmm1 +; SSE41-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0] +; SSE41-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE41-NEXT: retq ; ; SSE42-LABEL: min_le_v16i16c: ; SSE42: # BB#0: -; SSE42-NEXT: movdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm0 -; SSE42-NEXT: pminuw {{.*}}(%rip), %xmm1 +; SSE42-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0] +; SSE42-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0] ; SSE42-NEXT: retq ; -; AVX1-LABEL: min_le_v16i16c: -; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [65529,65530,65531,65532,65533,65534,65535,0] -; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; 
AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4,5,6,7,8] -; AVX1-NEXT: vpminuw {{.*}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: min_le_v16i16c: -; AVX2: # BB#0: -; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX2-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: min_le_v16i16c: -; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa {{.*#+}} ymm0 = [65529,65530,65531,65532,65533,65534,65535,0,1,2,3,4,5,6,7,8] -; AVX512-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: min_le_v16i16c: +; AVX: # BB#0: +; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0] +; AVX-NEXT: retq %1 = insertelement <16 x i16> , i16 -7, i32 0 %2 = insertelement <16 x i16> , i16 -1, i32 0 %3 = icmp ule <16 x i16> %1, %2 @@ -3387,14 +3197,12 @@ define <16 x i16> @min_le_v16i16c() { define <16 x i8> @min_le_v16i8c() { ; SSE-LABEL: min_le_v16i8c: ; SSE: # BB#0: -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; SSE-NEXT: pminub {{.*}}(%rip), %xmm0 +; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; SSE-NEXT: retq ; ; AVX-LABEL: min_le_v16i8c: ; AVX: # BB#0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [249,250,251,252,253,254,255,0,1,2,3,4,5,6,7,8] -; AVX-NEXT: vpminub {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0] ; AVX-NEXT: retq %1 = insertelement <16 x i8> , i8 -7, i32 0 %2 = insertelement <16 x i8> , i8 -1, i32 0