From 2bc87a6f42016238e4068afba576fe7678d182f0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 24 Oct 2015 18:44:52 +0000
Subject: [PATCH] [DAGCombiner] Generalize masking of constant rotates.

We don't need the mask of a rotation result to be a constant splat - any
constant scalar/vector can be usefully folded.

Follow-up to D13851.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251197 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++---
 test/CodeGen/X86/vector-rotate-128.ll    | 52 ++++++++----------
 test/CodeGen/X86/vector-rotate-256.ll    | 38 +++++++----------
 3 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 53d47c7165e..7cc318ec423 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3796,7 +3796,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   if (Op.getOpcode() == ISD::AND) {
-    if (isConstOrConstSplat(Op.getOperand(1))) {
+    if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
       Mask = Op.getOperand(1);
       Op = Op.getOperand(0);
     } else {
@@ -3997,18 +3997,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
 
   // If there is an AND of either shifted operand, apply it to the result.
   if (LHSMask.getNode() || RHSMask.getNode()) {
-    APInt Mask = APInt::getAllOnesValue(EltSizeInBits);
+    APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+    SDValue Mask = DAG.getConstant(AllBits, DL, VT);
 
     if (LHSMask.getNode()) {
       APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
-      Mask &= isConstOrConstSplat(LHSMask)->getAPIntValue() | RHSBits;
+      Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+                         DAG.getNode(ISD::OR, DL, VT, LHSMask,
+                                     DAG.getConstant(RHSBits, DL, VT)));
     }
     if (RHSMask.getNode()) {
       APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
-      Mask &= isConstOrConstSplat(RHSMask)->getAPIntValue() | LHSBits;
+      Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+                         DAG.getNode(ISD::OR, DL, VT, RHSMask,
+                                     DAG.getConstant(LHSBits, DL, VT)));
     }
 
-    Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+    Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
   }
 
   return Rot.getNode();
diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll
index b02b98be067..52601872f90 100644
--- a/test/CodeGen/X86/vector-rotate-128.ll
+++ b/test/CodeGen/X86/vector-rotate-128.ll
@@ -1446,8 +1446,8 @@ define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
 ; X32-SSE-NEXT:    retl
   %shl = shl <2 x i64> %a,
   %lshr = lshr <2 x i64> %a,
-  %rmask = and <2 x i64> %lshr,
-  %lmask = and <2 x i64> %shl,
+  %rmask = and <2 x i64> %lshr,
+  %lmask = and <2 x i64> %shl,
   %or = or <2 x i64> %lmask, %rmask
   ret <2 x i64> %or
 }
@@ -1464,38 +1464,20 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpslld $4, %xmm0, %xmm1
-; AVX1-NEXT:    vpsrld $28, %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpslld $4, %xmm0, %xmm1
-; AVX2-NEXT:    vpsrld $28, %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    retq
-;
-; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i32:
-; XOPAVX1:       # BB#0:
-; XOPAVX1-NEXT:    vprotd $4, %xmm0, %xmm0
-; XOPAVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
-; XOPAVX1-NEXT:    retq
+; AVX-LABEL: splatconstant_rotate_mask_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslld $4, %xmm0, %xmm1
+; AVX-NEXT:    vpsrld $28, %xmm0, %xmm0
+; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
 ;
-; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i32:
-; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
-; XOPAVX2-NEXT:    vprotd $4, %xmm0, %xmm0
-; XOPAVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT:    retq
+; XOP-LABEL: splatconstant_rotate_mask_v4i32:
+; XOP:       # BB#0:
+; XOP-NEXT:    vprotd $4, %xmm0, %xmm0
+; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    retq
 ;
 ; X32-SSE-LABEL: splatconstant_rotate_mask_v4i32:
 ; X32-SSE:       # BB#0:
@@ -1509,8 +1491,8 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; X32-SSE-NEXT:    retl
   %shl = shl <4 x i32> %a,
   %lshr = lshr <4 x i32> %a,
-  %rmask = and <4 x i32> %lshr,
-  %lmask = and <4 x i32> %shl,
+  %rmask = and <4 x i32> %lshr,
+  %lmask = and <4 x i32> %shl,
   %or = or <4 x i32> %lmask, %rmask
   ret <4 x i32> %or
 }
diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll
index 56d2354463e..4cd17c7043a 100644
--- a/test/CodeGen/X86/vector-rotate-256.ll
+++ b/test/CodeGen/X86/vector-rotate-256.ll
@@ -892,10 +892,8 @@ define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    vpsrlq $49, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlq $49, %xmm2, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
 ; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
@@ -905,20 +903,17 @@ define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    vpsllq $15, %ymm0, %ymm1
 ; AVX2-NEXT:    vpsrlq $49, %ymm0, %ymm0
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
 ; XOPAVX1:       # BB#0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; XOPAVX1-NEXT:    vprotq $15, %xmm1, %xmm1
-; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; XOPAVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vprotq $15, %xmm0, %xmm1
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; XOPAVX1-NEXT:    vprotq $15, %xmm0, %xmm0
-; XOPAVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; XOPAVX1-NEXT:    retq
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i64:
@@ -931,8 +926,8 @@ define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
 ; XOPAVX2-NEXT:    retq
   %shl = shl <4 x i64> %a,
   %lshr = lshr <4 x i64> %a,
-  %rmask = and <4 x i64> %lshr,
-  %lmask = and <4 x i64> %shl,
+  %rmask = and <4 x i64> %lshr,
+  %lmask = and <4 x i64> %shl,
   %or = or <4 x i64> %lmask, %rmask
   ret <4 x i64> %or
 }
@@ -956,10 +951,8 @@ define <8 x i32> @splatconstant_rotate_mask_v8i32(<8 x i32> %a) nounwind {
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpslld $4, %ymm0, %ymm1
 ; AVX2-NEXT:    vpsrld $28, %ymm0, %ymm0
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -974,17 +967,16 @@ define <8 x i32> @splatconstant_rotate_mask_v8i32(<8 x i32> %a) nounwind {
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v8i32:
 ; XOPAVX2:       # BB#0:
-; XOPAVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
-; XOPAVX2-NEXT:    vprotd $4, %xmm0, %xmm2
+; XOPAVX2-NEXT:    vprotd $4, %xmm0, %xmm1
 ; XOPAVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; XOPAVX2-NEXT:    vprotd $4, %xmm0, %xmm0
-; XOPAVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
-; XOPAVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; XOPAVX2-NEXT:    retq
   %shl = shl <8 x i32> %a,
   %lshr = lshr <8 x i32> %a,
-  %rmask = and <8 x i32> %lshr,
-  %lmask = and <8 x i32> %shl,
+  %rmask = and <8 x i32> %lshr,
+  %lmask = and <8 x i32> %shl,
   %or = or <8 x i32> %lmask, %rmask
   ret <8 x i32> %or
 }
-- 
2.34.1
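
For illustration only (this note and the IR below are not part of the committed patch; the function name and mask values are invented): the generalized fold now fires when the two shifted halves of a rotate pattern carry different per-element constant masks, not just constant splats.

; Hypothetical example: a rotate-left-by-4 of <4 x i32> whose halves are
; masked by *non-splat* constant vectors. MatchRotateHalf previously
; rejected such masks (isConstOrConstSplat); it now accepts any constant
; build vector, and MatchRotate merges both ANDs into one AND of the
; rotate result:
;   %rot = rotl %a, 4
;   %res = and %rot, ((lmask | 0xF) & (rmask | 0xFFFFFFF0))
; so a target with a legal vector rotate (e.g. XOP's vprotd) can emit a
; single rotate plus a single vpand.
define <4 x i32> @rotl4_nonsplat_masks(<4 x i32> %a) nounwind {
  %shl = shl <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
  %lshr = lshr <4 x i32> %a, <i32 28, i32 28, i32 28, i32 28>
  %lmask = and <4 x i32> %shl, <i32 -16, i32 240, i32 4080, i32 65520>
  %rmask = and <4 x i32> %lshr, <i32 15, i32 7, i32 3, i32 1>
  %or = or <4 x i32> %lmask, %rmask
  ret <4 x i32> %or
}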