+ int ScaleLog = 8 - Shift.getConstantOperandVal(1);
+ if (ScaleLog <= 0 || ScaleLog >= 4 ||
+ Mask != (0xffu << ScaleLog))
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue Eight = DAG.getConstant(8, MVT::i8);
+ SDValue NewMask = DAG.getConstant(0xff, VT);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ DAG.RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewMask.getNode()->getNodeId() == -1 ||
+ NewMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ DAG.RepositionNode(X.getNode(), NewMask.getNode());
+ NewMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ DAG.RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ DAG.RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ DAG.ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+}
+
+// Attempts to rewrite "(X << C1) & C2" as "(X & (C2 >> C1)) << C1" so that the
+// left shift can be absorbed into the scale of the addressing mode AM.
+//
+// N is the AND node, Mask is C2, Shift is the "X << C1" operand of the AND and
+// X is the value being shifted. On success the new shift replaces every use of
+// N, AM.IndexReg / AM.Scale are updated and false is returned. A return value
+// of true means the pattern was rejected before anything was modified.
+static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+                                        uint64_t Mask,
+                                        SDValue Shift, SDValue X,
+                                        X86ISelAddressMode &AM) {
+  // Only a left shift by a constant amount is a candidate.
+  if (Shift.getOpcode() != ISD::SHL)
+    return true;
+  if (!isa<ConstantSDNode>(Shift.getOperand(1)))
+    return true;
+
+  // Give up when the AND or the shift has more than one use: the rewrite is
+  // unlikely to pay off then (unless all uses are for address computation),
+  // and the isel mechanism requires the node ids of both nodes to be reused.
+  if (!(N.hasOneUse() && Shift.hasOneUse()))
+    return true;
+
+  // Only shift amounts of 1, 2 or 3 (i.e. scales of 2, 4 or 8) can be folded.
+  unsigned ScaleLog2 = Shift.getConstantOperandVal(1);
+  if (ScaleLog2 < 1 || ScaleLog2 > 3)
+    return true;
+
+  // Build the replacement nodes: mask first, then shift.
+  DebugLoc Loc = N.getDebugLoc();
+  EVT Ty = N.getValueType();
+  SDValue ShiftedMask = DAG.getConstant(Mask >> ScaleLog2, Ty);
+  SDValue MaskedX = DAG.getNode(ISD::AND, Loc, Ty, X, ShiftedMask);
+  SDValue Rescaled =
+      DAG.getNode(ISD::SHL, Loc, Ty, MaskedX, Shift.getOperand(1));
+
+  // Splice the new nodes into the topological ordering. A node is moved only
+  // if it has no id yet (-1) or currently sorts after the node it must
+  // precede; it then takes over that node's id.
+  SDNode *XNode = X.getNode();
+  SDNode *ShiftNode = Shift.getNode();
+  SDNode *AndNode = N.getNode();
+
+  SDNode *MaskNode = ShiftedMask.getNode();
+  if (MaskNode->getNodeId() == -1 ||
+      MaskNode->getNodeId() > XNode->getNodeId()) {
+    DAG.RepositionNode(XNode, MaskNode);
+    MaskNode->setNodeId(XNode->getNodeId());
+  }
+
+  SDNode *MaskedXNode = MaskedX.getNode();
+  if (MaskedXNode->getNodeId() == -1 ||
+      MaskedXNode->getNodeId() > ShiftNode->getNodeId()) {
+    DAG.RepositionNode(ShiftNode, MaskedXNode);
+    MaskedXNode->setNodeId(ShiftNode->getNodeId());
+  }
+
+  SDNode *RescaledNode = Rescaled.getNode();
+  if (RescaledNode->getNodeId() == -1 ||
+      RescaledNode->getNodeId() > AndNode->getNodeId()) {
+    DAG.RepositionNode(AndNode, RescaledNode);
+    RescaledNode->setNodeId(AndNode->getNodeId());
+  }
+
+  // Redirect all users of the original AND to the new shift.
+  DAG.ReplaceAllUsesWith(N, Rescaled);
+
+  // The masked value is the index register; the shift becomes the scale.
+  AM.IndexReg = MaskedX;
+  AM.Scale = 1 << ScaleLog2;
+  return false;
+}
+
+// Implement some heroics to detect shifts of masked values where the mask can
+// be replaced by extending the shift and undoing that in the addressing mode
+// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
+// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
+// the addressing mode. This results in code such as:
+//
+// int f(short *y, int *lookup_table) {
+// ...
+// return *y + lookup_table[*y >> 11];
+// }
+//
+// Turning into:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $11, %ecx
+// addl (%rsi,%rcx,4), %eax
+//
+// Instead of:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $9, %ecx
+// andl $124, %ecx
+// addl (%rsi,%rcx), %eax
+//
+// Note that this function assumes the mask is provided as a mask *after* the
+// value is shifted. The input chain may or may not match that, but computing
+// such a mask is trivial.
+static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ unsigned MaskLZ = CountLeadingZeros_64(Mask);
+ unsigned MaskTZ = CountTrailingZeros_64(Mask);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = MaskTZ;
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ // We also need to ensure that mask is a continuous run of bits.
+ if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+
+ // Scale the leading zero count down based on the actual size of the value.
+ // Also scale it down based on the size of the shift.
+ MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+
+ // The final check is to ensure that any masked out high bits of X are
+ // already known to be zero. Otherwise, the mask has a semantic impact
+ // other than masking out a couple of low bits. Unfortunately, because of
+ // the mask, zero extensions will be removed from operands in some cases.
+ // This code works extra hard to look through extensions because we can
+ // replace them with zero extensions cheaply if necessary.
+ bool ReplacingAnyExtend = false;
+ if (X.getOpcode() == ISD::ANY_EXTEND) {
+ unsigned ExtendBits =
+ X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ // Assume that we'll replace the any-extend with a zero-extend, and
+ // narrow the search to the extended value.
+ X = X.getOperand(0);
+ MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
+ ReplacingAnyExtend = true;
+ }
+ APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
+ MaskLZ);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne);
+ if (MaskedHighBits != KnownZero) return true;
+
+ // We've identified a pattern that can be transformed into a single shift
+ // and an addressing mode. Make it so.
+ EVT VT = N.getValueType();
+ if (ReplacingAnyExtend) {
+ assert(X.getValueType() != VT);
+ // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
+ SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
+ if (NewX.getNode()->getNodeId() == -1 ||
+ NewX.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), NewX.getNode());
+ NewX.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ X = NewX;
+ }
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
+ if (NewSRLAmt.getNode()->getNodeId() == -1 ||
+ NewSRLAmt.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), NewSRLAmt.getNode());
+ NewSRLAmt.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (NewSRL.getNode()->getNodeId() == -1 ||
+ NewSRL.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), NewSRL.getNode());
+ NewSRL.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (NewSHLAmt.getNode()->getNodeId() == -1 ||
+ NewSHLAmt.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), NewSHLAmt.getNode());
+ NewSHLAmt.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (NewSHL.getNode()->getNodeId() == -1 ||
+ NewSHL.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ DAG.RepositionNode(N.getNode(), NewSHL.getNode());
+ NewSHL.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewSRL;
+ return false;