setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
- // [SU][MIN|MAX] are available for all NEON types apart from i64.
+ // [SU][MIN|MAX] and [SU]ABSDIFF are available for all NEON types apart from
+ // i64.
if (!VT.isFloatingPoint() &&
VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
- for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
+ ISD::SABSDIFF, ISD::UABSDIFF})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
if (Subtarget->isLittleEndian()) {
// (aarch64_neon_umull (extract_high (v2i64 vec)))
//                     (extract_high (v2i64 (dup128 scalar)))))
//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
+static SDValue tryCombineLongOpWithDup(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
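+ // ISD::INTRINSIC_WO_CHAIN nodes carry the intrinsic ID as operand 0, so
+ // their data operands start at index 1; plain SABSDIFF/UABSDIFF nodes take
+ // theirs at indices 0 and 1.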
+ bool IsIntrinsic = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
+ SDValue LHS = N->getOperand(IsIntrinsic ? 1 : 0);
+ SDValue RHS = N->getOperand(IsIntrinsic ? 2 : 1);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
return SDValue();
}
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
+ // N could either be an intrinsic or a sabsdiff/uabsdiff node.
+ if (IsIntrinsic)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
+ else
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
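+ // Rewrite the AArch64-specific absolute-difference intrinsics as the
+ // target-independent SABSDIFF/UABSDIFF nodes so the rest of the backend can
+ // match the generic form.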
+ case Intrinsic::aarch64_neon_sabd:
+ return DAG.getNode(ISD::SABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_uabd:
+ return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
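+ // The long multiplies can fold a DUP operand into an extract_high of a
+ // wider DUP; see tryCombineLongOpWithDup.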
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
- return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+ return tryCombineLongOpWithDup(N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ (N->getOperand(0).getOpcode() == ISD::SABSDIFF ||
+ N->getOperand(0).getOpcode() == ISD::UABSDIFF)) {
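+ // The sabd/uabd intrinsics are rewritten into SABSDIFF/UABSDIFF by
+ // performIntrinsicCombine, so the generic nodes are what we match here.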
SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::aarch64_neon_sabd ||
- IID == Intrinsic::aarch64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
+ SDValue NewABD = tryCombineLongOpWithDup(ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
}
// This is effectively a custom type legalization for AArch64.
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
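+// SABA/SABD (and UABA/UABD below) now select via the generic sabsdiff and
+// uabsdiff nodes rather than the AArch64-specific intrinsics.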
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+ TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+ TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
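+// The accumulating and long forms (SABAL/SABDL, and UABAL/UABDL below)
+// likewise match the generic absolute-difference nodes.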
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",