}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = MF.getFunction()->optForSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
return SelectShiftedRegister(N, true, Reg, Shift);
}
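+ // Entry points for the scaled signed 7-bit immediate addressing mode (used
+ // by the LDP/STP family), one per access size so TableGen patterns can
+ // refer to them.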
+ bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
+ SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
SDNode *SelectLIBM(SDNode *N);
+ SDNode *SelectFPConvertWithRound(SDNode *N);
SDNode *SelectReadRegister(SDNode *N);
SDNode *SelectWriteRegister(SDNode *N);
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
}
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+
+ SDNode *GenerateInexactFlagIfNeeded(const SDValue &In, unsigned InTyVariant,
+ SDLoc DL);
};
} // end anonymous namespace
return true;
}
-// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
SDValue &LaneOp, int &LaneIdx) {
}
// AArch64 mandates that the RHS of the operation must use the smallest
- // register classs that could contain the size being extended from. Thus,
+ // register class that could contain the size being extended from. Thus,
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though
// there might not be an actual 32-bit value in the program. We can
// (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
-/// leads to duplaicated ADRP instructions.
+/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
for (auto Use : N->uses()) {
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
return true;
}
+/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
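+/// This is the addressing mode used by the LDP/STP family of instructions.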
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit
+ // signed addressing mode selected here doesn't support labels/immediates,
+ // only base+offset.
+
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ unsigned Scale = Log2_32(Size);
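+ // Accept only offsets that are a multiple of Size and whose scaled value
+ // fits in a signed 7-bit immediate; e.g. for Size == 8 this is the range
+ // [-512, 504] in steps of 8.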
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
+ RHSC < (0x40 << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
+ }
+ }
+ }
+
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // stp x1, x2, [x0]
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
if (isa<ConstantSDNode>(RHS)) {
int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
- // Skip the immediate can be seleced by load/store addressing mode.
+ // Skip immediates that can be selected by the load/store addressing mode.
// Also skip immediates that can be encoded by a single ADD (SUB is also
// checked by using -ImmOff).
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
// it into an i64.
DstVT = MVT::i32;
}
+ } else if (VT == MVT::f16) {
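+ // Note that LDRHpre/LDRHpost load into an FPR16 register, which is what
+ // f16 needs; the GPR32 halfword forms are LDRHHpre/LDRHHpost.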
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
// The resulting code will be at least as good as the original one
// plus it may expose more opportunities for bitfield insert pattern.
// FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM
+ // some optimizations expect AND and not UBFM.
Opd0 = N->getOperand(0);
} else
return false;
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+ bool BiggerPattern,
SDValue &Src, int &ShiftAmount,
int &MaskWidth) {
EVT VT = Op.getValueType();
Op = Op.getOperand(0);
}
+ // Don't match if the SHL has more than one use, since then we'll end up
+ // generating SHL+UBFIZ instead of just keeping SHL+AND.
+ if (!BiggerPattern && !Op.hasOneUse())
+ return false;
+
uint64_t ShlImm;
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
return false;
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
- // amount.
+ // amount. BiggerPattern is true when this pattern is being matched for BFI;
+ // it is false when this pattern is being matched for UBFIZ, in which case
+ // it is not profitable to insert an extra shift.
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+ return false;
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
return true;
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
- // OR is commutative, check both possibilities (does llvm provide a
- // way to do that directely, e.g., via code matcher?)
- SDValue OrOpd1Val = N->getOperand(1);
- SDNode *OrOpd0 = N->getOperand(0).getNode();
- SDNode *OrOpd1 = N->getOperand(1).getNode();
- for (int i = 0; i < 2;
- ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+ // OR is commutative, check all combinations of operand order and values of
+ // BiggerPattern, i.e.
+ // Opd0, Opd1, BiggerPattern=false
+ // Opd1, Opd0, BiggerPattern=false
+ // Opd0, Opd1, BiggerPattern=true
+ // Opd1, Opd0, BiggerPattern=true
+ // Several of these combinations may match, so check with BiggerPattern=false
+ // first since that will produce better results by matching more instructions
+ // and/or inserting fewer extra instructions.
+ for (int I = 0; I < 4; ++I) {
+ bool BiggerPattern = I / 2;
+ SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+ SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+ SDNode *OrOpd1 = OrOpd1Val.getNode();
+
unsigned BFXOpc;
int DstLSB, Width;
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
- NumberOfIgnoredLowBits, true)) {
+ NumberOfIgnoredLowBits, BiggerPattern)) {
// Check that the returned opcode is compatible with the pattern,
// i.e., same type and zero extended (U and not S)
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
// If the mask on the insertee is correct, we have a BFXIL operation. We
// can share the ImmR and ImmS values from the already-computed UBFM.
- } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
- DstLSB, Width)) {
+ } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+ BiggerPattern,
+ Src, DstLSB, Width)) {
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
ImmS = Width - 1;
} else
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an AND that
+/// masks out a contiguous set of bits.
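+/// For example, i32 (and (shl x, 4), 0xFF0) moves an 8-bit field up by four
+/// bits and is selected as UBFIZ Wd, Wn, #4, #8.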
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return nullptr;
+
+ EVT VT = N->getValueType(0);
+ unsigned Opc;
+ if (VT == MVT::i32)
+ Opc = AArch64::UBFMWri;
+ else if (VT == MVT::i64)
+ Opc = AArch64::UBFMXri;
+ else
+ return nullptr;
+
+ SDValue Op0;
+ int DstLSB, Width;
+ if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+ Op0, DstLSB, Width))
+ return nullptr;
+
+ // ImmR is the rotate right amount.
+ unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+ // ImmS is the most significant bit of the source to be moved.
+ unsigned ImmS = Width - 1;
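+ // e.g. DstLSB = 4 and Width = 8 on i32 give ImmR = 28 and ImmS = 7,
+ // the UBFM encoding of UBFIZ #4, #8.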
+
+ SDLoc DL(N);
+ SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+ CurDAG->getTargetConstant(ImmS, DL, VT)};
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+}
+
+/// GenerateInexactFlagIfNeeded - Insert an FRINTX instruction to raise the
+/// inexact signal on round-to-integer operations if needed. C11 leaves it
+/// implementation-defined whether these operations trigger an inexact
+/// exception. IEEE says they don't. Unfortunately, Darwin decided they do,
+/// so we sometimes have to insert a special instruction just to set the
+/// right bit in FPSR.
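+/// Returns the FRINTX node, whose glue result (result 1) the caller chains
+/// into the following instruction, or null when no flag needs to be set.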
+SDNode *AArch64DAGToDAGISel::GenerateInexactFlagIfNeeded(const SDValue &In,
+ unsigned InTyVariant,
+ SDLoc DL) {
+ if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
+ // Pick the FRINTX variant matching the input type, which is all that is
+ // needed to set the flag. InTyVariant is 0 for f32 and 1 for f64.
+ unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
+ return CurDAG->getMachineNode(FRINTXOpcs[InTyVariant], DL,
+ In.getValueType(), MVT::Glue, In);
+ }
+ return nullptr;
+}
+
SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned Variant;
unsigned Opc;
- unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
if (VT == MVT::f32) {
Variant = 0;
} else
return nullptr; // Unrecognized argument type. Fall back on default codegen.
- // Pick the FRINTX variant needed to set the flags.
- unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
switch (N->getOpcode()) {
default:
return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
SmallVector<SDValue, 2> Ops;
Ops.push_back(In);
- // C11 leaves it implementation-defined whether these operations trigger an
- // inexact exception. IEEE says they don't. Unfortunately, Darwin decided
- // they do so we sometimes have to insert a special instruction just to set
- // the right bit in FPSR.
- if (Subtarget->isTargetDarwin() && !TM.Options.UnsafeFPMath) {
- SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
- Ops.push_back(SDValue(FRINTX, 1));
- }
+ if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, Variant, dl))
+ Ops.push_back(SDValue(FRINTXNode, 1));
return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
+/// SelectFPConvertWithRound - Try to combine an FP rounding operation and an
+/// FP-to-integer conversion into a single instruction.
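+/// For example, (fp_to_sint (fceil x)) becomes a single FCVTPS; FFLOOR maps
+/// to FCVTM, FTRUNC to FCVTZ and FROUND to FCVTA.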
+SDNode *AArch64DAGToDAGISel::SelectFPConvertWithRound(SDNode *N) {
+ SDNode *Op0 = N->getOperand(0).getNode();
+
+ // Return if the round op is used by other nodes, as this would result in two
+ // FRINTX, one each for round and convert.
+ if (!Op0->hasOneUse())
+ return nullptr;
+
+ unsigned InTyVariant;
+ EVT InTy = Op0->getValueType(0);
+ if (InTy == MVT::f32)
+ InTyVariant = 0;
+ else if (InTy == MVT::f64)
+ InTyVariant = 1;
+ else
+ return nullptr;
+
+ unsigned OutTyVariant;
+ EVT OutTy = N->getValueType(0);
+ if (OutTy == MVT::i32)
+ OutTyVariant = 0;
+ else if (OutTy == MVT::i64)
+ OutTyVariant = 1;
+ else
+ return nullptr;
+
+ assert((N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::FP_TO_UINT) && "Unexpected opcode!");
+ unsigned FpConVariant = N->getOpcode() == ISD::FP_TO_SINT ? 0 : 1;
+
+ unsigned Opc;
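+ // Each opcode table below is indexed as
+ // [FpConVariant][InTyVariant][OutTyVariant], i.e.
+ // [signed/unsigned][f32/f64 source][i32/i64 result].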
+ switch (Op0->getOpcode()) {
+ default:
+ return nullptr;
+ case ISD::FCEIL: {
+ unsigned FCVTPOpcs[2][2][2] = {
+ { { AArch64::FCVTPSUWSr, AArch64::FCVTPSUXSr },
+ { AArch64::FCVTPSUWDr, AArch64::FCVTPSUXDr } },
+ { { AArch64::FCVTPUUWSr, AArch64::FCVTPUUXSr },
+ { AArch64::FCVTPUUWDr, AArch64::FCVTPUUXDr } } };
+ Opc = FCVTPOpcs[FpConVariant][InTyVariant][OutTyVariant];
+ break;
+ }
+ case ISD::FFLOOR: {
+ unsigned FCVTMOpcs[2][2][2] = {
+ { { AArch64::FCVTMSUWSr, AArch64::FCVTMSUXSr },
+ { AArch64::FCVTMSUWDr, AArch64::FCVTMSUXDr } },
+ { { AArch64::FCVTMUUWSr, AArch64::FCVTMUUXSr },
+ { AArch64::FCVTMUUWDr, AArch64::FCVTMUUXDr } } };
+ Opc = FCVTMOpcs[FpConVariant][InTyVariant][OutTyVariant];
+ break;
+ }
+ case ISD::FTRUNC: {
+ unsigned FCVTZOpcs[2][2][2] = {
+ { { AArch64::FCVTZSUWSr, AArch64::FCVTZSUXSr },
+ { AArch64::FCVTZSUWDr, AArch64::FCVTZSUXDr } },
+ { { AArch64::FCVTZUUWSr, AArch64::FCVTZUUXSr },
+ { AArch64::FCVTZUUWDr, AArch64::FCVTZUUXDr } } };
+ Opc = FCVTZOpcs[FpConVariant][InTyVariant][OutTyVariant];
+ break;
+ }
+ case ISD::FROUND: {
+ unsigned FCVTAOpcs[2][2][2] = {
+ { { AArch64::FCVTASUWSr, AArch64::FCVTASUXSr },
+ { AArch64::FCVTASUWDr, AArch64::FCVTASUXDr } },
+ { { AArch64::FCVTAUUWSr, AArch64::FCVTAUUXSr },
+ { AArch64::FCVTAUUWDr, AArch64::FCVTAUUXDr } } };
+ Opc = FCVTAOpcs[FpConVariant][InTyVariant][OutTyVariant];
+ break;
+ }
+ }
+
+ SDLoc DL(N);
+ SDValue In = Op0->getOperand(0);
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(In);
+
+ if (SDNode *FRINTXNode = GenerateInexactFlagIfNeeded(In, InTyVariant, DL))
+ Ops.push_back(SDValue(FRINTXNode, 1));
+
+ return CurDAG->getMachineNode(Opc, DL, OutTy, Ops);
+}
+
bool
AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
unsigned RegWidth) {
// into a single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() == 1)
return -1;
case ISD::SRA:
if (SDNode *I = SelectBitfieldExtractOp(Node))
return I;
+ if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+ return I;
break;
case ISD::OR:
break;
}
}
+ break;
}
case AArch64ISD::LD2post: {
if (VT == MVT::v8i8)
if (SDNode *I = SelectLIBM(Node))
return I;
break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (SDNode *I = SelectFPConvertWithRound(Node))
+ return I;
+ break;
}
// Select the default instruction