namespace {
class AArch64DAGToDAGISel : public SelectionDAGISel {
- AArch64TargetMachine &TM;
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
public:
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
ForCodeSize(false) {}
const char *getPassName() const override {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ ForCodeSize = MF.getFunction()->optForSize();
+ Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
return SelectShiftedRegister(N, true, Reg, Shift);
}
+ bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
- unsigned SubRegs[]);
+ SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
+ const unsigned SubRegs[]);
SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
- SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
+ SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
- SDNode *SelectLIBM(SDNode *N);
+ SDNode *SelectReadRegister(SDNode *N);
+ SDNode *SelectWriteRegister(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
return false;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- Val = CurDAG->getTargetConstant(Immed, MVT::i32);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ SDLoc dl(N);
+ Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
return true;
}
return false;
Immed &= 0xFFFFFFULL;
- return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
+ return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
+ Shift);
}
/// getShiftTypeForNode - Translate a shift node to the corresponding
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the a value is used at least twice, unless we are optimizing
+ // it hurts if the value is used at least twice, unless we are optimizing
// for code size.
if (ForCodeSize || V.hasOneUse())
return true;
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
return isWorthFolding(N);
}
return true;
}
-// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
SDValue &LaneOp, int &LaneIdx) {
/// is a lane in the upper half of a 128-bit vector. Recognize and select this
/// so that we don't emit unnecessary lane extracts.
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
+ SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
return nullptr;
}
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
break;
}
- return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);
}
SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
+ SDLoc dl(N);
SDValue SMULLOp0;
SDValue SMULLOp1;
int LaneIdx;
LaneIdx))
return nullptr;
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
} else
llvm_unreachable("Unrecognized intrinsic.");
- return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
+ return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
}
/// Instructions that accept extend modifiers like UXTW expect the register
if (N.getValueType() == MVT::i32)
return N;
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- SDLoc(N), MVT::i32, N, SubReg);
+ dl, MVT::i32, N, SubReg);
return SDValue(Node, 0);
}
}
// AArch64 mandates that the RHS of the operation must use the smallest
- // register classs that could contain the size being extended from. Thus,
+ // register class that could contain the size being extended from. Thus,
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though
// there might not be an actual 32-bit value in the program. We can
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
Reg = narrowIfNeeded(CurDAG, Reg);
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
+ Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
+ MVT::i32);
return isWorthFolding(N);
}
+/// If there's a use of this ADDlow that's not itself a load/store then we'll
+/// need to create a real ADD instruction from it anyway and there's no point in
+/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
+/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
+/// leads to duplicated ADRP instructions.
+static bool isWorthFoldingADDlow(SDValue N) {
+ for (auto Use : N->uses()) {
+ if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
+ Use->getOpcode() != ISD::ATOMIC_LOAD &&
+ Use->getOpcode() != ISD::ATOMIC_STORE)
+ return false;
+
+ // ldar and stlr have much more restrictive addressing modes (just a
+ // register).
+ if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
+ return false;
+ }
+
+ return true;
+}
+
+/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+ // selected here doesn't support labels/immediates, only base+offset.
+
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
+ RHSC < (0x40 << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
+ }
+ }
+ }
+
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // stp x1, x2, [x0]
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
SDValue &Base, SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
const TargetLowering *TLI = getTargetLowering();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
- if (N.getOpcode() == AArch64ISD::ADDlow) {
+ if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
GlobalAddressSDNode *GAN =
dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
Base = N.getOperand(0);
const GlobalValue *GV = GAN->getGlobal();
unsigned Alignment = GV->getAlignment();
- const DataLayout *DL = TLI->getDataLayout();
- if (Alignment == 0 && !Subtarget->isTargetDarwin())
- Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+ Type *Ty = GV->getType()->getElementType();
+ if (Alignment == 0 && Ty->isSized())
+ Alignment = DL.getABITypeAlignment(Ty);
if (Alignment >= Size)
return true;
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
return true;
}
}
// add x0, Xbase, #offset
// ldr x0, [x0]
Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
return true;
}
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
const TargetLowering *TLI = getTargetLowering();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
return true;
}
}
}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDLoc dl(N);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
SDValue ImpDef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
- 0);
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
+ TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
return SDValue(Node, 0);
}
if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
return false;
+ SDLoc dl(N);
if (WantExtend) {
AArch64_AM::ShiftExtendType Ext =
getExtendTypeForNode(N.getOperand(0), true);
return false;
Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
} else {
Offset = N.getOperand(0);
- SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
}
unsigned LegalShiftVal = Log2_32(Size);
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
+ SDLoc dl(N);
// We don't want to match immediate adds here, because they are better lowered
// to the register-immediate addressing modes.
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
return true;
}
// There was no shift, whatever else we find.
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
// Try to match an unshifted extend on the LHS.
AArch64_AM::InvalidShiftExtend) {
Base = RHS;
Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(LHS))
return true;
}
AArch64_AM::InvalidShiftExtend) {
Base = LHS;
Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
- SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
+ MVT::i32);
if (isWorthFolding(RHS))
return true;
}
return false;
}
+// Check if the given immediate is preferred by ADD. If an immediate can be
+// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
+// encoded by one MOVZ, return true.
+static bool isPreferredADD(int64_t ImmOff) {
+ // Constant in [0x0, 0xfff] can be encoded in ADD.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+ // Check if it can be encoded in an "ADD LSL #12".
+ if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
+ // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ return false;
+}
+
bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
SDValue &Base, SDValue &Offset,
SDValue &SignExtend,
return false;
SDValue LHS = N.getOperand(0);
SDValue RHS = N.getOperand(1);
-
- // We don't want to match immediate adds here, because they are better lowered
- // to the register-immediate addressing modes.
- if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
- return false;
+ SDLoc DL(N);
// Check if this particular node is reused in any non-memory related
// operation. If yes, do not try to fold this node into the address
return false;
}
+ // Watch out if RHS is a wide immediate, it can not be selected into
+ // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
+ // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
+ // instructions like:
+ // MOV X0, WideImmediate
+ // ADD X1, BaseReg, X0
+ // LDR X2, [X1, 0]
+ // For such situation, using [BaseReg, XReg] addressing mode can save one
+ // ADD/SUB:
+ // MOV X0, WideImmediate
+ // LDR X2, [BaseReg, X0]
+ if (isa<ConstantSDNode>(RHS)) {
+ int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ // Skip the immediate can be selected by load/store addressing mode.
+ // Also skip the immediate can be encoded by a single ADD (SUB is also
+ // checked by using -ImmOff).
+ if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
+ isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return false;
+
+ SDValue Ops[] = { RHS };
+ SDNode *MOVI =
+ CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
+ SDValue MOVIV = SDValue(MOVI, 0);
+ // This ADD of two X register will be selected into [Reg+Reg] mode.
+ N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
+ }
+
// Remember if it is worth folding N when it produces extended register.
bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
Base = LHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
Base = RHS;
- DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
return true;
}
// Match any non-shifted, non-extend, non-immediate add expression.
Base = LHS;
Offset = RHS;
- SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
- DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
// Reg1 + Reg2 is free: no check needed.
return true;
}
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
- static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
- AArch64::dsub2, AArch64::dsub3 };
+ static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = {
+ static const unsigned RegClassIDs[] = {
AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
- static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
- AArch64::qsub2, AArch64::qsub3 };
+ static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3};
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- unsigned RegClassIDs[],
- unsigned SubRegs[]) {
+ const unsigned RegClassIDs[],
+ const unsigned SubRegs[]) {
// There's no special register-class for a vector-list of 1 element: it's just
// a vector.
if (Regs.size() == 1)
assert(Regs.size() >= 2 && Regs.size() <= 4);
- SDLoc DL(Regs[0].getNode());
+ SDLoc DL(Regs[0]);
SmallVector<SDValue, 4> Ops;
// First operand of REG_SEQUENCE is the desired RegClass.
Ops.push_back(
- CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
+ CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
// Then we get pairs of source & subregister-position for the components.
for (unsigned i = 0; i < Regs.size(); ++i) {
Ops.push_back(Regs[i]);
- Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
}
SDNode *N =
// it into an i64.
DstVT = MVT::i32;
}
+ } else if (VT == MVT::f16) {
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
SDValue Base = LD->getBasePtr();
ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
int OffsetVal = (int)OffsetOp->getZExtValue();
- SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
+ SDLoc dl(N);
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
SDValue Ops[] = { Base, Offset, Chain };
- SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
+ SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
MVT::Other, Ops);
// Either way, we're replacing the node, so tell the caller that.
Done = true;
SDValue LoadedVal = SDValue(Res, 1);
if (InsertTo64) {
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
LoadedVal =
SDValue(CurDAG->getMachineNode(
- AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
+ AArch64::SUBREG_TO_REG, dl, MVT::i64,
+ CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
+ SubReg),
0);
}
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Mem operand;
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(2), // Mem operand;
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Mem operand
- Ops.push_back(N->getOperand(2)); // Incremental
- Ops.push_back(Chain);
+ SDValue Ops[] = {N->getOperand(1), // Mem operand
+ N->getOperand(2), // Incremental
+ Chain};
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 2));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
return St;
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other); // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 1)); // base register
- Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
- Ops.push_back(N->getOperand(0)); // Chain
+ SDValue Ops[] = {RegSeq,
+ N->getOperand(NumVecs + 1), // base register
+ N->getOperand(NumVecs + 2), // Incremental
+ N->getOperand(0)}; // Chain
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
if (Narrow)
SDValue RegSeq = createQTuple(Regs);
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ RegSeq->getValueType(0), MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq,
+ CurDAG->getTargetConstant(LaneNo, dl,
+ MVT::i64), // Lane Number
+ N->getOperand(NumVecs + 2), // Base register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Update uses of the write back register
Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
} else {
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
SuperReg);
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 3), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
SDValue RegSeq = createQTuple(Regs);
- SmallVector<EVT, 2> ResTys;
- ResTys.push_back(MVT::i64); // Type of the write back register
- ResTys.push_back(MVT::Other);
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
- Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
- Ops.push_back(N->getOperand(0));
+ SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
+ N->getOperand(NumVecs + 2), // Base Register
+ N->getOperand(NumVecs + 3), // Incremental
+ N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
// The resulting code will be at least as good as the original one
// plus it may expose more opportunities for bitfield insert pattern.
// FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM
+ // some optimizations expect AND and not UBFM.
Opd0 = N->getOperand(0);
} else
return false;
- assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
- "bad amount in shift node!");
+ // Bail out on large immediates. This happens when no proper
+ // combining/constant folding was performed.
+ if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
LSB = Srl_imm;
- MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
- : CountTrailingOnes_64(And_imm)) -
+ MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
+ : countTrailingOnes<uint64_t>(And_imm)) -
1;
if (ClampMSB)
// Since we're moving the extend before the right shift operation, we need
return true;
}
-static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB) {
- // We are looking for the following pattern which basically extracts a single
- // bit from the source value and places it in the LSB of the destination
- // value, all other bits of the destination value or set to zero:
+static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
+ SDValue &Opd0, unsigned &LSB,
+ unsigned &MSB) {
+ // We are looking for the following pattern which basically extracts several
+ // continuous bits from the source value and places it from the LSB of the
+ // destination value, all other bits of the destination value or set to zero:
//
// Value2 = AND Value, MaskImm
// SRL Value2, ShiftImm
//
- // with MaskImm >> ShiftImm == 1.
+ // with MaskImm >> ShiftImm to search for the bit width.
//
// This gets selected into a single UBFM:
//
- // UBFM Value, ShiftImm, ShiftImm
+ // UBFM Value, ShiftImm, BitWide + Srl_imm -1
//
if (N->getOpcode() != ISD::SRL)
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
- // Check whether we really have a one bit extract here.
- if (And_mask >> Srl_imm == 0x1) {
+ // Check whether we really have several bits extract here.
+ unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
+ if (BitWide && isMask_64(And_mask >> Srl_imm)) {
if (N->getValueType(0) == MVT::i32)
Opc = AArch64::UBFMWri;
else
Opc = AArch64::UBFMXri;
- LSB = MSB = Srl_imm;
-
+ LSB = Srl_imm;
+ MSB = BitWide + Srl_imm - 1;
return true;
}
}
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB,
+ unsigned &Immr, unsigned &Imms,
bool BiggerPattern) {
assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
"N must be a SHR/SRA operation to call this function");
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Type checking must have been done before calling this function");
- // Check for AND + SRL doing a one bit extract.
- if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+ // Check for AND + SRL doing several bits extract.
+ if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
return true;
// we're looking for a shift of a shift
} else
return false;
- assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
+ // Missing combines/constant folding may have left us with strange
+ // constants.
+ if (Shl_imm >= VT.getSizeInBits()) {
+ DEBUG((dbgs() << N
+ << ": Found large shift immediate, this should not happen\n"));
+ return false;
+ }
+
uint64_t Srl_imm = 0;
if (!isIntImmediate(N->getOperand(1), Srl_imm))
return false;
assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
"bad amount in shift node!");
- // Note: The width operand is encoded as width-1.
- unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
- int sLSB = Srl_imm - Shl_imm;
- if (sLSB < 0)
- return false;
- LSB = sLSB;
- MSB = LSB + Width;
+ int immr = Srl_imm - Shl_imm;
+ Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
+ Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
// SRA requires a signed extraction
if (VT == MVT::i32)
Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
}
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &LSB, unsigned &MSB,
+ SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
bool BiggerPattern = false) {
if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
return false;
break;
case ISD::AND:
- return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
+ return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
NumberOfIgnoredLowBits, BiggerPattern);
case ISD::SRL:
case ISD::SRA:
- return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
+ return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
}
unsigned NOpc = N->getMachineOpcode();
case AArch64::UBFMXri:
Opc = NOpc;
Opd0 = N->getOperand(0);
- LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+ Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+ Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
return true;
}
// Unreachable
}
SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
- unsigned Opc, LSB, MSB;
+ unsigned Opc, Immr, Imms;
SDValue Opd0;
- if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
+ if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
return nullptr;
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
// If the bit extract operation is 64bit but the original type is 32bit, we
// need to add one EXTRACT_SUBREG.
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
- SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
- CurDAG->getTargetConstant(MSB, MVT::i64)};
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
+ CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
- SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
MachineSDNode *Node =
- CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
SDValue(BFM, 0), SubReg);
return Node;
}
- SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT)};
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
+ CurDAG->getTargetConstant(Imms, dl, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
return Op;
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
if (ShlAmount > 0) {
// LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op,
- CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
+ UBFMOpc, dl, VT, Op,
+ CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
} else {
// LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
assert(ShlAmount < 0 && "expected right shift");
int ShrAmount = -ShlAmount;
ShiftNode = CurDAG->getMachineNode(
- UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
- CurDAG->getTargetConstant(BitWidth - 1, VT));
+ UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
+ CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
}
return SDValue(ShiftNode, 0);
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+ bool BiggerPattern,
SDValue &Src, int &ShiftAmount,
int &MaskWidth) {
EVT VT = Op.getValueType();
Op = Op.getOperand(0);
}
+ // Don't match if the SHL has more than one use, since then we'll end up
+ // generating SHL+UBFIZ instead of just keeping SHL+AND.
+ if (!BiggerPattern && !Op.hasOneUse())
+ return false;
+
uint64_t ShlImm;
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
return false;
return false;
ShiftAmount = countTrailingZeros(NonZeroBits);
- MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
+ MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
- // amount.
+ // amount. BiggerPattern is true when this pattern is being matched for BFI,
+ // BiggerPattern is false when this pattern is being matched for UBFIZ, in
+ // which case it is not profitable to insert an extra shift.
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+ return false;
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
return true;
// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
SDValue &Src, unsigned &ImmR,
- unsigned &ImmS, SelectionDAG *CurDAG) {
+ unsigned &ImmS, const APInt &UsefulBits,
+ SelectionDAG *CurDAG) {
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
// Set Opc
// Because of simplify-demanded-bits in DAGCombine, involved masks may not
// have the expected shape. Try to undo that.
- APInt UsefulBits;
- getUsefulBits(SDValue(N, 0), UsefulBits);
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
- // OR is commutative, check both possibilities (does llvm provide a
- // way to do that directely, e.g., via code matcher?)
- SDValue OrOpd1Val = N->getOperand(1);
- SDNode *OrOpd0 = N->getOperand(0).getNode();
- SDNode *OrOpd1 = N->getOperand(1).getNode();
- for (int i = 0; i < 2;
- ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+ // OR is commutative, check all combinations of operand order and values of
+ // BiggerPattern, i.e.
+ // Opd0, Opd1, BiggerPattern=false
+ // Opd1, Opd0, BiggerPattern=false
+ // Opd0, Opd1, BiggerPattern=true
+ // Opd1, Opd0, BiggerPattern=true
+ // Several of these combinations may match, so check with BiggerPattern=false
+ // first since that will produce better results by matching more instructions
+ // and/or inserting fewer extra instructions.
+ for (int I = 0; I < 4; ++I) {
+
+ bool BiggerPattern = I / 2;
+ SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+ SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+ SDNode *OrOpd1 = OrOpd1Val.getNode();
+
unsigned BFXOpc;
int DstLSB, Width;
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
- NumberOfIgnoredLowBits, true)) {
+ NumberOfIgnoredLowBits, BiggerPattern)) {
// Check that the returned opcode is compatible with the pattern,
// i.e., same type and zero extended (U and not S)
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
// If the mask on the insertee is correct, we have a BFXIL operation. We
// can share the ImmR and ImmS values from the already-computed UBFM.
- } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
- DstLSB, Width)) {
+ } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+ BiggerPattern,
+ Src, DstLSB, Width)) {
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
ImmS = Width - 1;
} else
unsigned Opc;
unsigned LSB, MSB;
SDValue Opd0, Opd1;
+ EVT VT = N->getValueType(0);
+ APInt NUsefulBits;
+ getUsefulBits(SDValue(N, 0), NUsefulBits);
+
+ // If all bits are not useful, just return UNDEF.
+ if (!NUsefulBits)
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);
- if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
+ if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits,
+ CurDAG))
return nullptr;
- EVT VT = N->getValueType(0);
+ SDLoc dl(N);
SDValue Ops[] = { Opd0,
Opd1,
- CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
+ CurDAG->getTargetConstant(LSB, dl, VT),
+ CurDAG->getTargetConstant(MSB, dl, VT) };
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
-SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an and masking
+/// out a contiguous set of bits.
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return nullptr;
+
EVT VT = N->getValueType(0);
- unsigned Variant;
unsigned Opc;
- unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
-
- if (VT == MVT::f32) {
- Variant = 0;
- } else if (VT == MVT::f64) {
- Variant = 1;
- } else
- return nullptr; // Unrecognized argument type. Fall back on default codegen.
-
- // Pick the FRINTX variant needed to set the flags.
- unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
- switch (N->getOpcode()) {
- default:
- return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
- case ISD::FCEIL: {
- unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
- Opc = FRINTPOpcs[Variant];
- break;
- }
- case ISD::FFLOOR: {
- unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
- Opc = FRINTMOpcs[Variant];
- break;
- }
- case ISD::FTRUNC: {
- unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
- Opc = FRINTZOpcs[Variant];
- break;
- }
- case ISD::FROUND: {
- unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
- Opc = FRINTAOpcs[Variant];
- break;
- }
- }
+ if (VT == MVT::i32)
+ Opc = AArch64::UBFMWri;
+ else if (VT == MVT::i64)
+ Opc = AArch64::UBFMXri;
+ else
+ return nullptr;
- SDLoc dl(N);
- SDValue In = N->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- Ops.push_back(In);
+ SDValue Op0;
+ int DstLSB, Width;
+ if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+ Op0, DstLSB, Width))
+ return nullptr;
- if (!TM.Options.UnsafeFPMath) {
- SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
- Ops.push_back(SDValue(FRINTX, 1));
- }
+ // ImmR is the rotate right amount.
+ unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+ // ImmS is the most significant bit of the source to be moved.
+ unsigned ImmS = Width - 1;
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ SDLoc DL(N);
+ SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+ CurDAG->getTargetConstant(ImmS, DL, VT)};
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
bool
// finding FBits, but it must still be in range.
if (FBits == 0 || FBits > RegWidth) return false;
- FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+ FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
return true;
}
+// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
+// of the string and obtains the integer values from them and combines these
+// into a single value to be used in the MRS/MSR instruction.
+static int getIntOperandFromRegisterString(StringRef RegString) {
+ SmallVector<StringRef, 5> Fields;
+ RegString.split(Fields, ':');
+
+ if (Fields.size() == 1)
+ return -1;
+
+ assert(Fields.size() == 5
+ && "Invalid number of fields in read register string");
+
+ SmallVector<int, 5> Ops;
+ bool AllIntFields = true;
+
+ for (StringRef Field : Fields) {
+ unsigned IntField;
+ AllIntFields &= !Field.getAsInteger(10, IntField);
+ Ops.push_back(IntField);
+ }
+
+ assert(AllIntFields &&
+ "Unexpected non-integer value in special register string.");
+
+ // Need to combine the integer fields of the string into a single value
+ // based on the bit encoding of MRS/MSR instruction.
+ return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
+ (Ops[3] << 3) | (Ops[4]);
+}
+
+// Lower the read_register intrinsic to an MRS instruction node if the special
+// register string argument is either of the form detailed in the ALCE (the
+// form described in getIntOperandsFromRegsterString) or is a named register
+// known by the MRS SysReg mapper.
+SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ SDLoc DL(N);
+
+ int Reg = getIntOperandFromRegisterString(RegString->getString());
+ if (Reg != -1)
+ return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
+ MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(0));
+
+ // Use the sysreg mapper to map the remaining possible strings to the
+ // value for the register to be used for the instruction operand.
+ AArch64SysReg::MRSMapper mapper;
+ bool IsValidSpecialReg;
+ Reg = mapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+ if (IsValidSpecialReg)
+ return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
+ MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(0));
+
+ return nullptr;
+}
+
+// Lower the write_register intrinsic to an MSR instruction node if the special
+// register string argument is either of the form detailed in the ALCE (the
+// form described in getIntOperandsFromRegsterString) or is a named register
+// known by the MSR SysReg mapper.
+SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
+ const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+ const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ SDLoc DL(N);
+
+ int Reg = getIntOperandFromRegisterString(RegString->getString());
+ if (Reg != -1)
+ return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(2), N->getOperand(0));
+
+ // Check if the register was one of those allowed as the pstatefield value in
+ // the MSR (immediate) instruction. To accept the values allowed in the
+ // pstatefield for the MSR (immediate) instruction, we also require that an
+ // immediate value has been provided as an argument, we know that this is
+ // the case as it has been ensured by semantic checking.
+ AArch64PState::PStateMapper PMapper;
+ bool IsValidSpecialReg;
+ Reg = PMapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+ if (IsValidSpecialReg) {
+ assert (isa<ConstantSDNode>(N->getOperand(2))
+ && "Expected a constant integer expression.");
+ uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned State;
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ assert(Immed < 2 && "Bad imm");
+ State = AArch64::MSRpstateImm1;
+ } else {
+ assert(Immed < 16 && "Bad imm");
+ State = AArch64::MSRpstateImm4;
+ }
+ return CurDAG->getMachineNode(State, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ CurDAG->getTargetConstant(Immed, DL, MVT::i16),
+ N->getOperand(0));
+ }
+
+ // Use the sysreg mapper to attempt to map the remaining possible strings
+ // to the value for the register to be used for the MSR (register)
+ // instruction operand.
+ AArch64SysReg::MSRMapper Mapper;
+ Reg = Mapper.fromString(RegString->getString(),
+ Subtarget->getFeatureBits(),
+ IsValidSpecialReg);
+
+ if (IsValidSpecialReg)
+ return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
+ CurDAG->getTargetConstant(Reg, DL, MVT::i32),
+ N->getOperand(2), N->getOperand(0));
+
+ return nullptr;
+}
+
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: ");
default:
break;
+ case ISD::READ_REGISTER:
+ if (SDNode *Res = SelectReadRegister(Node))
+ return Res;
+ break;
+
+ case ISD::WRITE_REGISTER:
+ if (SDNode *Res = SelectWriteRegister(Node))
+ return Res;
+ break;
+
case ISD::ADD:
if (SDNode *I = SelectMLAV64LaneV128(Node))
return I;
case ISD::SRA:
if (SDNode *I = SelectBitfieldExtractOp(Node))
return I;
+ if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+ return I;
break;
case ISD::OR:
.getVectorElementType()
.getSizeInBits()) {
default:
- assert(0 && "Unexpected vector element type!");
+ llvm_unreachable("Unexpected vector element type!");
case 64:
SubReg = AArch64::dsub;
break;
case 32:
SubReg = AArch64::ssub;
break;
- case 16: // FALLTHROUGH
+ case 16:
+ SubReg = AArch64::hsub;
+ break;
case 8:
llvm_unreachable("unexpected zext-requiring extract element!");
}
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
const TargetLowering *TLI = getTargetLowering();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(Shifter, MVT::i32) };
+ SDValue TFI = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ SDLoc DL(Node);
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
+ CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
}
case ISD::INTRINSIC_W_CHAIN: {
SDValue MemAddr = Node->getOperand(4);
// Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(ValLo);
- Ops.push_back(ValHi);
- Ops.push_back(MemAddr);
- Ops.push_back(Chain);
+ SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
// Transfer memoperands.
return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
case Intrinsic::aarch64_neon_ld2lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 2, AArch64::LD2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 2, AArch64::LD2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case Intrinsic::aarch64_neon_ld3lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 3, AArch64::LD3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 3, AArch64::LD3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case Intrinsic::aarch64_neon_ld4lane:
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectLoadLane(Node, 4, AArch64::LD4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectLoadLane(Node, 4, AArch64::LD4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST1Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST1Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST1Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST1Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST1Twov2s);
return SelectStore(Node, 3, AArch64::ST1Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST1Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST1Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST1Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST1Threev2s);
return SelectStore(Node, 4, AArch64::ST1Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST1Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST1Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST1Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST1Fourv2s);
return SelectStore(Node, 2, AArch64::ST2Twov8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 2, AArch64::ST2Twov16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 2, AArch64::ST2Twov4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 2, AArch64::ST2Twov8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 2, AArch64::ST2Twov2s);
return SelectStore(Node, 3, AArch64::ST3Threev8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 3, AArch64::ST3Threev16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 3, AArch64::ST3Threev4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 3, AArch64::ST3Threev8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 3, AArch64::ST3Threev2s);
return SelectStore(Node, 4, AArch64::ST4Fourv8b);
else if (VT == MVT::v16i8)
return SelectStore(Node, 4, AArch64::ST4Fourv16b);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectStore(Node, 4, AArch64::ST4Fourv4h);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectStore(Node, 4, AArch64::ST4Fourv8h);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectStore(Node, 4, AArch64::ST4Fourv2s);
case Intrinsic::aarch64_neon_st2lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 2, AArch64::ST2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 2, AArch64::ST2i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case Intrinsic::aarch64_neon_st3lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 3, AArch64::ST3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 3, AArch64::ST3i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case Intrinsic::aarch64_neon_st4lane: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectStoreLane(Node, 4, AArch64::ST4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectStoreLane(Node, 4, AArch64::ST4i16);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
break;
}
}
+ break;
}
case AArch64ISD::LD2post: {
if (VT == MVT::v8i8)
return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
else if (VT == MVT::v16i8)
return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
case AArch64ISD::LD1LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case AArch64ISD::LD2LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case AArch64ISD::LD3LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
case AArch64ISD::LD4LANEpost: {
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
else if (VT == MVT::v16i8)
return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
- else if (VT == MVT::v4i16)
+ else if (VT == MVT::v4i16 || VT == MVT::v4f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
- else if (VT == MVT::v8i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
else if (VT == MVT::v2i32 || VT == MVT::v2f32)
return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
VT = Node->getOperand(1).getValueType();
if (VT == MVT::v16i8 || VT == MVT::v8i8)
return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
+ VT == MVT::v8f16)
return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
VT == MVT::v2f32)
return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
break;
}
-
- case ISD::FCEIL:
- case ISD::FFLOOR:
- case ISD::FTRUNC:
- case ISD::FROUND:
- if (SDNode *I = SelectLIBM(Node))
- return I;
- break;
}
// Select the default instruction