}
SDNode *Select(SDNode *N);
- virtual void InstructionSelect();
+
bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Mode);
bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align);
+ bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
SDValue &Label);
SDNode *SelectARMIndexedLoad(SDNode *N);
SDNode *SelectT2IndexedLoad(SDNode *N);
- /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
- SDNode *SelectDYN_ALLOC(SDNode *N);
-
- /// SelectVLD - Select NEON load intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
}
-void ARMDAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue N,
SDValue &BaseReg,
}
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Update,
- SDValue &Opc, SDValue &Align) {
+ SDValue &Addr, SDValue &Align) {
Addr = N;
- // Default to no writeback.
- Update = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
// Default to no alignment.
Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
- SDValue Size = N->getOperand(1);
- SDValue Align = N->getOperand(2);
- SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
- int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
- if (AlignVal < 0)
- // We need to align the stack. Use Thumb1 tAND which is the only thumb
- // instruction that can read and write SP. This matches to a pseudo
- // instruction that has a chain to ensure the result is written back to
- // the stack pointer.
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0);
-
- bool isC = isa<ConstantSDNode>(Size);
- uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
- // Handle the most common case for both Thumb1 and Thumb2:
- // tSUBspi - immediate is between 0 ... 508 inclusive.
- if (C <= 508 && ((C & 3) == 0))
- // FIXME: tSUBspi encode scale 4 implicitly.
- return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
- CurDAG->getTargetConstant(C/4, MVT::i32),
- Chain);
-
- if (Subtarget->isThumb1Only()) {
- // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
- // should have negated the size operand already. FIXME: We can't insert
- // new target independent node at this stage so we are forced to negate
- // it earlier. Is there a better solution?
- return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
- Chain);
- } else if (Subtarget->isThumb2()) {
- if (isC && Predicate_t2_so_imm(Size.getNode())) {
- // t2SUBrSPi
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
- } else if (isC && Predicate_imm0_4095(Size.getNode())) {
- // t2SUBrSPi12
- SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
- } else {
- // t2SUBrSPs
- SDValue Ops[] = { SP, Size,
- getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
- return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
- }
- }
-
- // FIXME: Add ADD / SUB sp instructions for ARM.
- return 0;
-}
-
/// PairDRegs - Insert a pair of double registers into an implicit def to
/// form a quad register.
///
/// \param VT  value type given to the IMPLICIT_DEF node and to both
///            INSERT_SUBREG nodes created here.
/// \param V0  value inserted at subregister index ARM::DSUBREG_0.
/// \param V1  value inserted at subregister index ARM::DSUBREG_1.
/// \returns the second INSERT_SUBREG machine node, i.e. the implicit def
///          with both V0 and V1 inserted.
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
// All nodes built here take their debug location from the node producing V0.
DebugLoc dl = V0.getNode()->getDebugLoc();
// Start from an IMPLICIT_DEF of type VT so there is a value to insert into.
SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0);
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
// The subregister indices are handed to INSERT_SUBREG as i32 target constants.
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
// First insertion: V0 into the implicit def at DSUBREG_0.
- SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
VT, Undef, V0, SubReg0);
// Second insertion: V1 into the result of the first insertion at DSUBREG_1.
- return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+ return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
VT, SDValue(Pair, 0), V1, SubReg1);
}
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VLD2,
- // loading 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- std::vector<EVT> ResTys(4, VT);
+ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
+ std::vector<EVT> ResTys(2 * NumVecs, RegVT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7);
- Chain = SDValue(VLd, 4);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MemAddr.getValueType());
ResTys.push_back(MVT::Other);
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,
- Pred, PredReg, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
- Align, Pred, PredReg, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7);
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
+ Align, Reg0, Pred, Reg0, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 10> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
if (is64BitVector) {
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VST2,
- // storing 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VST1 and VST2,
+ // storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(Vec+3)));
}
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+ Ops.push_back(Reg0); // post-access address offset
// Store the even subregs.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
N->getOperand(Vec+3)));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), NumVecs+6);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+4] = Pred;
- Ops[NumVecs+5] = PredReg;
- Ops[NumVecs+6] = Chain;
+ Ops[NumVecs+5] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+7);
+ MVT::Other, Ops.data(), NumVecs+6);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
case MVT::v4i32: OpcodeIndex = 1; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 9> Ops;
+ SmallVector<SDValue, 10> Ops;
Ops.push_back(MemAddr);
- Ops.push_back(MemUpdate);
- Ops.push_back(MemOpc);
Ops.push_back(Align);
unsigned Opc = 0;
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
- Ops.push_back(PredReg);
+ Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+7);
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
SDNode *ResNode;
if (Subtarget->isThumb1Only()) {
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ARMISD::DYN_ALLOC:
- return SelectDYN_ALLOC(N);
case ISD::SRL:
if (SDNode *I = SelectV6T2BitfieldExtractOp(N,
Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX))
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
+
+ // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
+ Ops, 5);
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::STORE: {
+ // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = getAL(CurDAG);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
+ AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ }
// Other cases are autogenerated.
break;
}
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VZIPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VUZPq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
case MVT::v4f32:
case MVT::v4i32: Opc = ARM::VTRNq32; break;
}
- SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
default:
break;
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64 };
+ return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD2d64 };
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD3d64 };
- unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a };
- unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b };
+ ARM::VLD3d32, ARM::VLD1d64T };
+ unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
+ ARM::VLD3q16_UPD,
+ ARM::VLD3q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
+ ARM::VLD3q16odd_UPD,
+ ARM::VLD3q32odd_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD4d64 };
- unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a };
- unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b };
+ ARM::VLD4d32, ARM::VLD1d64Q };
+ unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
+ ARM::VLD4q16_UPD,
+ ARM::VLD4q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
+ ARM::VLD4q16odd_UPD,
+ ARM::VLD4q32odd_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld3lane: {
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4lane: {
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b };
+ unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
+ unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST2d64 };
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST3d64 };
- unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
- unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+ ARM::VST3d32, ARM::VST1d64T };
+ unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
+ ARM::VST3q16_UPD,
+ ARM::VST3q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
+ ARM::VST3q16odd_UPD,
+ ARM::VST3q32odd_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST4d64 };
- unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
- unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+ ARM::VST4d32, ARM::VST1d64Q };
+ unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
+ ARM::VST4q16_UPD,
+ ARM::VST4q32_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
+ ARM::VST4q16odd_UPD,
+ ARM::VST4q32odd_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a };
- unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst3lane: {
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a };
- unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4lane: {
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a };
- unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b };
+ unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
+ unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
}