X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64ISelDAGToDAG.cpp;h=dac4b32cfecdd90bbdac4223affa8f76c42a5e94;hb=f4ec8bfaecef4e38f713b9e05d89869b023e1ce8;hp=4d79c78700ae9fc87ec36b6bdebd5a546fd6381c;hpb=65d1be119b1b5cb279815fe2e1a3b48f86606a3d;p=oota-llvm.git diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 4d79c78700a..dac4b32cfec 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -109,12 +109,19 @@ public: SDNode* Select(SDNode*); private: + /// Get the opcode for table lookup instruction + unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec); + + /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4. + /// IsExt is to indicate if the result will be extended with an argument. + SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt); + /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating, + SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *Opcode); /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4. - SDNode *SelectVST(SDNode *N, unsigned NumVecs, bool isUpdating, + SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *Opcodes); /// Form sequences of consecutive 64/128-bit registers for use in NEON @@ -128,6 +135,19 @@ private: /// functions. Those should almost always be called instead. SDValue createTuple(ArrayRef Vecs, unsigned RegClassIDs[], unsigned SubRegs[]); + + /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4. + /// The opcode array specifies the instructions used for load. + SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes); + + /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4. + /// The opcode arrays specify the instructions used for load/store. 
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, + unsigned NumVecs, const uint16_t *Opcodes); + + SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, + SDValue Operand); }; } @@ -476,7 +496,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; - case AArch64::LD1WB2V_1D_fixed: return AArch64::LD1WB2V_1D_register; case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; @@ -485,7 +504,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; - case AArch64::LD1WB3V_1D_fixed: return AArch64::LD1WB3V_1D_register; case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; @@ -494,12 +512,38 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; - case AArch64::LD1WB4V_1D_fixed: return AArch64::LD1WB4V_1D_register; case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register; + case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register; + case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register; + case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register; + case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register; + case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register; + case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register; + case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register; + case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register; + + case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register; + case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register; + case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register; + case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register; + case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register; + case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register; + case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register; + case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register; + + case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register; + case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register; + case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register; + case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register; + case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register; + case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register; + case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register; + 
case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register; + case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; @@ -512,7 +556,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; - case AArch64::ST1WB2V_1D_fixed: return AArch64::ST1WB2V_1D_register; case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; @@ -521,7 +564,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; - case AArch64::ST1WB3V_1D_fixed: return AArch64::ST1WB3V_1D_register; case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; @@ -530,37 +572,115 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; - case AArch64::ST1WB4V_1D_fixed: return AArch64::ST1WB4V_1D_register; case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register; case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; + + case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register; + case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register; + case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register; + case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register; + case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register; + case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register; + case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register; + case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register; + + case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register; + case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register; + case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register; + case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register; + case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register; + case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register; + case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register; + case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register; + + case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register; + case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register; + case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register; + case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register; + case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register; + case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register; + case 
AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register; + case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register; + + // Post-index of duplicate loads + case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register; + case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register; + case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register; + case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register; + case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register; + case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register; + case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register; + case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register; + + case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register; + case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register; + case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register; + case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register; + case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register; + case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register; + case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register; + case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register; + + case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register; + case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register; + case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register; + case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register; + case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register; + case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register; + case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register; + case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register; + + // Post-index of lane loads + case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register; + case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register; + case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register; + case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register; + + case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register; + case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register; + case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register; + case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register; + + case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register; + case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register; + case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register; + case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register; + + // Post-index of lane stores + case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register; + case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register; + case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register; + case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register; + + case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register; + case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register; + case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register; + case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register; + + case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register; + case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register; + case AArch64::ST4LN_WB_S_fixed: 
return AArch64::ST4LN_WB_S_register; + case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register; } return Opc; // If not one we handle, return it unchanged. } -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, - bool isUpdating, +SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, + unsigned NumVecs, const uint16_t *Opcodes) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); EVT VT = N->getValueType(0); unsigned OpcodeIndex; - switch (VT.getSimpleVT().SimpleTy) { + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; default: llvm_unreachable("unhandled vector load type"); - case MVT::v8i8: OpcodeIndex = 0; break; - case MVT::v4i16: OpcodeIndex = 1; break; - case MVT::v2f32: - case MVT::v2i32: OpcodeIndex = 2; break; - case MVT::v1f64: - case MVT::v1i64: OpcodeIndex = 3; break; - case MVT::v16i8: OpcodeIndex = 4; break; - case MVT::v8f16: - case MVT::v8i16: OpcodeIndex = 5; break; - case MVT::v4f32: - case MVT::v4i32: OpcodeIndex = 6; break; - case MVT::v2f64: - case MVT::v2i64: OpcodeIndex = 7; break; } unsigned Opc = Opcodes[OpcodeIndex]; @@ -577,9 +697,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Ops.push_back(N->getOperand(0)); // Push back the Chain - std::vector ResTys; - bool is64BitVector = VT.is64BitVector(); - + SmallVector ResTys; + // Push back the type of return super register if (NumVecs == 1) ResTys.push_back(VT); else if (NumVecs == 3) @@ -620,8 +739,8 @@ SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, return NULL; } -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, - bool isUpdating, +SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, + unsigned NumVecs, const uint16_t *Opcodes) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); SDLoc dl(N); @@ -630,28 +749,20 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, MemOp[0] = cast(N)->getMemOperand(); unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) + unsigned Vec0Idx = 3; EVT VT = N->getOperand(Vec0Idx).getValueType(); unsigned OpcodeIndex; - switch (VT.getSimpleVT().SimpleTy) { + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 
3 : 7; break; default: llvm_unreachable("unhandled vector store type"); - case MVT::v8i8: OpcodeIndex = 0; break; - case MVT::v4i16: OpcodeIndex = 1; break; - case MVT::v2f32: - case MVT::v2i32: OpcodeIndex = 2; break; - case MVT::v1f64: - case MVT::v1i64: OpcodeIndex = 3; break; - case MVT::v16i8: OpcodeIndex = 4; break; - case MVT::v8f16: - case MVT::v8i16: OpcodeIndex = 5; break; - case MVT::v4f32: - case MVT::v4i32: OpcodeIndex = 6; break; - case MVT::v2f64: - case MVT::v2i64: OpcodeIndex = 7; break; } unsigned Opc = Opcodes[OpcodeIndex]; - std::vector ResTys; + SmallVector ResTys; if (isUpdating) ResTys.push_back(MVT::i64); ResTys.push_back(MVT::Other); // Type for the Chain @@ -665,7 +776,6 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); Ops.push_back(Inc); } - bool is64BitVector = VT.is64BitVector(); SmallVector Regs(N->op_begin() + Vec0Idx, N->op_begin() + Vec0Idx + NumVecs); @@ -682,6 +792,237 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, return VSt; } +SDValue +AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, + SDValue Operand) { + SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, + VT, VTD, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + Operand, + CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32)); + return SDValue(Reg, 0); +} + +SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, + unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range"); + SDLoc dl(N); + + EVT VT = N->getValueType(0); + unsigned OpcodeIndex; + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; + default: llvm_unreachable("unhandled vector duplicate lane load type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SDValue SuperReg; + SmallVector Ops; + Ops.push_back(N->getOperand(1)); // Push back the Memory Address + if (isUpdating) { + SDValue Inc = N->getOperand(2); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + Ops.push_back(N->getOperand(0)); // Push back the Chain + + SmallVector ResTys; + // Push back the type of return super register + if (NumVecs == 3) + ResTys.push_back(MVT::Untyped); + else { + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, + is64BitVector ? NumVecs : NumVecs * 2); + ResTys.push_back(ResTy); + } + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register + ResTys.push_back(MVT::Other); // Type of the Chain + SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLdDup)->setMemRefs(MemOp, MemOp + 1); + + SuperReg = SDValue(VLdDup, 0); + unsigned Sub0 = is64BitVector ? 
AArch64::dsub_0 : AArch64::qsub_0; + // Update uses of each registers in super register + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + // Update uses of the Chain + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); + return NULL; +} + +// We only have 128-bit vector type of load/store lane instructions. +// If it is 64-bit vector, we also select it to the 128-bit instructions. +// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and +// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output. +SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, + bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); + SDLoc dl(N); + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; + + SDValue Chain = N->getOperand(0); + unsigned Lane = + cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); + EVT VT = N->getOperand(Vec0Idx).getValueType(); + bool is64BitVector = VT.is64BitVector(); + EVT VT64; // 64-bit Vector Type + + if (is64BitVector) { + VT64 = VT; + VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(), + VT.getVectorNumElements() * 2); + } + + unsigned OpcodeIndex; + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = 0; break; + case 16: OpcodeIndex = 1; break; + case 32: OpcodeIndex = 2; break; + case 64: OpcodeIndex = 3; break; + default: llvm_unreachable("unhandled vector lane load/store type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SmallVector ResTys; + if (IsLoad) { + // Push back the type of return super register + if (NumVecs == 3) + ResTys.push_back(MVT::Untyped); + else { + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, + is64BitVector ? NumVecs : NumVecs * 2); + ResTys.push_back(ResTy); + } + } + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register + ResTys.push_back(MVT::Other); // Type of Chain + SmallVector Ops; + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + + SmallVector Regs(N->op_begin() + Vec0Idx, + N->op_begin() + Vec0Idx + NumVecs); + if (is64BitVector) + for (unsigned i = 0; i < Regs.size(); i++) + Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]); + SDValue SuperReg = createQTuple(Regs); + + Ops.push_back(SuperReg); // Source Reg + SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32); + Ops.push_back(LaneValue); + Ops.push_back(Chain); // Push back the Chain + + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLdLn)->setMemRefs(MemOp, MemOp + 1); + if (!IsLoad) + return VLdLn; + + // Extract the subregisters. 
+ SuperReg = SDValue(VLdLn, 0); + unsigned Sub0 = AArch64::qsub_0; + // Update uses of each registers in super register + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg); + if (is64BitVector) { + SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0); + } + ReplaceUses(SDValue(N, Vec), SUB0); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); + return NULL; +} + +unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit, + unsigned NumOfVec) { + assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range"); + + unsigned Opc = 0; + switch (NumOfVec) { + default: + break; + case 1: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b; + else + Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b; + break; + case 2: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b; + else + Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b; + break; + case 3: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b; + else + Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b; + break; + case 4: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b; + else + Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b; + break; + } + + return Opc; +} + +SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs, + bool IsExt) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + SDLoc dl(N); + + // Check the element of look up table is 64-bit or not + unsigned Vec0Idx = IsExt ? 2 : 1; + assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() && + "The element of lookup table for vtbl and vtbx must be 128-bit"); + + // Check the return value type is 64-bit or not + EVT ResVT = N->getValueType(0); + bool is64BitRes = ResVT.is64BitVector(); + + // Create new SDValue for vector list + SmallVector Regs(N->op_begin() + Vec0Idx, + N->op_begin() + Vec0Idx + NumVecs); + SDValue TblReg = createQTuple(Regs); + unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs); + + SmallVector Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(TblReg); + Ops.push_back(N->getOperand(Vec0Idx + NumVecs)); + return CurDAG->getMachineNode(Opc, dl, ResVT, Ops); +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -772,15 +1113,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, TFI, CurDAG->getTargetConstant(0, PtrTy)); } - case ISD::ConstantPool: { - // Constant pools are fine, just create a Target entry. 
- ConstantPoolSDNode *CN = cast(Node); - const Constant *C = CN->getConstVal(); - SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0)); - - ReplaceUses(SDValue(Node, 0), CP); - return NULL; - } case ISD::Constant: { SDNode *ResNode = 0; if (cast(Node)->getZExtValue() == 0) { @@ -835,34 +1167,61 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed }; - return SelectVLD(Node, 1, true, Opcodes); + return SelectVLD(Node, true, 1, Opcodes); } case AArch64ISD::NEON_LD2_UPD: { static const uint16_t Opcodes[] = { AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, - AArch64::LD2WB_2S_fixed, AArch64::LD1WB2V_1D_fixed, + AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed }; - return SelectVLD(Node, 2, true, Opcodes); + return SelectVLD(Node, true, 2, Opcodes); } case AArch64ISD::NEON_LD3_UPD: { static const uint16_t Opcodes[] = { AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, - AArch64::LD3WB_2S_fixed, AArch64::LD1WB3V_1D_fixed, + AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed }; - return SelectVLD(Node, 3, true, Opcodes); + return SelectVLD(Node, true, 3, Opcodes); } case AArch64ISD::NEON_LD4_UPD: { static const uint16_t Opcodes[] = { AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, - AArch64::LD4WB_2S_fixed, AArch64::LD1WB4V_1D_fixed, + AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed }; - return SelectVLD(Node, 4, true, Opcodes); + return SelectVLD(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD1x2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed, + AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, + AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed, + AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed + }; + return SelectVLD(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD1x3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed, + AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, + AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed, + AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed + }; + return SelectVLD(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD1x4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed, + AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, + AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed, + AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed + }; + return SelectVLD(Node, true, 4, Opcodes); } case AArch64ISD::NEON_ST1_UPD: { static const uint16_t Opcodes[] = { @@ -871,34 +1230,179 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed }; - return SelectVST(Node, 1, true, Opcodes); + return SelectVST(Node, true, 1, Opcodes); } case AArch64ISD::NEON_ST2_UPD: { static const uint16_t Opcodes[] = { AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, - AArch64::ST2WB_2S_fixed, AArch64::ST1WB2V_1D_fixed, + AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed }; - return 
SelectVST(Node, 2, true, Opcodes); + return SelectVST(Node, true, 2, Opcodes); } case AArch64ISD::NEON_ST3_UPD: { static const uint16_t Opcodes[] = { AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, - AArch64::ST3WB_2S_fixed, AArch64::ST1WB3V_1D_fixed, + AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed }; - return SelectVST(Node, 3, true, Opcodes); + return SelectVST(Node, true, 3, Opcodes); } case AArch64ISD::NEON_ST4_UPD: { static const uint16_t Opcodes[] = { AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed, - AArch64::ST4WB_2S_fixed, AArch64::ST1WB4V_1D_fixed, + AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed }; - return SelectVST(Node, 4, true, Opcodes); + return SelectVST(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD2DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S, + AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H, + AArch64::LD2R_4S, AArch64::LD2R_2D + }; + return SelectVLDDup(Node, false, 2, Opcodes); + } + case AArch64ISD::NEON_LD3DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S, + AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H, + AArch64::LD3R_4S, AArch64::LD3R_2D + }; + return SelectVLDDup(Node, false, 3, Opcodes); + } + case AArch64ISD::NEON_LD4DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S, + AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H, + AArch64::LD4R_4S, AArch64::LD4R_2D + }; + return SelectVLDDup(Node, false, 4, Opcodes); + } + case AArch64ISD::NEON_LD2DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed, + AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed, + AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed, + AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD3DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed, + AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed, + AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed, + AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD4DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed, + AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed, + AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed, + AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD2LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed, + AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, true, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD3LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed, + AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, true, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD4LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed, + AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, 
true, true, 4, Opcodes); + } + case AArch64ISD::NEON_ST2LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed, + AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, false, true, 2, Opcodes); + } + case AArch64ISD::NEON_ST3LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed, + AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, false, true, 3, Opcodes); + } + case AArch64ISD::NEON_ST4LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed, + AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, false, true, 4, Opcodes); + } + case AArch64ISD::NEON_ST1x2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed, + AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, + AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed, + AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed + }; + return SelectVST(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_ST1x3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed, + AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, + AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed, + AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed + }; + return SelectVST(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_ST1x4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed, + AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, + AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed, + AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed + }; + return SelectVST(Node, true, 4, Opcodes); + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); + bool IsExt = false; + switch (IntNo) { + default: + break; + case Intrinsic::aarch64_neon_vtbx1: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl1: + return SelectVTBL(Node, 1, IsExt); + case Intrinsic::aarch64_neon_vtbx2: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl2: + return SelectVTBL(Node, 2, IsExt); + case Intrinsic::aarch64_neon_vtbx3: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl3: + return SelectVTBL(Node, 3, IsExt); + case Intrinsic::aarch64_neon_vtbx4: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl4: + return SelectVTBL(Node, 4, IsExt); + } + break; } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { @@ -906,66 +1410,149 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; - case Intrinsic::arm_neon_vld1: { - static const uint16_t Opcodes[] = { AArch64::LD1_8B, AArch64::LD1_4H, - AArch64::LD1_2S, AArch64::LD1_1D, - AArch64::LD1_16B, AArch64::LD1_8H, - AArch64::LD1_4S, AArch64::LD1_2D }; - return SelectVLD(Node, 1, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D, + AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D + }; + return SelectVLD(Node, false, 1, Opcodes); } case Intrinsic::arm_neon_vld2: { - static const uint16_t Opcodes[] = { AArch64::LD2_8B, AArch64::LD2_4H, - AArch64::LD2_2S, AArch64::LD1_2V_1D, - AArch64::LD2_16B, AArch64::LD2_8H, - AArch64::LD2_4S, AArch64::LD2_2D }; - return SelectVLD(Node, 2, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, 
AArch64::LD1x2_1D, + AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D + }; + return SelectVLD(Node, false, 2, Opcodes); } case Intrinsic::arm_neon_vld3: { - static const uint16_t Opcodes[] = { AArch64::LD3_8B, AArch64::LD3_4H, - AArch64::LD3_2S, AArch64::LD1_3V_1D, - AArch64::LD3_16B, AArch64::LD3_8H, - AArch64::LD3_4S, AArch64::LD3_2D }; - return SelectVLD(Node, 3, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D, + AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D + }; + return SelectVLD(Node, false, 3, Opcodes); } case Intrinsic::arm_neon_vld4: { - static const uint16_t Opcodes[] = { AArch64::LD4_8B, AArch64::LD4_4H, - AArch64::LD4_2S, AArch64::LD1_4V_1D, - AArch64::LD4_16B, AArch64::LD4_8H, - AArch64::LD4_4S, AArch64::LD4_2D }; - return SelectVLD(Node, 4, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D, + AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D + }; + return SelectVLD(Node, false, 4, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x2: { + static const uint16_t Opcodes[] = { + AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S, + AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H, + AArch64::LD1x2_4S, AArch64::LD1x2_2D + }; + return SelectVLD(Node, false, 2, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x3: { + static const uint16_t Opcodes[] = { + AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S, + AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H, + AArch64::LD1x3_4S, AArch64::LD1x3_2D + }; + return SelectVLD(Node, false, 3, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x4: { + static const uint16_t Opcodes[] = { + AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S, + AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H, + AArch64::LD1x4_4S, AArch64::LD1x4_2D + }; + return SelectVLD(Node, false, 4, Opcodes); } case Intrinsic::arm_neon_vst1: { - static const uint16_t Opcodes[] = { AArch64::ST1_8B, AArch64::ST1_4H, - AArch64::ST1_2S, AArch64::ST1_1D, - AArch64::ST1_16B, AArch64::ST1_8H, - AArch64::ST1_4S, AArch64::ST1_2D }; - return SelectVST(Node, 1, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D, + AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D + }; + return SelectVST(Node, false, 1, Opcodes); } case Intrinsic::arm_neon_vst2: { - static const uint16_t Opcodes[] = { AArch64::ST2_8B, AArch64::ST2_4H, - AArch64::ST2_2S, AArch64::ST1_2V_1D, - AArch64::ST2_16B, AArch64::ST2_8H, - AArch64::ST2_4S, AArch64::ST2_2D }; - return SelectVST(Node, 2, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D, + AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D + }; + return SelectVST(Node, false, 2, Opcodes); } case Intrinsic::arm_neon_vst3: { - static const uint16_t Opcodes[] = { AArch64::ST3_8B, AArch64::ST3_4H, - AArch64::ST3_2S, AArch64::ST1_3V_1D, - AArch64::ST3_16B, AArch64::ST3_8H, - AArch64::ST3_4S, AArch64::ST3_2D }; - return SelectVST(Node, 3, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D, + AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D + }; + return SelectVST(Node, false, 3, Opcodes); } case Intrinsic::arm_neon_vst4: { - static const uint16_t 
Opcodes[] = { AArch64::ST4_8B, AArch64::ST4_4H, - AArch64::ST4_2S, AArch64::ST1_4V_1D, - AArch64::ST4_16B, AArch64::ST4_8H, - AArch64::ST4_4S, AArch64::ST4_2D }; - return SelectVST(Node, 4, false, Opcodes); + static const uint16_t Opcodes[] = { + AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D, + AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D + }; + return SelectVST(Node, false, 4, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x2: { + static const uint16_t Opcodes[] = { + AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S, + AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H, + AArch64::ST1x2_4S, AArch64::ST1x2_2D + }; + return SelectVST(Node, false, 2, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x3: { + static const uint16_t Opcodes[] = { + AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S, + AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H, + AArch64::ST1x3_4S, AArch64::ST1x3_2D + }; + return SelectVST(Node, false, 3, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x4: { + static const uint16_t Opcodes[] = { + AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S, + AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H, + AArch64::ST1x4_4S, AArch64::ST1x4_2D + }; + return SelectVST(Node, false, 4, Opcodes); + } + case Intrinsic::arm_neon_vld2lane: { + static const uint16_t Opcodes[] = { + AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D + }; + return SelectVLDSTLane(Node, true, false, 2, Opcodes); } + case Intrinsic::arm_neon_vld3lane: { + static const uint16_t Opcodes[] = { + AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D + }; + return SelectVLDSTLane(Node, true, false, 3, Opcodes); } + case Intrinsic::arm_neon_vld4lane: { + static const uint16_t Opcodes[] = { + AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D + }; + return SelectVLDSTLane(Node, true, false, 4, Opcodes); + } + case Intrinsic::arm_neon_vst2lane: { + static const uint16_t Opcodes[] = { + AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D + }; + return SelectVLDSTLane(Node, false, false, 2, Opcodes); + } + case Intrinsic::arm_neon_vst3lane: { + static const uint16_t Opcodes[] = { + AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D + }; + return SelectVLDSTLane(Node, false, false, 3, Opcodes); + } + case Intrinsic::arm_neon_vst4lane: { + static const uint16_t Opcodes[] = { + AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D + }; + return SelectVLDSTLane(Node, false, false, 4, Opcodes); + } + } // End of switch IntNo break; - } + } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN default: break; // Let generic code handle it }
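Note on the opcode-index scheme shared by the selectors in this patch: SelectVLD, SelectVST, SelectVLDDup and SelectVLDSTLane now compute an index into their eight-entry opcode arrays from the element size and the overall vector width instead of switching on every MVT. The sketch below is a standalone illustration of that mapping, not code from the patch; the helper name opcodeIndexFor and the mnemonic strings are assumptions made for the example. Indices 0-3 select the 64-bit vector forms (8B, 4H, 2S, 1D) and 4-7 the 128-bit forms (16B, 8H, 4S, 2D), matching the layout of the opcode tables above.

// Standalone sketch of the OpcodeIndex computation used by the NEON
// load/store selectors in the patch (illustrative helper, not part of it).
#include <cassert>
#include <cstdio>

// Map (element size in bits, 64- vs 128-bit vector) to an index 0..7.
static unsigned opcodeIndexFor(unsigned ElemBits, bool Is64BitVector) {
  unsigned Base;
  switch (ElemBits) {
  case 8:  Base = 0; break;
  case 16: Base = 1; break;
  case 32: Base = 2; break;
  case 64: Base = 3; break;
  default: assert(false && "unhandled element size"); return 0;
  }
  return Is64BitVector ? Base : Base + 4;
}

int main() {
  // Table laid out like the ones in the patch: 8B, 4H, 2S, 1D, 16B, 8H, 4S, 2D.
  static const char *const LD2Names[] = {
    "LD2_8B", "LD2_4H", "LD2_2S", "LD1x2_1D",
    "LD2_16B", "LD2_8H", "LD2_4S", "LD2_2D"
  };
  // v4i16: 16-bit elements in a 64-bit vector, index 1, so LD2_4H.
  std::printf("%s\n", LD2Names[opcodeIndexFor(16, /*Is64BitVector=*/true)]);
  // v4i32: 32-bit elements in a 128-bit vector, index 6, so LD2_4S.
  std::printf("%s\n", LD2Names[opcodeIndexFor(32, /*Is64BitVector=*/false)]);
  return 0;
}

Index 3 in each multi-register table points at an LD1xN/ST1xN opcode because the interleaving LDn/STn instructions have no 1D arrangement; the patch accordingly renames those entries from the old LD1WBnV_1D/LD1_nV_1D spellings to the LD1xN forms used throughout.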
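The new INTRINSIC_WO_CHAIN handling dispatches the vtbl/vtbx intrinsics by letting each vtbxN case fall through into the matching vtblN case after setting IsExt, and getTBLOpc then picks one of sixteen TBLn/TBXn opcodes from IsExt, the result width and the number of table registers. The following is a minimal standalone sketch of that dispatch; the enum values and mnemonic strings are stand-ins for the intrinsic IDs and AArch64 opcodes, not the patch's code.

// Standalone sketch of the vtbl/vtbx dispatch and opcode choice shown above.
// Enum values and mnemonic formatting are illustrative only.
#include <cstdio>
#include <string>

enum Intrin { vtbl1, vtbx1, vtbl2, vtbx2, vtbl3, vtbx3, vtbl4, vtbx4 };

static std::string selectTBL(Intrin I, bool Is64BitResult) {
  bool IsExt = false;
  unsigned NumVecs = 0;
  switch (I) {
  case vtbx1: IsExt = true; // deliberate fall-through, as in the patch
  case vtbl1: NumVecs = 1; break;
  case vtbx2: IsExt = true; // deliberate fall-through
  case vtbl2: NumVecs = 2; break;
  case vtbx3: IsExt = true; // deliberate fall-through
  case vtbl3: NumVecs = 3; break;
  case vtbx4: IsExt = true; // deliberate fall-through
  case vtbl4: NumVecs = 4; break;
  }
  // A 64-bit result selects the _8b form, a 128-bit result the _16b form;
  // the lookup-table registers themselves are always 128-bit.
  return std::string(IsExt ? "TBX" : "TBL") + std::to_string(NumVecs) +
         (Is64BitResult ? "_8b" : "_16b");
}

int main() {
  std::printf("%s\n", selectTBL(vtbx3, /*Is64BitResult=*/true).c_str());  // TBX3_8b
  std::printf("%s\n", selectTBL(vtbl2, /*Is64BitResult=*/false).c_str()); // TBL2_16b
  return 0;
}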