From 630c5e06d633fad142af4b145ee684e90754700e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 13 Sep 2013 07:26:52 +0000 Subject: [PATCH] AArch64: use RegisterOperand for NEON registers. Previously we modelled VPR128 and VPR64 as essentially identical register-classes containing V0-V31 (which had Q0-Q31 as "sub_alias" sub-registers). This model is starting to cause significant problems for code generation, particularly writing EXTRACT/INSERT_SUBREG patterns for converting between the two. The change here switches to classifying VPR64 & VPR128 as RegisterOperands, which are essentially aliases for RegisterClasses with different parsing and printing behaviour. This fits almost exactly with their real status (VPR128 == FPR128 printed strangely, VPR64 == FPR64 printed strangely). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190665 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64AsmPrinter.cpp | 26 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 30 +- lib/Target/AArch64/AArch64InstrInfo.td | 12 +- lib/Target/AArch64/AArch64InstrNEON.td | 262 ++++++++---------- lib/Target/AArch64/AArch64RegisterInfo.td | 48 +--- .../AArch64/AsmParser/AArch64AsmParser.cpp | 82 ++++-- .../Disassembler/AArch64Disassembler.cpp | 32 +-- .../InstPrinter/AArch64InstPrinter.cpp | 8 + .../AArch64/InstPrinter/AArch64InstPrinter.h | 1 + test/MC/AArch64/neon-mov.s | 10 +- .../AArch64/neon-instructions.txt | 7 +- 11 files changed, 241 insertions(+), 277 deletions(-) diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 9498722a629..759809fbb72 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -32,17 +32,18 @@ using namespace llvm; /// argument to be printed as "bN". static bool printModifiedFPRAsmOperand(const MachineOperand &MO, const TargetRegisterInfo *TRI, - const TargetRegisterClass &RegClass, - raw_ostream &O) { + char RegType, raw_ostream &O) { if (!MO.isReg()) return true; for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (RegClass.contains(*AR)) { - O << AArch64InstPrinter::getRegisterName(*AR); + if (AArch64::FPR8RegClass.contains(*AR)) { + O << RegType << TRI->getEncodingValue(MO.getReg()); return false; } } + + // The register doesn't correspond to anything floating-point like. return true; } @@ -157,7 +158,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // register. Technically, we could allocate the argument as a VPR128, but // that leads to extremely dodgy copies being generated to get the data // there. - if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) + if (printModifiedFPRAsmOperand(MO, TRI, 'v', O)) O << AArch64InstPrinter::getRegisterName(MO.getReg()); break; case MachineOperand::MO_Immediate: @@ -211,25 +212,12 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // copies ...). llvm_unreachable("FIXME: Unimplemented register pairs"); case 'b': - // Output 8-bit FP/SIMD scalar register operand, prefixed with b. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR8RegClass, O); case 'h': - // Output 16-bit FP/SIMD scalar register operand, prefixed with h. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR16RegClass, O); case 's': - // Output 32-bit FP/SIMD scalar register operand, prefixed with s. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR32RegClass, O); case 'd': - // Output 64-bit FP/SIMD scalar register operand, prefixed with d. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR64RegClass, O); case 'q': - // Output 128-bit FP/SIMD scalar register operand, prefixed with q. return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR128RegClass, O); + ExtraCode[0], O); case 'A': // Output symbolic address with appropriate relocation modifier (also // suitable for ADRP). diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index b68c43a63cb..8597f073fc5 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -57,17 +57,17 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) if (Subtarget->hasNEON()) { // And the vectors - addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); + addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); } computeRegisterProperties(); @@ -3610,14 +3610,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &AArch64::FPR16RegClass); else if (VT == MVT::f32) return std::make_pair(0U, &AArch64::FPR32RegClass); - else if (VT == MVT::f64) - return std::make_pair(0U, &AArch64::FPR64RegClass); else if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &AArch64::VPR64RegClass); - else if (VT == MVT::f128) - return std::make_pair(0U, &AArch64::FPR128RegClass); + return std::make_pair(0U, &AArch64::FPR64RegClass); else if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &AArch64::VPR128RegClass); + return std::make_pair(0U, &AArch64::FPR128RegClass); break; } } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 07289b0be14..fef3019ef87 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -125,6 +125,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; + //===----------------------------------------------------------------------===// // Call sequence pseudo-instructions //===----------------------------------------------------------------------===// @@ -2196,13 +2198,13 @@ def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra), +def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)), +def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } @@ -5162,4 +5164,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), // Advanced SIMD (NEON) Support // -include "AArch64InstrNEON.td" \ No newline at end of file +include "AArch64InstrNEON.td" diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index b8840aa18c1..f600d248b68 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -215,8 +215,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and // two operands constraints. class NeonI_3VSame_Constraint_impl size, bits<5> opcode, - SDPatternOperator opnode> + RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, + bits<5> opcode, SDPatternOperator opnode> : NeonI_3VSame; // ORR disassembled as MOV if Vn==Vm // Vector Move - register -// Alias for ORR if Vn=Vm and it is the preferred syntax +// Alias for ORR if Vn=Vm. +// FIXME: This is actually the preferred syntax but TableGen can't deal with +// custom printing of aliases. def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", - (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; + (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>; def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", - (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; + (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>; def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{ ConstantSDNode *ImmConstVal = cast(N->getOperand(0)); @@ -571,7 +573,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), // NeonI_compare_aliases class: swaps register operands to implement // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. class NeonI_compare_aliases + Instruction inst, RegisterOperand VPRC> : NeonInstAlias; @@ -1324,7 +1326,7 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; } class NeonI_mov_imm_lsl_aliases + Instruction inst, RegisterOperand VPRC> : NeonInstAlias; @@ -1401,7 +1403,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1, // Vector Floating Point Move Immediate -class NeonI_FMOV_impl : NeonI_1VModImm; def shr_imm64 : shr_imm<"64">; class N2VShift opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; // Rounding/Saturating shift class N2VShift_RQ opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm; defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; class N2VShiftAdd opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> : NeonI_2VShiftImm; // Rounding shift accumulate class N2VShiftAdd_R opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm; // Shift insert by immediate class N2VShiftIns opcode, string asmop, string T, - RegisterClass VPRC, ValueType Ty, Operand ImmTy, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDPatternOperator OpNode> : NeonI_2VShiftImm opcode, string asmop, string DestT, class N2VShR_Narrow_Hi opcode, string asmop, string DestT, string SrcT, Operand ImmTy> : NeonI_2VShiftImm { let Constraints = "$src = $Rd"; @@ -2040,15 +2042,18 @@ multiclass Neon_shiftNarrow_patterns { def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert (v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, imm:$Imm)))))), - (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; } multiclass Neon_shiftNarrow_QR_patterns { @@ -2060,17 +2065,20 @@ multiclass Neon_shiftNarrow_QR_patterns { (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), + (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_16B") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), + (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_8H") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; def : Pat<(Neon_combine (v1i64 VPR64:$src), - (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), + (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), (!cast(prefix # "_4S") - VPR64:$src, VPR128:$Rn, imm:$Imm)>; + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; } defm : Neon_shiftNarrow_patterns<"lshr">; @@ -2086,7 +2094,7 @@ defm : Neon_shiftNarrow_QR_patterns; // Convert fix-point and float-pointing class N2VCvt_Fx opcode, string asmop, string T, - RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, + RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy, Operand ImmTy, SDPatternOperator IntOp> : NeonI_2VShiftImm; class NeonI_3VDL size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator ext, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff; class NeonI_3VDW size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator ext, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff size, bits<4> opcode, string asmop, string ResS, string OpS, @@ -2361,7 +2369,7 @@ defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; class NeonI_3VD_2Op size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, - RegisterClass ResVPR, RegisterClass OpVPR, + RegisterOperand ResVPR, RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff opcode, defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; -// pattern for acle intrinsic with 3 operands -class NeonI_3VDN_addhn2_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator get_hi, - ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDN_addhn2_3Op_v1 opcode, - string asmop, - SDPatternOperator opnode> -{ - def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", - opnode, NI_get_hi_8h, v8i16, v8i8>; - def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", - opnode, NI_get_hi_4s, v4i32, v4i16>; - def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", - opnode, NI_get_hi_2d, v2i64, v2i32>; -} - -defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>; -defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>; - // pattern for acle intrinsic with 3 operands class NeonI_3VDN_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType OpTy, ValueType OpSTy> + string asmop, string ResS, string OpS> : NeonI_3VDiff { + [], NoItinerary> { let Constraints = "$src = $Rd"; + let neverHasSideEffects = 1; } multiclass NeonI_3VDN_3Op_v1 opcode, - string asmop, - SDPatternOperator opnode> -{ - def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", - opnode, v8i16, v8i8>; - def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", - opnode, v4i32, v4i16>; - def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", - opnode, v2i64, v2i32>; -} - -defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2", - int_arm_neon_vraddhn>; -defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2", - int_arm_neon_vrsubhn>; + string asmop> { + def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; + def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; + def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; +} + +defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; +defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; + +defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; +defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; + +// Patterns have to be separate because there's a SUBREG_TO_REG in the output +// part. +class NarrowHighHalfPat + : Pat<(Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), + (SrcTy VPR128:$Rm)))))), + (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, VPR128:$Rm)>; + +// addhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// subhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// raddhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; + +// rsubhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; // pattern that need to extend result class NeonI_3VDL_Ext size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy, ValueType OpSTy> : NeonI_3VDiff; class NeonI_3VDL_Aba size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator subop, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy, ValueType OpSTy> : NeonI_3VDiff; class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, string asmop, string ResS, string OpS, SDPatternOperator subop, SDPatternOperator opnode, - RegisterClass OpVPR, + RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy> : NeonI_3VDiff opcode, class Neon_Scalar_D_size_patterns : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (SUBREG_TO_REG (i64 0), - (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64), - (EXTRACT_SUBREG VPR64:$Rm, sub_64)), - sub_64)>; - + (INSTD VPR64:$Rn, VPR64:$Rm)>; // Scalar Integer Add let isCommutable = 1 in { @@ -2994,54 +2990,28 @@ def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>; -def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), - (f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>; -def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>; -def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), - (f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>; - -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), - (v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), - (v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), - (v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), - (v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), - (v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), - (v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), - (v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), - (v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), - (v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), - (v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), - (v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), - sub_alias))>; +def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; + +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b3a81b1dc0a..e0eca23c64f 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -17,10 +17,6 @@ def sub_64 : SubRegIndex<64>; def sub_32 : SubRegIndex<32>; def sub_16 : SubRegIndex<16>; def sub_8 : SubRegIndex<8>; - -// The VPR registers are handled as sub-registers of FPR equivalents, but -// they're really the same thing. We give this concept a special index. -def sub_alias : SubRegIndex<128>; } // Registers are identified with 5-bit ID numbers. @@ -149,48 +145,28 @@ def FPR32 : RegisterClass<"AArch64", [f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64], 64, - (sequence "D%u", 0, 31)> { -} +def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], + 64, (sequence "D%u", 0, 31)>; + +def FPR128 : RegisterClass<"AArch64", + [f128,v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, + (sequence "Q%u", 0, 31)>; -def FPR128 : RegisterClass<"AArch64", [f128], 128, - (sequence "Q%u", 0, 31)> { -} //===----------------------------------------------------------------------===// // Vector registers: //===----------------------------------------------------------------------===// -// NEON registers simply specify the overall vector, and it's expected that -// Instructions will individually specify the acceptable data layout. In -// principle this leaves two approaches open: -// + An operand, giving a single ADDvvv instruction (for example). This turns -// out to be unworkable in the assembly parser (without every Instruction -// having a "cvt" function, at least) because the constraints can't be -// properly enforced. It also complicates specifying patterns since each -// instruction will accept many types. -// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific -// details about NEON registers, but simplifies most other details. -// -// The second approach was taken. - -foreach Index = 0-31 in { - def V # Index : AArch64RegWithSubs("Q" # Index)], - [sub_alias]>, - DwarfRegNum<[!add(Index, 64)]>; +def VPR64AsmOperand : AsmOperandClass { + let Name = "VPR"; + let PredicateMethod = "isReg"; + let RenderMethod = "addRegOperands"; } -// These two classes contain the same registers, which should be reasonably -// sensible for MC and allocation purposes, but allows them to be treated -// separately for things like stack spilling. -def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64, - (sequence "V%u", 0, 31)>; +def VPR64 : RegisterOperand; -def VPR128 : RegisterClass<"AArch64", - [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, - (sequence "V%u", 0, 31)>; +def VPR128 : RegisterOperand; // Flags register def NZCV : Register<"nzcv"> { diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f7e9c6fe172..51638d90e3c 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1556,22 +1556,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, std::string LowerReg = Tok.getString().lower(); size_t DotPos = LowerReg.find('.'); - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; - + bool IsVec128 = false; SMLoc S = Tok.getLoc(); RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - if (DotPos == StringRef::npos) { + if (DotPos == std::string::npos) { Layout = StringRef(); } else { // Everything afterwards needs to be a literal token, expected to be @@ -1582,19 +1571,76 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, // would go out of scope when we return). LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); + + // See if it's a 128-bit layout first. Layout = StringSwitch(LayoutText) - .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Case(".d", ".d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".16b", ".16b") .Default(""); + if (Layout.size() != 0) + IsVec128 = true; + else { + Layout = StringSwitch(LayoutText) + .Case(".1d", ".1d") + .Case(".2s", ".2s") + .Case(".4h", ".4h") + .Case(".8b", ".8b") + .Default(""); + } + if (Layout.size() == 0) { - // Malformed register + // If we've still not pinned it down the register is malformed. return false; } } + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0) + .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) + .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) + .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) + .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) + .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) + .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) + .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) + .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) + .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) + .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) + .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) + .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) + .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) + .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) + .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) + .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) + .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) + .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) + .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) + .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) + .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) + .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) + .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) + .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) + .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) + .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) + .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) + .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) + .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) + .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30) + .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + return true; } diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 5b57b50a98d..3baa4b5304c 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -85,12 +85,6 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, @@ -355,28 +349,6 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus -DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, uint64_t Address, @@ -608,11 +580,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); if (IsToVec) { - DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); } else { DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); } // Add the lane diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index b6243310d58..d2b6f6d0127 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, O << "#" << (Imm * MemScale); } +void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNo).getReg(); + std::string Name = getRegisterName(Reg); + Name[0] = 'v'; + O << Name; +} + void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index f7439bec668..b608908ca5b 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -157,6 +157,7 @@ public: void printRegExtendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 83313724107..c2ca8032200 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -195,13 +195,15 @@ //---------------------------------------------------------------------- // Vector Move - register //---------------------------------------------------------------------- + + // FIXME: these should all print with the "mov" syntax. mov v0.8b, v31.8b mov v15.16b, v16.16b orr v0.8b, v31.8b, v31.8b orr v15.16b, v16.16b, v16.16b -// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] -// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] -// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] -// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] +// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] +// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] +// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] +// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index fa9f73fe2a0..ecb6249fafa 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -131,8 +131,11 @@ #------------------------------------------------------------------------------ # Vector Move - register #------------------------------------------------------------------------------ -# CHECK: mov v1.16b, v15.16b -# CHECK: mov v25.8b, v4.8b + +# FIXME: these should print as "mov", but TableGen can't handle it. + +# CHECK: orr v1.16b, v15.16b, v15.16b +# CHECK: orr v25.8b, v4.8b, v4.8b 0xe1 0x1d 0xaf 0x4e 0x99 0x1c 0xa4 0x0e -- 2.34.1