X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64InstrInfo.cpp;h=afb20341f781153f94e725db8a5170dd33383e1b;hb=0c0cd3a4ee0a9bf48b396b2c94bbd0504f2e0e57;hp=f90bcefad4934a1e43bd835abe4bfb7495be230d;hpb=4393f48c03300203594e22d248808f20dd59d886;p=oota-llvm.git

diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index f90bcefad49..afb20341f78 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -26,10 +26,9 @@
 #include "llvm/IR/Function.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-
 #include <algorithm>
 
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
 #include "AArch64GenInstrInfo.inc"
 
 using namespace llvm;
 
@@ -68,45 +67,94 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
       .addImm(A64SysReg::NZCV);
   } else if (AArch64::GPR64RegClass.contains(DestReg)) {
-    assert(AArch64::GPR64RegClass.contains(SrcReg));
-    Opc = AArch64::ORRxxx_lsl;
-    ZeroReg = AArch64::XZR;
+    if (AArch64::GPR64RegClass.contains(SrcReg)) {
+      Opc = AArch64::ORRxxx_lsl;
+      ZeroReg = AArch64::XZR;
+    } else {
+      assert(AArch64::FPR64RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::GPR32RegClass.contains(DestReg)) {
-    assert(AArch64::GPR32RegClass.contains(SrcReg));
-    Opc = AArch64::ORRwww_lsl;
-    ZeroReg = AArch64::WZR;
+    if (AArch64::GPR32RegClass.contains(SrcReg)) {
+      Opc = AArch64::ORRwww_lsl;
+      ZeroReg = AArch64::WZR;
+    } else {
+      assert(AArch64::FPR32RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR32RegClass.contains(DestReg)) {
-    assert(AArch64::FPR32RegClass.contains(SrcReg));
-    BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
-      .addReg(SrcReg);
-    return;
+    if (AArch64::FPR32RegClass.contains(SrcReg)) {
+      BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
+    else {
+      assert(AArch64::GPR32RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR64RegClass.contains(DestReg)) {
-    assert(AArch64::FPR64RegClass.contains(SrcReg));
-    BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
-      .addReg(SrcReg);
-    return;
+    if (AArch64::FPR64RegClass.contains(SrcReg)) {
+      BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
+    else {
+      assert(AArch64::GPR64RegClass.contains(SrcReg));
+      BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
+        .addReg(SrcReg);
+      return;
+    }
   } else if (AArch64::FPR128RegClass.contains(DestReg)) {
     assert(AArch64::FPR128RegClass.contains(SrcReg));
-    // FIXME: there's no good way to do this, at least without NEON:
-    //   + There's no single move instruction for q-registers
-    //   + We can't create a spill slot and use normal STR/LDR because stack
-    //     allocation has already happened
-    //   + We can't go via X-registers with FMOV because register allocation has
-    //     already happened.
-    // This may not be efficient, but at least it works.
-    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
-      .addReg(SrcReg)
-      .addReg(AArch64::XSP)
-      .addImm(0x1ff & -16);
-
-    BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
-      .addReg(AArch64::XSP, RegState::Define)
-      .addReg(AArch64::XSP)
-      .addImm(16);
+    // If NEON is enabled, use ORR to implement this copy.
+    // If NEON isn't available, emit STR and LDR to handle this.
+    if (getSubTarget().hasNEON()) {
+      BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+        .addReg(SrcReg)
+        .addReg(SrcReg);
+      return;
+    } else {
+      BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+        .addReg(SrcReg)
+        .addReg(AArch64::XSP)
+        .addImm(0x1ff & -16);
+
+      BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+        .addReg(AArch64::XSP, RegState::Define)
+        .addReg(AArch64::XSP)
+        .addImm(16);
+      return;
+    }
+  } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
+    // Copies between FPR8 registers are done via their FPR32 super-registers.
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
+                                            &AArch64::FPR32RegClass);
+    unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
+                                            &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
+      .addReg(Src);
+    return;
+  } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
+    // Copies between FPR16 registers are done via their FPR32 super-registers.
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
+                                            &AArch64::FPR32RegClass);
+    unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
+                                            &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
+      .addReg(Src);
     return;
   } else {
-    llvm_unreachable("Unknown register class in copyPhysReg");
+    CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
+    return;
   }
 
   // E.g. ORR xDst, xzr, xSrc, lsl #0
@@ -116,15 +164,53 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     .addImm(0);
 }
 
-MachineInstr *
-AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
-                                           uint64_t Offset, const MDNode *MDPtr,
-                                           DebugLoc DL) const {
-  MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
-    .addFrameIndex(FrameIx).addImm(0)
-    .addImm(Offset)
-    .addMetadata(MDPtr);
-  return &*MIB;
+void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        DebugLoc DL, unsigned DestReg,
+                                        unsigned SrcReg) const {
+  unsigned SubRegs;
+  bool IsQRegs;
+  if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 2;
+    IsQRegs = false;
+  } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 3;
+    IsQRegs = false;
+  } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 4;
+    IsQRegs = false;
+  } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 2;
+    IsQRegs = true;
+  } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 3;
+    IsQRegs = true;
+  } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
+    SubRegs = 4;
+    IsQRegs = true;
+  } else
+    llvm_unreachable("Unknown register class");
+
+  unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
+  int Spacing = 1;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  // Copy register tuples backward when the first Dest reg overlaps
+  // with SrcReg.
+  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
+    BeginIdx = BeginIdx + (SubRegs - 1);
+    Spacing = -1;
+  }
+
+  unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
+  for (unsigned i = 0; i != SubRegs; ++i) {
+    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
+    assert(Dst && Src && "Bad sub-register");
+    BuildMI(MBB, I, DL, get(Opc), Dst)
+      .addReg(Src)
+      .addReg(Src);
+  }
+  return;
 }
 
 /// Does the Opcode represent a conditional branch that we can remove and re-add
@@ -401,10 +487,12 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
-  } else {
-    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
-            RC->hasType(MVT::f128))
-           && "Expected integer or floating type for store");
+  } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
+    StoreOp = AArch64::LSFP8_STR;
+  } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
+    StoreOp = AArch64::LSFP16_STR;
+  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+             RC->hasType(MVT::f128)) {
     switch (RC->getSize()) {
     case 4: StoreOp = AArch64::LSFP32_STR; break;
     case 8: StoreOp = AArch64::LSFP64_STR; break;
@@ -412,6 +500,28 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
+  } else { // Super-register classes with more than one sub-register.
+    if (AArch64::DPairRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x2_8B;
+    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x3_8B;
+    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x4_8B;
+    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x2_16B;
+    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x3_16B;
+    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
+      StoreOp = AArch64::ST1x4_16B;
+    else
+      llvm_unreachable("Unknown reg class");
+
+    MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+    // Vector stores take different operands than the other store instructions.
+    NewMI.addFrameIndex(FrameIdx)
+         .addReg(SrcReg, getKillRegState(isKill))
+         .addMemOperand(MMO);
+    return;
   }
 
   MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
@@ -447,10 +557,12 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     default:
      llvm_unreachable("Unknown size for regclass");
     }
-  } else {
-    assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
-            || RC->hasType(MVT::f128))
-           && "Expected integer or floating type for store");
+  } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
+    LoadOp = AArch64::LSFP8_LDR;
+  } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
+    LoadOp = AArch64::LSFP16_LDR;
+  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+             RC->hasType(MVT::f128)) {
     switch (RC->getSize()) {
     case 4: LoadOp = AArch64::LSFP32_LDR; break;
     case 8: LoadOp = AArch64::LSFP64_LDR; break;
@@ -458,6 +570,27 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     default:
       llvm_unreachable("Unknown size for regclass");
     }
+  } else { // Super-register classes with more than one sub-register.
+    if (AArch64::DPairRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x2_8B;
+    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x3_8B;
+    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x4_8B;
+    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x2_16B;
+    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x3_16B;
+    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
+      LoadOp = AArch64::LD1x4_16B;
+    else
+      llvm_unreachable("Unknown reg class");
+
+    MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+    // Vector loads take different operands than the other load instructions.
+    NewMI.addFrameIndex(FrameIdx)
+         .addMemOperand(MMO);
+    return;
   }
 
   MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
@@ -496,7 +629,8 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
                                              int &AccessScale, int &MinOffset,
                                              int &MaxOffset) const {
   switch (MI.getOpcode()) {
-  default: llvm_unreachable("Unkown load/store kind");
+  default:
+    llvm_unreachable("Unknown load/store kind");
   case TargetOpcode::DBG_VALUE:
     AccessScale = 1;
     MinOffset = INT_MIN;
@@ -555,6 +689,32 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
     MinOffset = -0x40 * AccessScale;
     MaxOffset = 0x3f * AccessScale;
     return;
+  case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
+    AccessScale = 16;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
+    AccessScale = 24;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
+  case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
+    AccessScale = 32;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
+    AccessScale = 48;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
+  case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
+    AccessScale = 64;
+    MinOffset = 0;
+    MaxOffset = 0xfff * AccessScale;
+    return;
   }
 }
 
@@ -570,18 +730,15 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   if (MI.getOpcode() == AArch64::INLINEASM)
     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
 
-  if (MI.isLabel())
-    return 0;
-
   switch (MI.getOpcode()) {
   case TargetOpcode::BUNDLE:
     return getInstBundleLength(MI);
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
-  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::CFI_INSTRUCTION:
   case TargetOpcode::EH_LABEL:
+  case TargetOpcode::GC_LABEL:
   case TargetOpcode::DBG_VALUE:
-    return 0;
   case AArch64::TLSDESCCALL:
     return 0;
   default:
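
A note on the copy order in CopyPhysRegTuple above: the backward walk taken
when DestReg's first sub-register overlaps SrcReg is the same direction trick
memmove uses. The standalone sketch below (plain C++, not LLVM code; RegFile,
copyTuple and the register numbers are invented for illustration) shows why a
forward walk would clobber a source sub-register before it is read.

#include <cassert>
#include <cstdio>

// A toy register file; each slot stands for one D- or Q-register.
static int RegFile[8] = {10, 11, 12, 13, 14, 15, 16, 17};

// Copy SubRegs consecutive registers from SrcBase to DstBase. Mirroring
// CopyPhysRegTuple, walk backward when the first destination register
// falls inside the source tuple.
static void copyTuple(int DstBase, int SrcBase, int SubRegs) {
  int BeginIdx = 0;
  int Spacing = 1;
  if (DstBase >= SrcBase && DstBase < SrcBase + SubRegs) {
    BeginIdx = SubRegs - 1; // start at the last sub-register...
    Spacing = -1;           // ...and walk toward the first
  }
  for (int i = 0; i != SubRegs; ++i) {
    int Idx = BeginIdx + i * Spacing;
    RegFile[DstBase + Idx] = RegFile[SrcBase + Idx];
  }
}

int main() {
  // Overlapping copy {R1,R2,R3} <- {R0,R1,R2}: a forward walk would write
  // R1 before reading it; the backward walk keeps all three values intact.
  copyTuple(/*DstBase=*/1, /*SrcBase=*/0, /*SubRegs=*/3);
  assert(RegFile[1] == 10 && RegFile[2] == 11 && RegFile[3] == 12);
  std::printf("%d %d %d\n", RegFile[1], RegFile[2], RegFile[3]);
  return 0;
}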
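The AccessScale/MinOffset/MaxOffset triples produced by getAddressConstraints
describe which immediate offsets an access can encode: the offset must be a
multiple of AccessScale and must lie in [MinOffset, MaxOffset]. A minimal
sketch of that legality check follows; isOffsetEncodable is a hypothetical
helper written for illustration, not the actual LLVM caller of
getAddressConstraints.

#include <cassert>

// True if Offset satisfies constraints of the kind getAddressConstraints
// returns: aligned to the access scale and inside the encodable range.
static bool isOffsetEncodable(long Offset, int AccessScale, long MinOffset,
                              long MaxOffset) {
  return Offset % AccessScale == 0 && Offset >= MinOffset &&
         Offset <= MaxOffset;
}

int main() {
  // ST1x2_8B above: AccessScale = 16, MinOffset = 0, MaxOffset = 0xfff * 16.
  assert(isOffsetEncodable(32, 16, 0, 0xfff * 16));   // aligned and in range
  assert(!isOffsetEncodable(20, 16, 0, 0xfff * 16));  // not a multiple of 16
  assert(!isOffsetEncodable(-16, 16, 0, 0xfff * 16)); // below MinOffset
  return 0;
}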