X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64LoadStoreOptimizer.cpp;h=27d569d7043228e0b7f960650e3e4e1543bf33b3;hb=1f5f023fe6acfb5d3bf41cc19045e3e187707bf0;hp=32933930ac97960564031c6f52fcde445bb39018;hpb=575b88edcbf673417d8a8d7980808b31b4fef898;p=oota-llvm.git

diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 32933930ac9..27d569d7043 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -41,7 +41,8 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
-STATISTIC(NumSmallTypeMerged, "Number of small type loads merged");
+STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 
 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20),
                                    cl::Hidden);
@@ -152,6 +153,8 @@ static bool isUnscaledLdSt(unsigned Opc) {
   case AArch64::STURSi:
   case AArch64::STURDi:
   case AArch64::STURQi:
+  case AArch64::STURBBi:
+  case AArch64::STURHHi:
   case AArch64::STURWi:
   case AArch64::STURXi:
   case AArch64::LDURSi:
@@ -189,7 +192,23 @@ static unsigned getBitExtrOpcode(MachineInstr *MI) {
   }
 }
 
-static bool isSmallTypeLdMerge(unsigned Opc) {
+static bool isNarrowStore(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
+    return true;
+  }
+}
+
+static bool isNarrowStore(MachineInstr *MI) {
+  return isNarrowStore(MI->getOpcode());
+}
+
+static bool isNarrowLoad(unsigned Opc) {
   switch (Opc) {
   default:
     return false;
@@ -205,8 +224,8 @@ static bool isSmallTypeLdMerge(unsigned Opc) {
   }
 }
 
-static bool isSmallTypeLdMerge(MachineInstr *MI) {
-  return isSmallTypeLdMerge(MI->getOpcode());
+static bool isNarrowLoad(MachineInstr *MI) {
+  return isNarrowLoad(MI->getOpcode());
 }
 
 // Scaling factor for unscaled load or store.
@@ -219,12 +238,14 @@ static int getMemScale(MachineInstr *MI) {
   case AArch64::LDRSBWui:
   case AArch64::LDURSBWi:
   case AArch64::STRBBui:
+  case AArch64::STURBBi:
     return 1;
   case AArch64::LDRHHui:
   case AArch64::LDURHHi:
   case AArch64::LDRSHWui:
   case AArch64::LDURSHWi:
   case AArch64::STRHHui:
+  case AArch64::STURHHi:
     return 2;
   case AArch64::LDRSui:
   case AArch64::LDURSi:
@@ -278,6 +299,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
   case AArch64::STURDi:
   case AArch64::STRQui:
   case AArch64::STURQi:
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
   case AArch64::STRWui:
   case AArch64::STURWi:
   case AArch64::STRXui:
@@ -327,6 +352,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
   case AArch64::STRQui:
   case AArch64::STURQi:
     return AArch64::STPQi;
+  case AArch64::STRBBui:
+    return AArch64::STRHHui;
+  case AArch64::STRHHui:
+    return AArch64::STRWui;
+  case AArch64::STURBBi:
+    return AArch64::STURHHi;
+  case AArch64::STURHHi:
+    return AArch64::STURWi;
   case AArch64::STRWui:
   case AArch64::STURWi:
     return AArch64::STPWi;
@@ -582,7 +615,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
 
   int OffsetImm = getLdStOffsetOp(RtMI).getImm();
 
-  if (isSmallTypeLdMerge(Opc)) {
+  if (isNarrowLoad(Opc)) {
     // Change the scaled offset from small to large type.
     if (!IsUnscaled) {
       assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
@@ -681,17 +714,33 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     return NextI;
   }
 
-  // Handle Unscaled
-  if (IsUnscaled)
-    OffsetImm /= OffsetStride;
-
-  // Construct the new instruction.
-  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
-                                    I->getDebugLoc(), TII->get(NewOpc))
-                                .addOperand(getLdStRegOp(RtMI))
-                                .addOperand(getLdStRegOp(Rt2MI))
-                                .addOperand(BaseRegOp)
-                                .addImm(OffsetImm);
+  MachineInstrBuilder MIB;
+  if (isNarrowStore(Opc)) {
+    // Change the scaled offset from small to large type.
+    if (!IsUnscaled) {
+      assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+      OffsetImm /= 2;
+    }
+    MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                  TII->get(NewOpc))
+              .addOperand(getLdStRegOp(I))
+              .addOperand(BaseRegOp)
+              .addImm(OffsetImm);
+    // Copy MachineMemOperands from the original stores.
+    concatenateMemOperands(MIB, I, Paired);
+  } else {
+    // Handle Unscaled
+    if (IsUnscaled)
+      OffsetImm /= OffsetStride;
+    MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+                  TII->get(NewOpc))
+              .addOperand(getLdStRegOp(RtMI))
+              .addOperand(getLdStRegOp(Rt2MI))
+              .addOperand(BaseRegOp)
+              .addImm(OffsetImm);
+  }
+  (void)MIB;
 
   // FIXME: Do we need/want to copy the mem operands from the source
@@ -830,6 +879,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   unsigned Reg = getLdStRegOp(FirstMI).getReg();
   unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
+  bool IsNarrowStore = isNarrowStore(Opc);
+
+  // For narrow stores, find only the case where the stored value is WZR.
+  if (IsNarrowStore && Reg != AArch64::WZR)
+    return E;
 
   // Early exit if the first instruction modifies the base register.
   // e.g., ldr x0, [x0]
@@ -840,7 +894,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
   // range, plus allow an extra one in case we find a later insn that matches
   // with Offset-1)
   int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
-  if (!isSmallTypeLdMerge(Opc) &&
+  if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
       !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
     return E;
 
@@ -900,17 +954,17 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // If the resultant immediate offset of merging these instructions
       // is out of range for a pairwise instruction, bail and keep looking.
       bool MIIsUnscaled = isUnscaledLdSt(MI);
-      bool IsSmallTypeLd = isSmallTypeLdMerge(MI->getOpcode());
-      if (!IsSmallTypeLd &&
+      bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
+      if (!IsNarrowLoad &&
           !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
         trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
         MemInsns.push_back(MI);
         continue;
       }
 
-      if (IsSmallTypeLd) {
-        // If the alignment requirements of the larger type scaled load
-        // instruction can't express the scaled offset of the smaller type
+      if (IsNarrowLoad || IsNarrowStore) {
+        // If the alignment requirements of the scaled wide load/store
+        // instruction can't express the offset of the scaled narrow
+        // input, bail and keep looking.
         if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
           trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
@@ -930,7 +984,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
       // If the destination register of the loads is the same register, bail
       // and keep looking. A load-pair instruction with both destination
       // registers the same is UNPREDICTABLE and will result in an exception.
-      if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
+      // For narrow stores, allow only when the stored value is the same
+      // (i.e., WZR).
+      if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
+          (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
         trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
         MemInsns.push_back(MI);
         continue;
@@ -1227,8 +1284,10 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
   LdStPairFlags Flags;
   MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit);
   if (Paired != E) {
-    if (isSmallTypeLdMerge(MI)) {
-      ++NumSmallTypeMerged;
+    if (isNarrowLoad(MI)) {
+      ++NumNarrowLoadsPromoted;
+    } else if (isNarrowStore(MI)) {
+      ++NumZeroStoresPromoted;
     } else {
       ++NumPairCreated;
       if (isUnscaledLdSt(MI))
@@ -1285,11 +1344,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDRHHui:
     case AArch64::LDRSBWui:
     case AArch64::LDRSHWui:
+    case AArch64::STRBBui:
+    case AArch64::STRHHui:
     // Unscaled instructions.
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
    case AArch64::LDURSBWi:
-    case AArch64::LDURSHWi: {
+    case AArch64::LDURSHWi:
+    case AArch64::STURBBi:
+    case AArch64::STURHHi: {
      if (tryToMergeLdStInst(MBBI)) {
        Modified = true;
        break;
      }
@@ -1463,14 +1526,12 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
 }
 
 bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
-  const AArch64Subtarget *SubTarget =
-      &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
-  bool ProfitableArch = SubTarget->isCortexA57();
+  bool ProfitableArch = Subtarget->isCortexA57();
   // FIXME: The benefit from converting narrow loads into a wider load could be
   // microarchitectural as it assumes that a single load with two bitfield
   // extracts is cheaper than two narrow loads. Currently, this conversion is
   // enabled only in cortex-a57 on which performance benefits were verified.
-  return ProfitableArch & (!SubTarget->requiresStrictAlign());
+  return ProfitableArch && !Subtarget->requiresStrictAlign();
 }
 
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
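
Note (added for exposition, not part of the diff): the new getMatchingPairOpcode entries pair two adjacent narrow stores of WZR into a single zero store of twice the width, and mergePairedInsns halves the scaled immediate (or keeps the unscaled byte offset) to match. The standalone C++ sketch below mirrors that mapping; the Opcode enum and helper names are hypothetical stand-ins for the real AArch64 opcode enum and MachineInstr plumbing.

// Illustrative sketch only -- not taken from the patch above.
#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for the AArch64 store opcodes touched by the patch.
enum Opcode { STRBBui, STRHHui, STRWui, STURBBi, STURHHi, STURWi };

// Two adjacent narrow stores of WZR are replaced by one zero store of twice
// the width (scaled and unscaled forms each map to their wider counterpart).
static Opcode getWidenedZeroStoreOpcode(Opcode Opc) {
  switch (Opc) {
  case STRBBui: return STRHHui;
  case STRHHui: return STRWui;
  case STURBBi: return STURHHi;
  case STURHHi: return STURWi;
  default: assert(false && "not a narrow store"); return Opc;
  }
}

// Scaled immediates are in units of the access size, so doubling the width
// halves the immediate of the lower store; unscaled (STUR*) immediates are
// plain byte offsets and are kept as-is.
static int getWidenedOffset(bool IsUnscaled, int LowerOffsetImm) {
  if (!IsUnscaled) {
    assert((LowerOffsetImm & 1) == 0 && "offset must be even to merge");
    return LowerOffsetImm / 2;
  }
  return LowerOffsetImm;
}

int main() {
  // strh wzr, [x0, #4] ; strh wzr, [x0, #6]   ==>   str wzr, [x0, #4]
  // STRHHui scales by 2, so byte offsets #4/#6 are immediates 2 and 3; the
  // merged STRWui scales by 4, so byte offset #4 becomes immediate 1.
  Opcode Wide = getWidenedZeroStoreOpcode(STRHHui);
  int Imm = getWidenedOffset(/*IsUnscaled=*/false, /*LowerOffsetImm=*/2);
  std::printf("wide opcode = %d, merged imm = %d\n", static_cast<int>(Wide), Imm);
  return 0;
}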