From: Bruno Cardoso Lopes Date: Mon, 27 Jul 2015 20:26:04 +0000 (+0000) Subject: Revert "[PeepholeOptimizer] Look through PHIs to find additional register sources" X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=272154cc862e3ffc043cc6af4cee04aa3effd495;p=oota-llvm.git Revert "[PeepholeOptimizer] Look through PHIs to find additional register sources" Still breaks some ARM buildbots. This reverts r243271. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243318 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 893ea9c887c..3861dbd91a2 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -1266,33 +1266,6 @@ private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; }; -/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. -template<> -struct DenseMapInfo { - typedef DenseMapInfo RegInfo; - - static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { - return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), - RegInfo::getEmptyKey()); - } - static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { - return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), - RegInfo::getTombstoneKey()); - } - /// \brief Reuse getHashValue implementation from - /// std::pair. - static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { - std::pair PairVal = - std::make_pair(Val.Reg, Val.SubReg); - return DenseMapInfo>::getHashValue(PairVal); - } - static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, - const TargetInstrInfo::RegSubRegPair &RHS) { - return RegInfo::isEqual(LHS.Reg, RHS.Reg) && - RegInfo::isEqual(LHS.SubReg, RHS.SubReg); - } -}; - } // End llvm namespace #endif diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 92daaf7a9b0..ef213e4347f 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -98,12 +98,6 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); -// Limit the number of PHI instructions to process -// in PeepholeOptimizer::getNextSource. -static cl::opt RewritePHILimit( - "rewrite-phi-limit", cl::Hidden, cl::init(10), - cl::desc("Limit the length of PHI chains to lookup")); - STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -138,10 +132,6 @@ namespace { } } - /// \brief Track Def -> Use info used for rewriting copies. - typedef SmallDenseMap - RewriteMapTy; - private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -153,8 +143,7 @@ namespace { bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl &LocalMIs); - bool findNextSource(unsigned Reg, unsigned SubReg, - RewriteMapTy &RewriteMap); + bool findNextSource(unsigned &Reg, unsigned &SubReg); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); @@ -231,20 +220,6 @@ namespace { assert(Idx < getNumSources() && "SubReg source out of index"); return RegSrcs[Idx].SubReg; } - - bool operator==(const ValueTrackerResult &Other) { - if (Other.getInst() != getInst()) - return false; - - if (Other.getNumSources() != getNumSources()) - return false; - - for (int i = 0, e = Other.getNumSources(); i != e; ++i) - if (Other.getSrcReg(i) != getSrcReg(i) || - Other.getSrcSubReg(i) != getSrcSubReg(i)) - return false; - return true; - } }; /// \brief Helper class to track the possible sources of a value defined by @@ -306,8 +281,6 @@ namespace { /// \brief Specialized version of getNextSource for SubregToReg /// instructions. ValueTrackerResult getNextSourceFromSubregToReg(); - /// \brief Specialized version of getNextSource for PHI instructions. - ValueTrackerResult getNextSourceFromPHI(); public: /// \brief Create a ValueTracker instance for the value defined by \p Reg. @@ -610,143 +583,58 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, /// \brief Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. -/// When true is returned, the \p RewriteMap can be used by the client to -/// retrieve all Def -> Use along the way up to the next source. Any found -/// Use that is not itself a key for another entry, is the next source to -/// use. During the search for the next source, multiple sources can be found -/// given multiple incoming sources of a PHI instruction. In this case, we -/// look in each PHI source for the next source; all found next sources must -/// share the same register file as \p Reg and \p SubReg. The client should -/// then be capable to rewrite all intermediate PHIs to get the next source. +/// When true is returned, \p Reg and \p SubReg are updated with the +/// register number and sub-register index of the new source. /// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, - RewriteMapTy &RewriteMap) { +bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { // Do not try to find a new source for a physical register. // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + unsigned DefSubReg = SubReg; - SmallVector SrcToLook; - TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); - SrcToLook.push_back(CurSrcPair); + unsigned Src; + unsigned SrcSubReg; bool ShouldRewrite = false; - unsigned PHILimit = RewritePHILimit; - while (!SrcToLook.empty() && PHILimit) { - TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); - // As explained above, do not handle physical registers - if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) - return false; - - CurSrcPair = Pair; - ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, - !DisableAdvCopyOpt, TII); - ValueTrackerResult Res; - - do { - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - Res = ValTracker.getNextSource(); - if (!Res.isValid()) - break; - - // Insert the Def -> Use entry for the recently found source. - ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); - if (CurSrcRes.isValid()) { - assert(CurSrcRes == Res && "ValueTrackerResult found must match"); - // An existent entry with multiple sources is a PHI cycle we must avoid. - // Otherwise it's an entry with a valid next source we already found. - if (CurSrcRes.getNumSources() > 1) { - DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); - return false; - } - break; - } - RewriteMap.insert(std::make_pair(CurSrcPair, Res)); - - // ValueTrackerResult usually have one source unless it's the result from - // a PHI instruction. Add the found PHI edges to be looked up further. - unsigned NumSrcs = Res.getNumSources(); - if (NumSrcs > 1) { - PHILimit--; - for (unsigned i = 0; i < NumSrcs; ++i) - SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( - Res.getSrcReg(i), Res.getSrcSubReg(i))); - break; - } - - CurSrcPair.Reg = Res.getSrcReg(0); - CurSrcPair.SubReg = Res.getSrcSubReg(0); - // Do not extend the live-ranges of physical registers as they add - // constraints to the register allocator. Moreover, if we want to extend - // the live-range of a physical register, unlike SSA virtual register, - // we will have to check that they aren't redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) - return false; - - const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); - - // If this source does not incur a cross register bank copy, use it. - ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, SubReg, SrcRC, - CurSrcPair.SubReg); - } while (!ShouldRewrite); + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); + do { + ValueTrackerResult Res = ValTracker.getNextSource(); + if (!Res.isValid()) + break; + Src = Res.getSrcReg(0); + SrcSubReg = Res.getSrcSubReg(0); + + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. + // Moreover, if we want to extend the live-range of a physical register, + // unlike SSA virtual register, we will have to check that they are not + // redefine before the related use. + if (TargetRegisterInfo::isPhysicalRegister(Src)) + break; - // Continue looking for new sources... - if (Res.isValid()) - continue; + const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); - if (!PHILimit) { - DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); - return false; - } - - // Do not continue searching for a new source if the there's at least - // one use-def which cannot be rewritten. - if (!ShouldRewrite) - return false; - } + // If this source does not incur a cross register bank copy, use it. + ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, + SrcSubReg); + } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. - if (CurSrcPair.Reg == Reg) + if (!ShouldRewrite || Src == Reg) return false; + Reg = Src; + SubReg = SrcSubReg; return true; } -/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are -/// guaranteed to have the same register class. This is necessary whenever we -/// successfully traverse a PHI instruction and find suitable sources coming -/// from its edges. By inserting a new PHI, we provide a rewritten PHI def -/// suitable to be used in a new COPY instruction. -MachineInstr * -insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - const SmallVectorImpl &SrcRegs, - MachineInstr *OrigPHI) { - assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); - - const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); - unsigned NewVR = MRI->createVirtualRegister(NewRC); - MachineBasicBlock *MBB = OrigPHI->getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), - TII->get(TargetOpcode::PHI), NewVR); - - unsigned MBBOpIdx = 2; - for (auto RegPair : SrcRegs) { - MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); - MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); - // Since we're extended the lifetime of RegPair.Reg, clear the - // kill flags to account for that and make RegPair.Reg reaches - // the new PHI. - MRI->clearKillFlags(RegPair.Reg); - MBBOpIdx += 2; - } - - return MIB; -} - namespace { /// \brief Helper class to rewrite the arguments of a copy-like instruction. class CopyRewriter { @@ -820,65 +708,16 @@ public: return true; } - /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find - /// the new source to use for rewrite. If \p HandleMultipleSources is true and - /// multiple sources for a given \p Def are found along the way, we found a - /// PHI instructions that needs to be rewritten. - /// TODO: HandleMultipleSources should be removed once we test PHI handling - /// with coalescable copies. - TargetInstrInfo::RegSubRegPair - getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap, - bool HandleMultipleSources = true) { - - TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); - do { - ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); - // If there are no entries on the map, LookupSrc is the new source. - if (!Res.isValid()) - return LookupSrc; - - // There's only one source for this definition, keep searching... - unsigned NumSrcs = Res.getNumSources(); - if (NumSrcs == 1) { - LookupSrc.Reg = Res.getSrcReg(0); - LookupSrc.SubReg = Res.getSrcSubReg(0); - continue; - } - - if (!HandleMultipleSources) - break; - - // Multiple sources, recurse into each source to find a new source - // for it. Then, rewrite the PHI accordingly to its new edges. - SmallVector NewPHISrcs; - for (unsigned i = 0; i < NumSrcs; ++i) { - TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), - Res.getSrcSubReg(i)); - NewPHISrcs.push_back( - getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); - } - - // Build the new PHI node and return its def register as the new source. - MachineInstr *OrigPHI = const_cast(Res.getInst()); - MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); - const MachineOperand &MODef = NewPHI->getOperand(0); - return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - - } while (1); - - return TargetInstrInfo::RegSubRegPair(0, 0); - } - - /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap - /// and create a new COPY instruction. More info about RewriteMap in - /// PeepholeOptimizer::findNextSource. Right now this is only used to handle - /// Uncoalescable copies, since they are copy like instructions that aren't - /// recognized by the register allocator. - virtual MachineInstr * - RewriteSource(TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap) { + /// \brief Rewrite the current source with \p NewSrcReg and \p NewSecSubReg + /// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the + /// definition to be rewritten from the original copylike instruction. + /// \return The new COPY if the rewriting was possible, nullptr otherwise. + /// This is needed to handle Uncoalescable copies, since they are copy + /// like instructions that aren't recognized by the register allocator. + virtual MachineInstr *RewriteCurrentSource(unsigned DefReg, + unsigned DefSubReg, + unsigned NewSrcReg, + unsigned NewSrcSubReg) { return nullptr; } }; @@ -926,41 +765,31 @@ public: return true; } - /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap - /// and create a new COPY instruction. More info about RewriteMap in - /// PeepholeOptimizer::findNextSource. Right now this is only used to handle - /// Uncoalescable copies, since they are copy like instructions that aren't - /// recognized by the register allocator. - MachineInstr * - RewriteSource(TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap) override { - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + /// \brief Rewrite the current source with \p NewSrcReg and \p NewSrcSubReg + /// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the + /// definition to be rewritten from the original copylike instruction. + /// \return The new COPY if the rewriting was possible, nullptr otherwise. + MachineInstr *RewriteCurrentSource(unsigned DefReg, unsigned DefSubReg, + unsigned NewSrcReg, + unsigned NewSrcSubReg) override { + assert(!TargetRegisterInfo::isPhysicalRegister(DefReg) && "We do not rewrite physical registers"); - // Find the new source to use in the COPY rewrite. - TargetInstrInfo::RegSubRegPair NewSrc = - getNewSource(&MRI, &TII, Def, RewriteMap); - - // Insert the COPY. - const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); + const TargetRegisterClass *DefRC = MRI.getRegClass(DefReg); unsigned NewVR = MRI.createVirtualRegister(DefRC); MachineInstr *NewCopy = BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), TII.get(TargetOpcode::COPY), NewVR) - .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + .addReg(NewSrcReg, 0, NewSrcSubReg); - NewCopy->getOperand(0).setSubReg(Def.SubReg); - if (Def.SubReg) + NewCopy->getOperand(0).setSubReg(DefSubReg); + if (DefSubReg) NewCopy->getOperand(0).setIsUndef(); - MRI.replaceRegWith(Def.Reg, NewVR); + MRI.replaceRegWith(DefReg, NewVR); MRI.clearKillFlags(NewVR); - // We extended the lifetime of NewSrc.Reg, clear the kill flags to - // account for that. - MRI.clearKillFlags(NewSrc.Reg); - return NewCopy; } }; @@ -1204,25 +1033,16 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, TrackSubReg)) { - // Keep track of PHI nodes and its incoming edges when looking for sources. - RewriteMapTy RewriteMap; + unsigned NewSrc = TrackReg; + unsigned NewSubReg = TrackSubReg; // Try to find a more suitable source. If we failed to do so, or get the // actual source, move to the next source. - if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) + if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) continue; - - // Get the new source to rewrite. TODO: Only enable handling of multiple - // sources (PHIs) once we have a motivating example and testcases for it. - TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); - TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( - MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); - if (SrcReg == NewSrc.Reg) - continue; - // Rewrite source. - if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { + if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { // We may have extended the live-range of NewSrc, account for that. - MRI->clearKillFlags(NewSrc.Reg); + MRI->clearKillFlags(NewSrc); Changed = true; } } @@ -1251,7 +1071,9 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); // Check if we can rewrite all the values defined by this instruction. - SmallVector RewritePairs; + SmallVector< + std::pair, + 4> RewritePairs; // Get the right rewriter for the current copy. std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); // If none exists, bails out. @@ -1261,32 +1083,39 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( // Rewrite each rewritable source by generating new COPYs. This works // differently from optimizeCoalescableCopy since it first makes sure that all // definitions can be rewritten. - RewriteMapTy RewriteMap; - unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; - while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, - CopyDefSubReg)) { + unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; + while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, + TrackSubReg)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) + if (TargetRegisterInfo::isPhysicalRegister(TrackReg)) return false; // If we do not know how to rewrite this definition, there is no point // in trying to kill this instruction. - TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); - if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) + TargetInstrInfo::RegSubRegPair Def(TrackReg, TrackSubReg); + TargetInstrInfo::RegSubRegPair Src = Def; + if (!findNextSource(Src.Reg, Src.SubReg)) return false; - RewritePairs.push_back(Def); + RewritePairs.push_back(std::make_pair(Def, Src)); } // The change is possible for all defs, do it. - for (const auto &Def : RewritePairs) { + for (const auto &PairDefSrc : RewritePairs) { + const auto &Def = PairDefSrc.first; + const auto &Src = PairDefSrc.second; + // Rewrite the "copy" in a way the register coalescer understands. - MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); + MachineInstr *NewCopy = CpyRewriter->RewriteCurrentSource( + Def.Reg, Def.SubReg, Src.Reg, Src.SubReg); assert(NewCopy && "Should be able to always generate a new copy"); + + // We extended the lifetime of Src and clear the kill flags to + // account for that. + MRI->clearKillFlags(Src.Reg); LocalMIs.insert(NewCopy); } - // MI is now dead. MI->eraseFromParent(); ++NumUncoalescableCopies; @@ -1694,26 +1523,6 @@ ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { Def->getOperand(3).getImm()); } -/// \brief Explore each PHI incoming operand and return its sources -ValueTrackerResult ValueTracker::getNextSourceFromPHI() { - assert(Def->isPHI() && "Invalid definition"); - ValueTrackerResult Res; - - // If we look for a different subreg, bails as we do not - // support composing subreg yet. - if (Def->getOperand(0).getSubReg() != DefSubReg) - return ValueTrackerResult(); - - // Return all register sources for PHI instructions. - for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { - auto &MO = Def->getOperand(i); - assert(MO.isReg() && "Invalid PHI instruction"); - Res.addSource(MO.getReg(), MO.getSubReg()); - } - - return Res; -} - ValueTrackerResult ValueTracker::getNextSourceImpl() { assert(Def && "This method needs a valid definition"); @@ -1736,8 +1545,6 @@ ValueTrackerResult ValueTracker::getNextSourceImpl() { return getNextSourceFromExtractSubreg(); if (Def->isSubregToReg()) return getNextSourceFromSubregToReg(); - if (Def->isPHI()) - return getNextSourceFromPHI(); return ValueTrackerResult(); } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index ab301adca48..eaa7894004c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -249,7 +249,6 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; -let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (bitconvert GR64:$src))], @@ -263,7 +262,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. -let SchedRW = [WriteMove], isBitcast = 1 in { +let SchedRW = [WriteMove] in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", diff --git a/test/CodeGen/X86/mmx-coalescing.ll b/test/CodeGen/X86/mmx-coalescing.ll deleted file mode 100644 index a515e5ee375..00000000000 --- a/test/CodeGen/X86/mmx-coalescing.ll +++ /dev/null @@ -1,84 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s - -%SA = type <{ %union.anon, i32, [4 x i8], i8*, i8*, i8*, i32, [4 x i8] }> -%union.anon = type { <1 x i64> } - -; Check that extra movd (copy) instructions aren't generated. - -define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) { -entry: -; CHECK-LABEL: test -; CHECK: # BB#0: -; CHECK-NEXT: pshufw -; CHECK-NEXT: movd -; CHECK-NOT: movd -; CHECK-NEXT: testl - %shl = shl i32 1, %B - %shl1 = shl i32 %C, %B - %shl2 = shl i32 1, %D - %v = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 0, i32 0 - %v0 = load <1 x i64>, <1 x i64>* %v, align 8 - %SA0 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 1 - %v1 = load i32, i32* %SA0, align 4 - %SA1 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 3 - %v2 = load i8*, i8** %SA1, align 8 - %SA2 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 4 - %v3 = load i8*, i8** %SA2, align 8 - %v4 = bitcast <1 x i64> %v0 to <4 x i16> - %v5 = bitcast <4 x i16> %v4 to x86_mmx - %v6 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v5, i8 -18) - %v7 = bitcast x86_mmx %v6 to <4 x i16> - %v8 = bitcast <4 x i16> %v7 to <1 x i64> - %v9 = extractelement <1 x i64> %v8, i32 0 - %v10 = bitcast i64 %v9 to <2 x i32> - %v11 = extractelement <2 x i32> %v10, i32 0 - %cmp = icmp eq i32 %v11, 0 - br i1 %cmp, label %if.A, label %if.B - -if.A: -; CHECK: %if.A -; CHECK-NEXT: movd -; CHECK-NEXT: psllq - %pa = phi <1 x i64> [ %v8, %entry ], [ %vx, %if.C ] - %v17 = extractelement <1 x i64> %pa, i32 0 - %v18 = bitcast i64 %v17 to x86_mmx - %v19 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %v18, i32 %B) #2 - %v20 = bitcast x86_mmx %v19 to i64 - %v21 = insertelement <1 x i64> undef, i64 %v20, i32 0 - %cmp3 = icmp eq i64 %v20, 0 - br i1 %cmp3, label %if.C, label %merge - -if.B: - %v34 = bitcast <1 x i64> %v8 to <4 x i16> - %v35 = bitcast <4 x i16> %v34 to x86_mmx - %v36 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v35, i8 -18) - %v37 = bitcast x86_mmx %v36 to <4 x i16> - %v38 = bitcast <4 x i16> %v37 to <1 x i64> - br label %if.C - -if.C: - %vx = phi <1 x i64> [ %v21, %if.A ], [ %v38, %if.B ] - %cvt = bitcast <1 x i64> %vx to <2 x i32> - %ex = extractelement <2 x i32> %cvt, i32 0 - %cmp2 = icmp eq i32 %ex, 0 - br i1 %cmp2, label %if.A, label %merge - -merge: -; CHECK: %merge -; CHECK-NOT: movd -; CHECK-NEXT: pshufw - %vy = phi <1 x i64> [ %v21, %if.A ], [ %vx, %if.C ] - %v130 = bitcast <1 x i64> %vy to <4 x i16> - %v131 = bitcast <4 x i16> %v130 to x86_mmx - %v132 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v131, i8 -18) - %v133 = bitcast x86_mmx %v132 to <4 x i16> - %v134 = bitcast <4 x i16> %v133 to <1 x i64> - %v135 = extractelement <1 x i64> %v134, i32 0 - %v136 = bitcast i64 %v135 to <2 x i32> - %v137 = extractelement <2 x i32> %v136, i32 0 - ret i32 %v137 -} - - -declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) -declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)