From 033f8711011afab6663cdd5515027f8245f808c4 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 3 Dec 2015 03:01:10 +0000 Subject: [PATCH] Revert "ScheduleDAGInstrs: Rework schedule graph builder." This works mostly fine but breaks some stage 1 builders when compiling compiler-rt on i386. Revert for further investigation as I can't see an obvious cause/fix. This reverts commit r254577. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254586 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ScheduleDAGInstrs.h | 40 +--- lib/CodeGen/ScheduleDAGInstrs.cpp | 225 +++++------------- test/CodeGen/AMDGPU/image-attributes.ll | 20 +- test/CodeGen/AMDGPU/literals.ll | 8 +- .../AMDGPU/llvm.AMDGPU.read.workdim.ll | 2 +- test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll | 2 +- .../AMDGPU/llvm.r600.read.local.size.ll | 6 +- test/CodeGen/AMDGPU/or.ll | 2 +- test/CodeGen/AMDGPU/set-dx10.ll | 48 ++-- test/CodeGen/AMDGPU/sext-in-reg.ll | 4 +- test/CodeGen/AMDGPU/shl.ll | 12 +- test/CodeGen/AMDGPU/sra.ll | 8 +- test/CodeGen/AMDGPU/srl.ll | 10 +- test/CodeGen/AMDGPU/unsupported-cc.ll | 32 +-- test/CodeGen/AMDGPU/work-item-intrinsics.ll | 12 +- test/CodeGen/AMDGPU/xor.ll | 2 +- 16 files changed, 156 insertions(+), 277 deletions(-) diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index c715e0f7920..1446f2ac082 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -33,26 +33,15 @@ namespace llvm { /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { unsigned VirtReg; - LaneBitmask LaneMask; SUnit *SU; - VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) - : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} + VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; - /// Mapping from virtual register to SUnit including an operand index. 
- struct VReg2SUnitOperIdx : public VReg2SUnit { - unsigned OperandIndex; - - VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, - unsigned OperandIndex, SUnit *SU) - : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} - }; - /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -80,10 +69,7 @@ namespace llvm { /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap; - - typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor> - VReg2SUnitOperIdxMultiMap; + typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap; /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. @@ -109,9 +95,6 @@ namespace llvm { /// it has taken responsibility for scheduling the terminator correctly. bool CanHandleTerminators; - /// Whether lane masks should get tracked. - bool TrackLaneMasks; - /// State specific to the current scheduling region. /// ------------------------------------------------ @@ -134,7 +117,7 @@ namespace llvm { /// After calling BuildSchedGraph, each vreg used in the scheduling region /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. - VReg2SUnitMultiMap VRegUses; + VReg2UseMap VRegUses; /// State internal to DAG building. /// ------------------------------- @@ -146,12 +129,8 @@ namespace llvm { Reg2SUnitsMap Defs; Reg2SUnitsMap Uses; - /// Tracks the last instruction(s) in this region defining each virtual - /// register. There may be multiple current definitions for a register with - /// disjunct lanemasks. - VReg2SUnitMultiMap CurrentVRegDefs; - /// Tracks the last instructions in this region using each virtual register. - VReg2SUnitOperIdxMultiMap CurrentVRegUses; + /// Track the last instruction in this region defining each virtual register. 
+ VReg2SUnitMap VRegDefs; /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here @@ -221,8 +200,7 @@ namespace llvm { /// input. void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, - PressureDiffs *PDiffs = nullptr, - bool TrackLaneMasks = false); + PressureDiffs *PDiffs = nullptr); /// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier. We want to @@ -269,12 +247,6 @@ namespace llvm { /// Other adjustments may be made to the instruction if necessary. Return /// true if the operand has been deleted, false if not. bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO); - - /// Returns a mask for which lanes get read/written by the given (register) - /// machine operand. - LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; - - void collectVRegUses(SUnit *SU); }; /// newSUnit - Creates a new SUnit and return a ptr to it. 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 2ef02deebfb..12b2beb357b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -55,7 +55,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - TrackLaneMasks(false), FirstDbgValue(nullptr) { + FirstDbgValue(nullptr) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -363,20 +363,6 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } -LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const -{ - unsigned Reg = MO.getReg(); - // No point in tracking lanemasks if we don't have interesting subregisters. - const TargetRegisterClass &RC = *MRI.getRegClass(Reg); - if (!RC.HasDisjunctSubRegs) - return ~0u; - - unsigned SubReg = MO.getSubReg(); - if (SubReg == 0) - return RC.getLaneMask(); - return TRI->getSubRegIndexLaneMask(SubReg); -} - /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. 
@@ -384,106 +370,35 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); - - LaneBitmask DefLaneMask; - LaneBitmask KillLaneMask; - if (TrackLaneMasks) { - bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); - DefLaneMask = getLaneMaskForMO(MO); - // If we have a <read-undef> flag, none of the lane values comes from an - // earlier instruction. - KillLaneMask = IsKill ? ~0u : DefLaneMask; - - // Clear undef flag, we'll re-add it later once we know which subregister - // Def is first. - MO.setIsUndef(false); - } else { - DefLaneMask = ~0u; - KillLaneMask = ~0u; - } - - if (MO.isDead()) { - assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && - "Dead defs should have no uses"); - } else { - // Add data dependence to all uses we found so far. - const TargetSubtargetInfo &ST = MF.getSubtarget(); - for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), - E = CurrentVRegUses.end(); I != E; /*empty*/) { - LaneBitmask LaneMask = I->LaneMask; - // Ignore uses of other lanes. - if ((LaneMask & KillLaneMask) == 0) { - ++I; - continue; - } - - if ((LaneMask & DefLaneMask) != 0) { - SUnit *UseSU = I->SU; - MachineInstr *Use = UseSU->getInstr(); - SDep Dep(SU, SDep::Data, Reg); - Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, - I->OperandIndex)); - ST.adjustSchedDependency(SU, UseSU, Dep); - UseSU->addPred(Dep); - } - - LaneMask &= ~KillLaneMask; - // If we found a Def for all lanes of this use, remove it from the list. 
- if (LaneMask != 0) { - I->LaneMask = LaneMask; - ++I; - } else - I = CurrentVRegUses.erase(I); - } - } + const MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); - // Shortcut: Singly defined vregs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. + // The current operand is a def, so we have at least one. + // Check here if there are any others... if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest defs of this vreg. + // Add output dependence to the next nearest def of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. - LaneBitmask LaneMask = DefLaneMask; - for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), - CurrentVRegDefs.end())) { - // Ignore defs for other lanes. - if ((V2SU.LaneMask & LaneMask) == 0) - continue; - // Add an output dependence. - SUnit *DefSU = V2SU.SU; - // Ignore additional defs of the same lanes in one instruction. This can - // happen because lanemasks are shared for targets with too many - // subregisters. We also use some representration tricks/hacks where we - // add super-register defs/uses, to imply that although we only access parts - // of the reg we care about the full one. - if (DefSU == SU) - continue; - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - - // Update current definition. This can get tricky if the def was about a - // bigger lanemask before. We then have to shrink it and create a new - // VReg2SUnit for the non-overlapping part. 
- LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; - LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; - if (NonOverlapMask != 0) - CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); - V2SU.SU = SU; - V2SU.LaneMask = OverlapMask; + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); + if (DefI == VRegDefs.end()) + VRegDefs.insert(VReg2SUnit(Reg, SU)); + else { + SUnit *DefSU = DefI->SU; + if (DefSU != SU && DefSU != &ExitSU) { + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + } + DefI->SU = SU; } - // If there was no CurrentVRegDefs entry for some lanes yet, create one. - if (LaneMask != 0) - CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -493,26 +408,49 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); - unsigned Reg = MO.getReg(); - - // Remember the use. Data dependencies will be added when we find the def. - LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; - CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); - - // Add antidependences to the following defs of the vreg. - for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), - CurrentVRegDefs.end())) { - // Ignore defs for unrelated lanes. - LaneBitmask PrevDefLaneMask = V2SU.LaneMask; - if ((PrevDefLaneMask & LaneMask) == 0) - continue; - if (V2SU.SU == SU) - continue; + MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); - V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); + // Record this local VReg use. 
+ VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); + + // Lookup this operand's reaching definition. + assert(LIS && "vreg dependencies requires LiveIntervals"); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); + VNInfo *VNI = LRQ.valueIn(); + + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + assert(VNI && "No value to read by operand"); + MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); + // Phis and other noninstructions (after coalescing) have a NULL Def. + if (Def) { + SUnit *DefSU = getSUnit(Def); + if (DefSU) { + // The reaching Def lives within this scheduling region. + // Create a data dependence. + SDep dep(DefSU, SDep::Data, Reg); + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + int DefOp = Def->findRegisterDefOperandIdx(Reg); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); + + const TargetSubtargetInfo &ST = MF.getSubtarget(); + ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); + SU->addPred(dep); + } } + + // Add antidependence to the following def of the vreg it uses. + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); + if (DefI != VRegDefs.end() && DefI->SU != SU) + DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about @@ -795,42 +733,17 @@ void ScheduleDAGInstrs::initSUnits() { } } -void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg()) - continue; - if (!MO.isUse() && (MO.getSubReg() == 0 || !TrackLaneMasks)) - continue; - - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // Record this local VReg use. 
- VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; - } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, 0, SU)); - } -} - /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs, - bool TrackLaneMasks) { + PressureDiffs *PDiffs) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; - this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -864,14 +777,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); - assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); - unsigned NumVirtRegs = MRI.getNumVirtRegs(); - CurrentVRegDefs.setUniverse(NumVirtRegs); - CurrentVRegUses.setUniverse(NumVirtRegs); - + assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); VRegUses.clear(); - VRegUses.setUniverse(NumVirtRegs); + VRegDefs.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(MRI.getNumVirtRegs()); // Model data dependencies between instructions being scheduled and the // ExitSU. 
@@ -899,7 +808,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); - collectVRegUses(SU); } assert( @@ -1149,8 +1057,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - CurrentVRegDefs.clear(); - CurrentVRegUses.clear(); + VRegDefs.clear(); PendingLoads.clear(); } diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll index 5906b2f1570..7a5a7346865 100644 --- a/test/CodeGen/AMDGPU/image-attributes.ll +++ b/test/CodeGen/AMDGPU/image-attributes.ll @@ -6,7 +6,7 @@ ; FUNC-LABEL: {{^}}width_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].Z +; EG: MOV [[VAL]], KC0[2].Z define void @width_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -19,7 +19,7 @@ entry: ; FUNC-LABEL: {{^}}width_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].Z +; EG: MOV [[VAL]], KC0[2].Z define void @width_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -36,7 +36,7 @@ entry: ; FUNC-LABEL: {{^}}height_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].W +; EG: MOV [[VAL]], KC0[2].W define void @height_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -49,7 +49,7 @@ entry: ; FUNC-LABEL: {{^}}height_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].W +; EG: MOV [[VAL]], KC0[2].W define void @height_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -66,7 +66,7 @@ entry: ; FUNC-LABEL: {{^}}depth_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[3].X +; EG: MOV [[VAL]], KC0[3].X define void @depth_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -83,7 +83,7 @@ entry: ; 
FUNC-LABEL: {{^}}data_type_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[3].Y +; EG: MOV [[VAL]], KC0[3].Y define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -96,7 +96,7 @@ entry: ; FUNC-LABEL: {{^}}data_type_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[3].Y +; EG: MOV [[VAL]], KC0[3].Y define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -113,7 +113,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[3].Z +; EG: MOV [[VAL]], KC0[3].Z define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -126,7 +126,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[3].Z +; EG: MOV [[VAL]], KC0[3].Z define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -145,7 +145,7 @@ entry: ; ; FUNC-LABEL: {{^}}image_arg_2nd: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[4].Z +; EG: MOV [[VAL]], KC0[4].Z define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1, i32 %x, %opencl.image2d_t addrspace(1)* %in2, diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll index 9d2320cb2d1..cff1c24f89d 100644 --- a/test/CodeGen/AMDGPU/literals.ll +++ b/test/CodeGen/AMDGPU/literals.ll @@ -7,8 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: {{^}}i32_literal: -; CHECK: LSHR -; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y +; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,8 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: {{^}}float_literal: -; CHECK: LSHR -; CHECK-NEXT: ADD * 
{{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y +; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll index 2e299e30b8c..6dc9d050eee 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL: {{^}}read_workdim: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].Z +; EG: MOV [[VAL]], KC0[2].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index a30a8e083eb..74792e50017 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; R600: {{^}}amdgpu_trunc: -; R600: TRUNC {{\*? 
*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: {{^}}amdgpu_trunc: ; SI: v_trunc_f32 diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll index 13ebee41e84..f2a7256e812 100644 --- a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll +++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -5,7 +5,7 @@ ; FUNC-LABEL: {{^}}local_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[1].Z +; EG: MOV [[VAL]], KC0[1].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 @@ -23,7 +23,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[1].W +; EG: MOV [[VAL]], KC0[1].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].X +; EG: MOV [[VAL]], KC0[2].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll index e40f18f040b..1c04090b407 100644 --- a/test/CodeGen/AMDGPU/or.ll +++ b/test/CodeGen/AMDGPU/or.ll @@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { } ; FUNC-LABEL: {{^}}or_i1: -; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}} +; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 57365a6e1fc..53694dcffa6 100644 --- a/test/CodeGen/AMDGPU/set-dx10.ll +++ b/test/CodeGen/AMDGPU/set-dx10.ll @@ -5,8 +5,8 @@ ; 
SET*DX10 instructions. ; CHECK: {{^}}fcmp_une_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -19,8 +19,8 @@ entry: } ; CHECK: {{^}}fcmp_une_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -31,8 +31,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -45,8 +45,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -57,8 +57,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -71,8 +71,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: 
SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -83,8 +83,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -97,8 +97,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -109,8 +109,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -123,8 +123,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, +; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -135,8 +135,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_fptosi: -; CHECK: LSHR -; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void 
@fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -149,8 +149,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, +; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, +; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 23ae3b96797..95fcfdbdeca 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI: buffer_store_dword [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] -; EG: LSHR * [[ADDR]] -; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 +; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 +; EG-NEXT: LSHR * [[ADDR]] define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index 55db80731c9..bf08e66f330 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG-LABEL: {{^}}shl_i64: +;EG: {{^}}shl_i64: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: LSHL {{\*? 
*}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}} +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 @@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG-LABEL: {{^}}shl_v2i64: +;EG: {{^}}shl_v2i64: ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ;EG-DAG: LSHR {{\*? *}}[[COMPSHA]] diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll index 3b59bbfb18c..bcbc32f4c05 100644 --- a/test/CodeGen/AMDGPU/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll @@ -70,11 +70,11 @@ entry: ;EG-LABEL: {{^}}ashr_i64_2: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 +;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: ASHR {{\*? 
*}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll index bbd95435632..ebb2f2db252 100644 --- a/test/CodeGen/AMDGPU/srl.ll +++ b/test/CodeGen/AMDGPU/srl.ll @@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} +; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} -; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}} +; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} +; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}} -; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]] +; EG-DAG: CNDE_INT {{\*? 
*}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll index d120111a71f..8ab4faf2f14 100644 --- a/test/CodeGen/AMDGPU/unsupported-cc.ll +++ b/test/CodeGen/AMDGPU/unsupported-cc.ll @@ -3,8 +3,8 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK-LABEL: {{^}}slt: -; CHECK: LSHR -; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -15,8 +15,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -40,8 +40,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_float_native: -; CHECK: LSHR -; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float_native(float addrspace(1)* %out, float %in) { entry: @@ -52,8 +52,8 @@ entry: } ; CHECK-LABEL: {{^}}olt: -; CHECK: LSHR -; CHECK-NEXT: SETGT {{\*? 
*}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * ; CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: @@ -64,8 +64,8 @@ entry: } ; CHECK-LABEL: {{^}}sle: -; CHECK: LSHR -; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: @@ -76,8 +76,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_i32: -; CHECK: LSHR -; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -101,8 +101,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_float_native: -; CHECK: LSHR -; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} +; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ule_float_native(float addrspace(1)* %out, float %in) { entry: @@ -113,8 +113,8 @@ entry: } ; CHECK-LABEL: {{^}}ole: -; CHECK: LSHR -; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll index f420ec9c7d2..a704a23b0f9 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -7,7 +7,7 @@ ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].X +; EG: MOV [[VAL]], KC0[0].X ; HSA: .amd_kernel_code_t @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y +; EG: MOV [[VAL]], KC0[0].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 @@ -53,7 +53,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z +; EG: MOV [[VAL]], KC0[0].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 @@ -68,7 +68,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].W +; EG: MOV [[VAL]], KC0[0].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc @@ -83,7 +83,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].X +; EG: MOV [[VAL]], KC0[1].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 @@ -98,7 +98,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? 
*}}[[VAL]], KC0[1].Y +; EG: MOV [[VAL]], KC0[1].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index 655655d92f0..ddb920af29d 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ; FUNC-LABEL: {{^}}xor_i1: -; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}} +; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} -- 2.34.1