From 228232b2821f8e7f9c0b874ad733414bda183db6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 30 Nov 2010 07:20:12 +0000 Subject: [PATCH] Rewrite mwait and monitor support and custom lower arguments. Fixes PR8573. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120404 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 53 ++++++++++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.h | 7 ++++ lib/Target/X86/X86InstrSSE.td | 19 ++++++++--- test/CodeGen/X86/apm.ll | 26 +++++++++++++++ 4 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 test/CodeGen/X86/apm.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6793b70dd57..f53b0ed3834 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9443,6 +9443,53 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { + assert((Subtarget->hasSSE3()) && "Target must have SSE3 features enabled"); + + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // Address into RAX/EAX, other two args into ECX, EDX. + unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); + for (int i = 0; i < X86::AddrNumOperands; ++i) + (*MIB).addOperand(MI->getOperand(i)); + + unsigned ValOps = X86::AddrNumOperands; + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(ValOps).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) + .addReg(MI->getOperand(ValOps+1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr)); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { + assert((Subtarget->hasSSE3()) && "Target must have SSE3 features enabled"); + + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // First arg in ECX, the second in EAX. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(0).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) + .addReg(MI->getOperand(1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); + + MI->eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr *MI, @@ -10042,6 +10089,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); + // Thread synchronization. + case X86::MONITOR: + return EmitMonitor(MI, BB); + case X86::MWAIT: + return EmitMwait(MI, BB); + // Atomic Lowering. case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index f2f19485be6..6d78d969d33 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -821,6 +821,13 @@ namespace llvm { MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, unsigned argNum, bool inMem) const; + /// Utility functions to emit monitor and mwait instructions. These + /// need to make sure that the arguments to the intrinsic are in the + /// correct registers. + MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) + const; + MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). /// It takes the bitwise instruction to expand, the associated machine basic /// block, and the associated X86 opcodes for reg/reg and reg/imm. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e4e92ccbf14..0a638973b70 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3640,10 +3640,21 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), //===---------------------------------------------------------------------===// // Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; +let usesCustomInserter = 1 in { +def MONITOR : I<0, Pseudo, (outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), + "# MONITORrrr PSUEDO", + [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; +def MWAIT : I<0, Pseudo, (outs), (ins GR32:$src1, GR32:$src2), + "# MWAITrr PSEUDO", + [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; +} + +let Uses = [EAX, ECX, EDX] in +def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, + Requires<[HasSSE3]>; +let Uses = [ECX, EAX] in +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, + Requires<[HasSSE3]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll new file mode 100644 index 00000000000..b112185ae78 --- /dev/null +++ b/test/CodeGen/X86/apm.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -o - -march=x86-64 | FileCheck %s +; PR8573 + +; CHECK: _foo: +; CHECK: leaq (%rdi), %rax +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: monitor +define void @foo(i8* %P, i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) + ret void +} + +declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind + +; CHECK: _bar: +; CHECK: movl %edi, %ecx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: mwait +define void @bar(i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) + ret void +} + +declare void @llvm.x86.sse3.mwait(i32, i32) nounwind -- 2.34.1