From b72c6bad3c6d50fd5d0f24b704c05c8ab997d675 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Thu, 15 Oct 2015 16:46:29 +0000 Subject: [PATCH] x86 FP atomic codegen: don't drop globals, stack Summary: x86 codegen is clever about generating good code for relaxed floating-point operations, but it was being silly when globals and immediates were involved, forgetting where the global was and loading/storing from/to the wrong place. The same applied to hard-coded address immediates. Don't let it forget about the displacement. This fixes https://llvm.org/bugs/show_bug.cgi?id=25171 A very similar bug when doing floating-points atomics to the stack is also fixed by this patch. This fixes https://llvm.org/bugs/show_bug.cgi?id=25144 Reviewers: pete Subscribers: llvm-commits, majnemer, rsmith Differential Revision: http://reviews.llvm.org/D13749 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250429 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 15 ++-- test/CodeGen/X86/atomic_mi.ll | 108 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 182e9116bfb..618290a257a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -21105,21 +21105,26 @@ X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI, const X86InstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned MSrc = MI->getOperand(0).getReg(); + MachineOperand MSrc = MI->getOperand(0); unsigned VSrc = MI->getOperand(5).getReg(); + const MachineOperand &Disp = MI->getOperand(3); + MachineOperand ZeroDisp = MachineOperand::CreateImm(0); + bool hasDisp = Disp.isGlobal() || Disp.isImm(); + if (hasDisp && MSrc.isReg()) + MSrc.setIsKill(false); MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp)) - .addReg(/*Base=*/MSrc) + .addOperand(/*Base=*/MSrc) .addImm(/*Scale=*/1) .addReg(/*Index=*/0) - .addImm(0) + .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0) .addReg(0); MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp), MRI.createVirtualRegister(MRI.getRegClass(VSrc))) .addReg(VSrc) - .addReg(/*Base=*/MSrc) + .addOperand(/*Base=*/MSrc) .addImm(/*Scale=*/1) .addReg(/*Index=*/0) - .addImm(/*Disp=*/0) + .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0) .addReg(/*Segment=*/0); MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill); MI->eraseFromParent(); // The pseudo instruction is gone now. diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll index 7be79113620..356d9dcff6f 100644 --- a/test/CodeGen/X86/atomic_mi.ll +++ b/test/CodeGen/X86/atomic_mi.ll @@ -871,3 +871,111 @@ define void @fadd_64r(double* %loc, double %val) { store atomic i64 %3, i64* %floc release, align 8 ret void } + +@glob32 = global float 0.000000e+00, align 4 +@glob64 = global double 0.000000e+00, align 8 + +; Floating-point add to a global using an immediate. +define void @fadd_32g() { +; X64-LABEL: fadd_32g: +; X64-NOT: lock +; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addss glob32(%rip), %[[XMM]] +; X64-NEXT: movss %[[XMM]], glob32(%rip) +; X32-LABEL: fadd_32g: +; Don't check x86-32 (see comment above). + %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 + %f = bitcast i32 %i to float + %add = fadd float %f, 1.000000e+00 + %s = bitcast float %add to i32 + store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4 + ret void +} + +define void @fadd_64g() { +; X64-LABEL: fadd_64g: +; X64-NOT: lock +; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addsd glob64(%rip), %[[XMM]] +; X64-NEXT: movsd %[[XMM]], glob64(%rip) +; X32-LABEL: fadd_64g: +; Don't check x86-32 (see comment above). + %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 + %f = bitcast i64 %i to double + %add = fadd double %f, 1.000000e+00 + %s = bitcast double %add to i64 + store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8 + ret void +} + +; Floating-point add to a hard-coded immediate location using an immediate. +define void @fadd_32imm() { +; X64-LABEL: fadd_32imm: +; X64-NOT: lock +; X64: movl $3735928559, %e[[M:[a-z]+]] +; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addss (%r[[M]]), %[[XMM]] +; X64-NEXT: movss %[[XMM]], (%r[[M]]) +; X32-LABEL: fadd_32imm: +; Don't check x86-32 (see comment above). + %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 + %f = bitcast i32 %i to float + %add = fadd float %f, 1.000000e+00 + %s = bitcast float %add to i32 + store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4 + ret void +} + +define void @fadd_64imm() { +; X64-LABEL: fadd_64imm: +; X64-NOT: lock +; X64: movl $3735928559, %e[[M:[a-z]+]] +; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addsd (%r[[M]]), %[[XMM]] +; X64-NEXT: movsd %[[XMM]], (%r[[M]]) +; X32-LABEL: fadd_64imm: +; Don't check x86-32 (see comment above). + %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 + %f = bitcast i64 %i to double + %add = fadd double %f, 1.000000e+00 + %s = bitcast double %add to i64 + store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8 + ret void +} + +; Floating-point add to a stack location. +define void @fadd_32stack() { +; X64-LABEL: fadd_32stack: +; X64-NOT: lock +; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addss [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]] +; X64-NEXT: movss %[[XMM]], [[STACKOFF]](%rsp) +; X32-LABEL: fadd_32stack: +; Don't check x86-32 (see comment above). + %ptr = alloca i32, align 4 + %bc3 = bitcast i32* %ptr to float* + %load = load atomic i32, i32* %ptr acquire, align 4 + %bc0 = bitcast i32 %load to float + %fadd = fadd float 1.000000e+00, %bc0 + %bc1 = bitcast float %fadd to i32 + store atomic i32 %bc1, i32* %ptr release, align 4 + ret void +} + +define void @fadd_64stack() { +; X64-LABEL: fadd_64stack: +; X64-NOT: lock +; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]] +; X64-NEXT: addsd [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]] +; X64-NEXT: movsd %[[XMM]], [[STACKOFF]](%rsp) +; X32-LABEL: fadd_64stack: +; Don't check x86-32 (see comment above). + %ptr = alloca i64, align 8 + %bc3 = bitcast i64* %ptr to double* + %load = load atomic i64, i64* %ptr acquire, align 8 + %bc0 = bitcast i64 %load to double + %fadd = fadd double 1.000000e+00, %bc0 + %bc1 = bitcast double %fadd to i64 + store atomic i64 %bc1, i64* %ptr release, align 8 + ret void +} -- 2.34.1