From: Samuel Antao Date: Thu, 9 Oct 2014 20:42:56 +0000 (+0000) Subject: Fix bug in GPR to FPR moves in PPC64LE. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=f75bfbea1763b0440a264d8f479522a3c330cafd;p=oota-llvm.git Fix bug in GPR to FPR moves in PPC64LE. The current implementation of GPR->FPR register moves uses a stack slot. This mechanism writes a doubleword and reads a word. On big-endian targets the load address must be displaced by 4 bytes in order to get the right value, because the 32-bit value lives in the low-order word of the stored doubleword. On little-endian targets the low-order word is at offset 0, so no displacement is required. This patch fixes the issue and adds LE regression tests to fast-isel-conversion which currently expose this problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219441 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 99aa4eaf643..11493544595 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -864,7 +864,7 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) { } // Move an i32 or i64 value in a GPR to an f64 value in an FPR. -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. // FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte @@ -897,10 +897,10 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, if (SrcVT == MVT::i32) { if (!IsSigned) { LoadOpc = PPC::LFIWZX; - Addr.Offset = 4; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4; } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; - Addr.Offset = 4; + Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 
0 : 4; } } @@ -984,7 +984,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // Move the floating-point value in SrcReg into an integer destination // register, and return the register (or zero if we can't handle it). -// FIXME: When direct register moves are implemented (see PowerISA 2.08), +// FIXME: When direct register moves are implemented (see PowerISA 2.07), // those should be used instead of moving via a stack slot when the // subtarget permits. unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll index 5e00675c039..71611060ed7 100644 --- a/test/CodeGen/PowerPC/fast-isel-conversion.ll +++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE ; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt @@ -9,12 +10,16 @@ define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i64 +; ELF64LE: sitofp_single_i64 ; PPC970: sitofp_single_i64 %b.addr = alloca float, align 4 %conv = sitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -26,12 +31,20 @@ entry: define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i32 +; ELF64LE: sitofp_single_i32 ; PPC970: sitofp_single_i32 %b.addr = alloca float, align 4 %conv = sitofp i32 %a to float ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; 
ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwax ; ELF64: fcfids +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwax +; ELF64LE: fcfids ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -43,6 +56,7 @@ entry: define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: sitofp_single_i16 +; ELF64LE: sitofp_single_i16 ; PPC970: sitofp_single_i16 %b.addr = alloca float, align 4 %conv = sitofp i16 %a to float @@ -50,6 +64,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: extsh +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: extsh ; PPC970: std ; PPC970: lfd @@ -62,6 +80,7 @@ entry: define void @sitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: sitofp_single_i8 +; ELF64LE: sitofp_single_i8 ; PPC970: sitofp_single_i8 %b.addr = alloca float, align 4 %conv = sitofp i8 %a to float @@ -69,6 +88,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfids +; ELF64LE: extsb +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfids ; PPC970: extsb ; PPC970: std ; PPC970: lfd @@ -81,12 +104,20 @@ entry: define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i32 +; ELF64LE: sitofp_double_i32 ; PPC970: sitofp_double_i32 %b.addr = alloca double, align 8 %conv = sitofp i32 %a to double ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwax ; ELF64: fcfid +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwax +; ELF64LE: fcfid ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -97,12 +128,16 @@ entry: define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i64 +; ELF64LE: sitofp_double_i64 ; PPC970: sitofp_double_i64 %b.addr = alloca double, align 8 %conv = sitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; 
ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: std ; PPC970: lfd ; PPC970: fcfid @@ -113,6 +148,7 @@ entry: define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i16 +; ELF64LE: sitofp_double_i16 ; PPC970: sitofp_double_i16 %b.addr = alloca double, align 8 %conv = sitofp i16 %a to double @@ -120,6 +156,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; ELF64LE: extsh +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: extsh ; PPC970: std ; PPC970: lfd @@ -131,6 +171,7 @@ entry: define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: sitofp_double_i8 +; ELF64LE: sitofp_double_i8 ; PPC970: sitofp_double_i8 %b.addr = alloca double, align 8 %conv = sitofp i8 %a to double @@ -138,6 +179,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfid +; ELF64LE: extsb +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfid ; PPC970: extsb ; PPC970: std ; PPC970: lfd @@ -151,12 +196,16 @@ entry: define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i64 +; ELF64LE: uitofp_single_i64 ; PPC970: uitofp_single_i64 %b.addr = alloca float, align 4 %conv = uitofp i64 %a to float ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 ret void @@ -165,12 +214,20 @@ entry: define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i32 +; ELF64LE: uitofp_single_i32 ; PPC970: uitofp_single_i32 %b.addr = alloca float, align 4 %conv = uitofp i32 %a to float ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwzx ; ELF64: fcfidus +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwzx +; ELF64LE: fcfidus ; PPC970-NOT: lfiwzx ; PPC970-NOT: fcfidus store float %conv, float* %b.addr, align 4 
@@ -180,6 +237,7 @@ entry: define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp { entry: ; ELF64: uitofp_single_i16 +; ELF64LE: uitofp_single_i16 ; PPC970: uitofp_single_i16 %b.addr = alloca float, align 4 %conv = uitofp i16 %a to float @@ -187,6 +245,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 ; PPC970: std ; PPC970: lfd @@ -199,6 +261,7 @@ entry: define void @uitofp_single_i8(i8 %a) nounwind ssp { entry: ; ELF64: uitofp_single_i8 +; ELF64LE: uitofp_single_i8 ; PPC970: uitofp_single_i8 %b.addr = alloca float, align 4 %conv = uitofp i8 %a to float @@ -206,6 +269,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidus +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidus ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 ; PPC970: std ; PPC970: lfd @@ -218,12 +285,16 @@ entry: define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i64 +; ELF64LE: uitofp_double_i64 ; PPC970: uitofp_double_i64 %b.addr = alloca double, align 8 %conv = uitofp i64 %a to double ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970-NOT: fcfidu store double %conv, double* %b.addr, align 8 ret void @@ -232,12 +303,20 @@ entry: define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i32 +; ELF64LE: uitofp_double_i32 ; PPC970: uitofp_double_i32 %b.addr = alloca double, align 8 %conv = uitofp i32 %a to double ; ELF64: std +; stack offset used to load the float: 65524 = -16 + 4 +; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524 ; ELF64: lfiwzx ; ELF64: fcfidu +; ELF64LE: std +; stack offset used to load the float: 65520 = -16 + 0 +; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520 +; ELF64LE: lfiwzx +; ELF64LE: fcfidu ; PPC970-NOT: lfiwzx ; PPC970-NOT: fcfidu store 
double %conv, double* %b.addr, align 8 @@ -247,6 +326,7 @@ entry: define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i16 +; ELF64LE: uitofp_double_i16 ; PPC970: uitofp_double_i16 %b.addr = alloca double, align 8 %conv = uitofp i16 %a to double @@ -254,6 +334,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31 ; PPC970: std ; PPC970: lfd @@ -265,6 +349,7 @@ entry: define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp { entry: ; ELF64: uitofp_double_i8 +; ELF64LE: uitofp_double_i8 ; PPC970: uitofp_double_i8 %b.addr = alloca double, align 8 %conv = uitofp i8 %a to double @@ -272,6 +357,10 @@ entry: ; ELF64: std ; ELF64: lfd ; ELF64: fcfidu +; ELF64LE: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56 +; ELF64LE: std +; ELF64LE: lfd +; ELF64LE: fcfidu ; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31 ; PPC970: std ; PPC970: lfd @@ -285,12 +374,16 @@ entry: define void @fptosi_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i32 +; ELF64LE: fptosi_float_i32 ; PPC970: fptosi_float_i32 %b.addr = alloca i32, align 4 %conv = fptosi float %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; ELF64LE: fctiwz +; ELF64LE: stfd +; ELF64LE: lwa ; PPC970: fctiwz ; PPC970: stfd ; PPC970: lwa @@ -301,12 +394,16 @@ entry: define void @fptosi_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptosi_float_i64 +; ELF64LE: fptosi_float_i64 ; PPC970: fptosi_float_i64 %b.addr = alloca i64, align 4 %conv = fptosi float %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctidz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970: fctidz ; PPC970: stfd ; PPC970: ld @@ -317,12 +414,16 @@ entry: define void @fptosi_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i32 +; ELF64LE: fptosi_double_i32 ; PPC970: fptosi_double_i32 %b.addr = alloca i32, align 8 %conv = fptosi 
double %a to i32 ; ELF64: fctiwz ; ELF64: stfd ; ELF64: lwa +; ELF64LE: fctiwz +; ELF64LE: stfd +; ELF64LE: lwa ; PPC970: fctiwz ; PPC970: stfd ; PPC970: lwa @@ -333,12 +434,16 @@ entry: define void @fptosi_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptosi_double_i64 +; ELF64LE: fptosi_double_i64 ; PPC970: fptosi_double_i64 %b.addr = alloca i64, align 8 %conv = fptosi double %a to i64 ; ELF64: fctidz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctidz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970: fctidz ; PPC970: stfd ; PPC970: ld @@ -351,12 +456,16 @@ entry: define void @fptoui_float_i32(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i32 +; ELF64LE: fptoui_float_i32 ; PPC970: fptoui_float_i32 %b.addr = alloca i32, align 4 %conv = fptoui float %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; ELF64LE: fctiwuz +; ELF64LE: stfd +; ELF64LE: lwz ; PPC970: fctidz ; PPC970: stfd ; PPC970: lwz @@ -367,12 +476,16 @@ entry: define void @fptoui_float_i64(float %a) nounwind ssp { entry: ; ELF64: fptoui_float_i64 +; ELF64LE: fptoui_float_i64 ; PPC970: fptoui_float_i64 %b.addr = alloca i64, align 4 %conv = fptoui float %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctiduz +; ELF64LE: stfd +; ELF64LE: ld ; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 4 ret void @@ -381,12 +494,16 @@ entry: define void @fptoui_double_i32(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i32 +; ELF64LE: fptoui_double_i32 ; PPC970: fptoui_double_i32 %b.addr = alloca i32, align 8 %conv = fptoui double %a to i32 ; ELF64: fctiwuz ; ELF64: stfd ; ELF64: lwz +; ELF64LE: fctiwuz +; ELF64LE: stfd +; ELF64LE: lwz ; PPC970: fctidz ; PPC970: stfd ; PPC970: lwz @@ -397,12 +514,16 @@ entry: define void @fptoui_double_i64(double %a) nounwind ssp { entry: ; ELF64: fptoui_double_i64 +; ELF64LE: fptoui_double_i64 ; PPC970: fptoui_double_i64 %b.addr = alloca i64, align 8 %conv = fptoui double %a to i64 ; ELF64: fctiduz ; ELF64: stfd ; ELF64: ld +; ELF64LE: fctiduz 
+; ELF64LE: stfd +; ELF64LE: ld ; PPC970-NOT: fctiduz store i64 %conv, i64* %b.addr, align 8 ret void