From: Nemanja Ivanovic Date: Thu, 13 Aug 2015 17:40:44 +0000 (+0000) Subject: Scalar to vector conversions using direct moves X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=31f6eee816354985afaa63fabf61d88271627811;p=oota-llvm.git Scalar to vector conversions using direct moves This patch corresponds to review: http://reviews.llvm.org/D11471 It improves the code generated for converting a scalar to a vector value. With direct moves from GPRs to VSRs, we no longer require expensive stack operations for this. Subsequent patches will handle the reverse case and more general operations between vectors and their scalar elements. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244921 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6058e513fc4..503f01f0827 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -542,6 +542,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); @@ -11490,7 +11498,7 @@ bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) - return false; + return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasQPX()) { if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) diff --git 
a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 20c95fe888e..187145d3ac0 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1181,6 +1181,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } + + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; + + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, []>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // AddedComplexity = 400 } // HasP8Vector @@ -1204,3 +1221,60 @@ let Predicates = [HasDirectMove, HasVSX] in { "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; } // HasDirectMove, HasVSX + +/* Direct moves of entities of various sizes from GPRs into VSRs. Each move lines + the value up into element 0 (both BE and LE). Namely, entities smaller than + a doubleword are shifted left and moved for BE. For LE, they're moved, then + swapped to go into the least significant element of the VSR. 
+*/ +def Moves { + dag BE_BYTE_0 = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF_0 = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD_0 = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); + dag BE_DWORD_0 = (MTVSRD $A); + + dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); + dag LE_WORD_1 = (v2i64 (COPY_TO_REGCLASS LE_MTVSRW, VSRC)); + dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); + dag LE_DWORD_1 = (v2i64 (COPY_TO_REGCLASS BE_DWORD_0, VSRC)); + dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); +} + +let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; +} // IsBigEndian, HasP8Vector + +let Predicates = [IsBigEndian, HasDirectMove] in { + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 (COPY_TO_REGCLASS Moves.BE_DWORD_0, VSRC))>; +} // IsBigEndian, HasDirectMove + +let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; +} // IsLittleEndian, HasP8Vector + +let Predicates = [IsLittleEndian, HasDirectMove] in { + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 Moves.LE_DWORD_0)>; +} // IsLittleEndian, HasDirectMove + diff --git 
a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index 5e3ae2a4471..76b681de7f4 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -77,6 +77,10 @@ namespace { return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); } + bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI); + } + protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -100,7 +104,8 @@ protected: IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass : &PPC::VSLRCRegClass; assert((IsF8Reg(SrcMO.getReg(), MRI) || - IsVRReg(SrcMO.getReg(), MRI)) && + IsVRReg(SrcMO.getReg(), MRI) || + IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); unsigned NewVReg = MRI.createVirtualRegister(SrcRC); diff --git a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll index 1d9b6482314..4868a18a95a 100644 --- a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll +++ b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll @@ -24,8 +24,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z6testfcc ; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG01]] +; CHECK: xscvuxdsp 1, [[MOVEREG01]] } ; Function Attrs: nounwind @@ -77,8 +76,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z7testfuch ; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG03]] +; CHECK: xscvuxdsp 1, [[MOVEREG03]] } ; Function Attrs: nounwind @@ -130,8 +128,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z6testfss ; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG05]] +; CHECK: xscvsxdsp 1, [[MOVEREG05]] } ; Function Attrs: nounwind @@ -183,8 +180,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z7testfust ; CHECK: 
mtvsrwz [[MOVEREG07:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG07]] +; CHECK: xscvuxdsp 1, [[MOVEREG07]] } ; Function Attrs: nounwind @@ -236,8 +232,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z6testfii ; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG09]] +; CHECK: xscvsxdsp 1, [[MOVEREG09]] } ; Function Attrs: nounwind @@ -289,8 +284,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z7testfuij ; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG11]] +; CHECK: xscvuxdsp 1, [[MOVEREG11]] } ; Function Attrs: nounwind @@ -342,8 +336,7 @@ entry: ret float %conv ; CHECK-LABEL:@_Z7testfllx ; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG13]] +; CHECK: xscvsxdsp 1, [[MOVEREG13]] } ; Function Attrs: nounwind @@ -395,8 +388,7 @@ entry: ret float %conv ; CHECK-LABEL: @_Z8testfully ; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG15]] +; CHECK: xscvuxdsp 1, [[MOVEREG15]] } ; Function Attrs: nounwind diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll new file mode 100644 index 00000000000..535ddf4f574 --- /dev/null +++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -0,0 +1,79 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE + +; The build[csilf] functions simply test the scalar_to_vector handling with +; direct moves. This corresponds to the "insertelement" instruction. Subsequent +; to this, there will be a splat corresponding to the shufflevector. 
+ +; Function Attrs: nounwind +define <16 x i8> @buildc(i8 zeroext %a) { +entry: + %a.addr = alloca i8, align 1 + store i8 %a, i8* %a.addr, align 1 + %0 = load i8, i8* %a.addr, align 1 + %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0 + %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %splat.splat +; CHECK: sldi [[REG1:[0-9]+]], 3, 56 +; CHECK: mtvsrd {{[0-9]+}}, [[REG1]] +; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 +; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]] +} + +; Function Attrs: nounwind +define <8 x i16> @builds(i16 zeroext %a) { +entry: + %a.addr = alloca i16, align 2 + store i16 %a, i16* %a.addr, align 2 + %0 = load i16, i16* %a.addr, align 2 + %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0 + %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %splat.splat +; CHECK: sldi [[REG1:[0-9]+]], 3, 48 +; CHECK: mtvsrd {{[0-9]+}}, [[REG1]] +; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 +; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]] +} + +; Function Attrs: nounwind +define <4 x i32> @buildi(i32 zeroext %a) { +entry: + %a.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %splat.splat +; CHECK: sldi [[REG1:[0-9]+]], 3, 32 +; CHECK: mtvsrd {{[0-9]+}}, [[REG1]] +; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 +; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]] +} + +; Function Attrs: nounwind +define <2 x i64> @buildl(i64 %a) { +entry: + %a.addr = alloca i64, align 8 + store i64 %a, i64* %a.addr, align 8 + %0 = load i64, i64* %a.addr, align 8 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> 
zeroinitializer + ret <2 x i64> %splat.splat +; CHECK: mtvsrd {{[0-9]+}}, 3 +; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3 +; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]] +} + +; Function Attrs: nounwind +define <4 x float> @buildf(float %a) { +entry: + %a.addr = alloca float, align 4 + store float %a, float* %a.addr, align 4 + %0 = load float, float* %a.addr, align 4 + %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %splat.splat +; CHECK: xscvdpspn {{[0-9]+}}, 1 +; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1 +; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1 +} diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll index dceb2516c69..b4b1d248d1a 100644 --- a/test/CodeGen/PowerPC/vsx.ll +++ b/test/CodeGen/PowerPC/vsx.ll @@ -1226,11 +1226,11 @@ define <2 x i32> @test80(i32 %v) { ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test80 -; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16 +; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI -; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]] +; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]] ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]] -; CHECK-LE-DAG: xxswapd 34, [[V1]] +; CHECK-LE-DAG: xxspltd 34, [[V1]] ; CHECK-LE-DAG: xxswapd 35, [[V2]] ; CHECK-LE: vaddudm 2, 2, 3 ; CHECK-LE: blr diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll index 10297088596..c2cb71c5888 100644 --- a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -55,8 +55,7 @@ entry: ret void ; CHECK-LABEL: @intToFlt ; CHECK: lxsiwax [[REGLD2:[0-9]+]], -; FIXME: the below will change when the VSX form is implemented -; CHECK: fcfids {{[0-9]}}, [[REGLD2]] +; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]] } ; Function Attrs: nounwind @@ -108,8 +107,7 @@ entry: ret void ; CHECK-LABEL: @uIntToFlt ; CHECK: lxsiwzx 
[[REGLD4:[0-9]+]], -; FIXME: the below will change when the VSX form is implemented -; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]] +; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]] } ; Function Attrs: nounwind diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt index 6f4ba6f6b9a..37fd17b015a 100644 --- a/test/MC/Disassembler/PowerPC/vsx.txt +++ b/test/MC/Disassembler/PowerPC/vsx.txt @@ -57,6 +57,9 @@ # CHECK: xscvdpsp 7, 27 0xf0 0xe0 0xdc 0x24 +# CHECK: xscvdpspn 7, 27 +0xf0 0xe0 0xdc 0x2c + # CHECK: xscvdpsxds 7, 27 0xf0 0xe0 0xdd 0x60 @@ -72,9 +75,18 @@ # CHECK: xscvspdp 7, 27 0xf0 0xe0 0xdd 0x24 +# CHECK: xscvspdpn 7, 27 +0xf0 0xe0 0xdd 0x2c + +# CHECK: xscvsxdsp 7, 27 +0xf0 0xe0 0xdc 0xe0 + # CHECK: xscvsxddp 7, 27 0xf0 0xe0 0xdd 0xe0 +# CHECK: xscvuxdsp 7, 27 +0xf0 0xe0 0xdc 0xa0 + # CHECK: xscvuxddp 7, 27 0xf0 0xe0 0xdd 0xa0 diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s index 352fc517380..2c03659c9cd 100644 --- a/test/MC/PowerPC/vsx.s +++ b/test/MC/PowerPC/vsx.s @@ -62,6 +62,9 @@ # CHECK-BE: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24] # CHECK-LE: xscvdpsp 7, 27 # encoding: [0x24,0xdc,0xe0,0xf0] xscvdpsp 7, 27 +# CHECK-BE: xscvdpspn 7, 27 # encoding: [0xf0,0xe0,0xdc,0x2c] +# CHECK-LE: xscvdpspn 7, 27 # encoding: [0x2c,0xdc,0xe0,0xf0] + xscvdpspn 7, 27 # CHECK-BE: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60] # CHECK-LE: xscvdpsxds 7, 27 # encoding: [0x60,0xdd,0xe0,0xf0] xscvdpsxds 7, 27 @@ -77,9 +80,18 @@ # CHECK-BE: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24] # CHECK-LE: xscvspdp 7, 27 # encoding: [0x24,0xdd,0xe0,0xf0] xscvspdp 7, 27 +# CHECK-BE: xscvspdpn 7, 27 # encoding: [0xf0,0xe0,0xdd,0x2c] +# CHECK-LE: xscvspdpn 7, 27 # encoding: [0x2c,0xdd,0xe0,0xf0] + xscvspdpn 7, 27 +# CHECK-BE: xscvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xe0] +# CHECK-LE: xscvsxdsp 7, 27 # encoding: [0xe0,0xdc,0xe0,0xf0] + xscvsxdsp 7, 27 # CHECK-BE: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0] # CHECK-LE: xscvsxddp 7, 27 # 
encoding: [0xe0,0xdd,0xe0,0xf0] xscvsxddp 7, 27 +# CHECK-BE: xscvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xa0] +# CHECK-LE: xscvuxdsp 7, 27 # encoding: [0xa0,0xdc,0xe0,0xf0] + xscvuxdsp 7, 27 # CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0] # CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0] xscvuxddp 7, 27