if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector())
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
if (VT == MVT::v2i64)
- return false;
+ return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
if (Subtarget.hasQPX()) {
if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
}
+
+ // Single Precision Conversions (FP <-> INT): VSX forms selected for the PPCfcfids / PPCfcfidus nodes
+ def XSCVSXDSP : XX2Form<60, 312, // matches PPCfcfids: f64 operand in, f32 result out
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvsxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ def XSCVUXDSP : XX2Form<60, 296, // matches PPCfcfidus: f64 operand in, f32 result out
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvuxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+
+ // Conversions between vector and scalar single precision (neither def carries a selection pattern)
+ def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), // vssrc in, vsrc out
+ "xscvdpspn $XT, $XB", IIC_VecFP, []>; // empty pattern list: emitted only where a Pat<> names it (the v4f32 scalar_to_vector patterns)
+ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), // vsrc in, vssrc out
+ "xscvspdpn $XT, $XB", IIC_VecFP, []>; // empty pattern list: not selected automatically
+
} // AddedComplexity = 400
} // HasP8Vector
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
} // HasDirectMove, HasVSX
+
+/* Direct moves of various size entities from GPR's into VSR's. Each dag lines
+ the value up into element 0 (both BE and LE). Namely, entities smaller than
+ a doubleword are shifted left and moved for BE. For LE, they're moved, then
+ swapped to go into the least significant element of the VSR.
+
+ $A is the scalar operand of whichever Pat<> references the dag. The 32-bit
+ cases first place $A in sub_32 of an otherwise undefined i64 (INSERT_SUBREG
+ over IMPLICIT_DEF) so that the 64-bit RLDICR/MTVSRD can consume it.
+*/
+def Moves {
+ dag BE_BYTE_0 = (MTVSRD // byte: the direct-move test expects this as "sldi 56" followed by mtvsrd
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 = (MTVSRD // halfword: expected as "sldi 48" followed by mtvsrd
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 = (MTVSRD // word: expected as "sldi 32" followed by mtvsrd
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+ dag BE_DWORD_0 = (MTVSRD $A); // doubleword: moved as-is, no shift
+
+ dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); // unshifted move of the widened 32-bit value
+ dag LE_WORD_1 = (v2i64 (COPY_TO_REGCLASS LE_MTVSRW, VSRC)); // same value, retyped as a v2i64 in VSRC
+ dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); // the LE tests expect this XXPERMDI printed as xxswapd
+ dag LE_DWORD_1 = (v2i64 (COPY_TO_REGCLASS BE_DWORD_0, VSRC)); // reuses the plain MTVSRD of $A
+ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); // same swap as LE_WORD_0, for the 64-bit case
+}
+
+let Predicates = [IsBigEndian, HasP8Vector] in { // BE: f32 scalar_to_vector is a single XSCVDPSPN
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+} // IsBigEndian, HasP8Vector
+
+let Predicates = [IsBigEndian, HasDirectMove] in { // BE: integer scalar_to_vector through the Moves.BE_* dags
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)), // the scalar arrives as i32 for the v16i8, v8i16 and v4i32 cases
+ (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)), // only this case takes an i64 scalar
+ (v2i64 (COPY_TO_REGCLASS Moves.BE_DWORD_0, VSRC))>;
+} // IsBigEndian, HasDirectMove
+
+let Predicates = [IsLittleEndian, HasP8Vector] in { // LE: XSCVDPSPN result fed to XXSLDWI as both inputs, shift 1
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; // the LE test expects one xscvdpspn whose result is used twice
+} // IsLittleEndian, HasP8Vector
+
+let Predicates = [IsLittleEndian, HasDirectMove] in { // LE: integer scalar_to_vector through the Moves.LE_* dags
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)), // v16i8, v8i16 and v4i32 all share Moves.LE_WORD_0
+ (v16i8 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (COPY_TO_REGCLASS Moves.LE_WORD_0, VSRC))>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)), // LE_DWORD_0 is used directly, without a COPY_TO_REGCLASS wrapper
+ (v2i64 Moves.LE_DWORD_0)>;
+} // IsLittleEndian, HasDirectMove
+
return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
}
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) { // forwards to IsRegInClass for the VSFRC class
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI); // the "Unknown source for a VSX copy" assert uses this to accept VSFRC sources
+ }
+
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI)) &&
+ IsVRReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
ret float %conv
; CHECK-LABEL: @_Z6testfcc
; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG01]]
+; CHECK: xscvuxdsp 1, [[MOVEREG01]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z7testfuch
; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG03]]
+; CHECK: xscvuxdsp 1, [[MOVEREG03]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z6testfss
; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG05]]
+; CHECK: xscvsxdsp 1, [[MOVEREG05]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z7testfust
; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG07]]
+; CHECK: xscvuxdsp 1, [[MOVEREG07]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z6testfii
; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG09]]
+; CHECK: xscvsxdsp 1, [[MOVEREG09]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z7testfuij
; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG11]]
+; CHECK: xscvuxdsp 1, [[MOVEREG11]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL:@_Z7testfllx
; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG13]]
+; CHECK: xscvsxdsp 1, [[MOVEREG13]]
}
; Function Attrs: nounwind
ret float %conv
; CHECK-LABEL: @_Z8testfully
; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG15]]
+; CHECK: xscvuxdsp 1, [[MOVEREG15]]
}
; Function Attrs: nounwind
--- /dev/null
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
+
+; The build[csilf] functions simply test the scalar_to_vector handling with
+; direct moves. This corresponds to the "insertelement" instruction. Subsequent
+; to this, there will be a splat corresponding to the shufflevector.
+
+; Function Attrs: nounwind
+define <16 x i8> @buildc(i8 zeroext %a) { ; i8 case: insertelement at index 0, then a zero-mask shufflevector splat
+entry:
+ %a.addr = alloca i8, align 1
+ store i8 %a, i8* %a.addr, align 1
+ %0 = load i8, i8* %a.addr, align 1
+ %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+ %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 56
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <8 x i16> @builds(i16 zeroext %a) { ; i16 case: insertelement at index 0, then a zero-mask shufflevector splat
+entry:
+ %a.addr = alloca i16, align 2
+ store i16 %a, i16* %a.addr, align 2
+ %0 = load i16, i16* %a.addr, align 2
+ %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
+ %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 48
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <4 x i32> @buildi(i32 zeroext %a) { ; i32 case: insertelement at index 0, then a zero-mask shufflevector splat
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %0 = load i32, i32* %a.addr, align 4
+ %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+ %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 32
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @buildl(i64 %a) { ; i64 case: the big-endian expectation is a bare mtvsrd with no preceding shift
+entry:
+ %a.addr = alloca i64, align 8
+ store i64 %a, i64* %a.addr, align 8
+ %0 = load i64, i64* %a.addr, align 8
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+; CHECK: mtvsrd {{[0-9]+}}, 3
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <4 x float> @buildf(float %a) { ; float case: expects xscvdpspn, plus an xxsldwi on little-endian
+entry:
+ %a.addr = alloca float, align 4
+ store float %a, float* %a.addr, align 4
+ %0 = load float, float* %a.addr, align 4
+ %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %splat.splat
+; CHECK: xscvdpspn {{[0-9]+}}, 1
+; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
+; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
+}
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test80
-; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16
+; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]]
+; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxswapd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[V1]]
; CHECK-LE-DAG: xxswapd 35, [[V2]]
; CHECK-LE: vaddudm 2, 2, 3
; CHECK-LE: blr
ret void
; CHECK-LABEL: @intToFlt
; CHECK: lxsiwax [[REGLD2:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfids {{[0-9]}}, [[REGLD2]]
+; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
}
; Function Attrs: nounwind
ret void
; CHECK-LABEL: @uIntToFlt
; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
+; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
}
; Function Attrs: nounwind
# CHECK: xscvdpsp 7, 27
0xf0 0xe0 0xdc 0x24
+# CHECK: xscvdpspn 7, 27
+0xf0 0xe0 0xdc 0x2c
+
# CHECK: xscvdpsxds 7, 27
0xf0 0xe0 0xdd 0x60
# CHECK: xscvspdp 7, 27
0xf0 0xe0 0xdd 0x24
+# CHECK: xscvspdpn 7, 27
+0xf0 0xe0 0xdd 0x2c
+
+# CHECK: xscvsxdsp 7, 27
+0xf0 0xe0 0xdc 0xe0
+
# CHECK: xscvsxddp 7, 27
0xf0 0xe0 0xdd 0xe0
+# CHECK: xscvuxdsp 7, 27
+0xf0 0xe0 0xdc 0xa0
+
# CHECK: xscvuxddp 7, 27
0xf0 0xe0 0xdd 0xa0
# CHECK-BE: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
# CHECK-LE: xscvdpsp 7, 27 # encoding: [0x24,0xdc,0xe0,0xf0]
xscvdpsp 7, 27
+# CHECK-BE: xscvdpspn 7, 27 # encoding: [0xf0,0xe0,0xdc,0x2c]
+# CHECK-LE: xscvdpspn 7, 27 # encoding: [0x2c,0xdc,0xe0,0xf0]
+ xscvdpspn 7, 27
# CHECK-BE: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
# CHECK-LE: xscvdpsxds 7, 27 # encoding: [0x60,0xdd,0xe0,0xf0]
xscvdpsxds 7, 27
# CHECK-BE: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
# CHECK-LE: xscvspdp 7, 27 # encoding: [0x24,0xdd,0xe0,0xf0]
xscvspdp 7, 27
+# CHECK-BE: xscvspdpn 7, 27 # encoding: [0xf0,0xe0,0xdd,0x2c]
+# CHECK-LE: xscvspdpn 7, 27 # encoding: [0x2c,0xdd,0xe0,0xf0]
+ xscvspdpn 7, 27
+# CHECK-BE: xscvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xe0]
+# CHECK-LE: xscvsxdsp 7, 27 # encoding: [0xe0,0xdc,0xe0,0xf0]
+ xscvsxdsp 7, 27
# CHECK-BE: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
# CHECK-LE: xscvsxddp 7, 27 # encoding: [0xe0,0xdd,0xe0,0xf0]
xscvsxddp 7, 27
+# CHECK-BE: xscvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xa0]
+# CHECK-LE: xscvuxdsp 7, 27 # encoding: [0xa0,0xdc,0xe0,0xf0]
+ xscvuxdsp 7, 27
# CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
# CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0]
xscvuxddp 7, 27