LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::MOV16r0:
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
Alignment = 16;
break;
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
};
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
+ (MOVSSrr (v4f32 (V_SET0PS)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
+ (MOVSSrr (v4i32 (V_SET0PI)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}