multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
Operand CC, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm, string asm_alt,
- OpndItins itins> {
+ OpndItins itins, ImmLeaf immLeaf> {
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))],
itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))],
+ (ld_frag addr:$src2), immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S>,
- XS, VEX_4V, VEX_LIG;
+ SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S>, // same latency as 32 bit compare
+ SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare
XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
- XS;
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S,
+ i8immZExt3>, XS;
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSE_ALU_F64S>,
- XD;
+ SSE_ALU_F64S, i8immZExt3>, XD;
}
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
- Intrinsic Int, string asm, OpndItins itins> {
+ Intrinsic Int, string asm, OpndItins itins,
+ ImmLeaf immLeaf> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, imm:$cc))],
+ VR128:$src, immLeaf:$cc))],
itins.rr>,
Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), imm:$cc))],
+ (load addr:$src), immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S>,
+ SSE_ALU_F32S, i8immZExt5>,
XS, VEX_4V;
defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S>, // same latency as f32
+ SSE_ALU_F32S, i8immZExt5>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S>, XS;
+ SSE_ALU_F32S, i8immZExt3>, XS;
defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F64S>,
+ SSE_ALU_F64S, i8immZExt3>,
XD;
}
}
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
- string asm_alt, Domain d,
+ string asm_alt, Domain d, ImmLeaf immLeaf,
OpndItins itins = SSE_ALU_F32P> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), immLeaf:$cc))],
itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle, i8immZExt5>, PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble, i8immZExt5>, PD, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle, i8immZExt5>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble, i8immZExt5>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle, SSE_ALU_F32P>, PS;
+ SSEPackedSingle, i8immZExt5, SSE_ALU_F32P>, PS;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble, SSE_ALU_F64P>, PD;
+ SSEPackedDouble, i8immZExt5, SSE_ALU_F64P>, PD;
}
let Predicates = [HasAVX] in {
int_x86_avx2_maskstore_q_256>, VEX_W;
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
- (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+ (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
(VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
+ (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
+ (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
- (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+ (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
(bc_v8f32 (v8i32 immAllZerosV)))),
- (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+ (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
- (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
+ (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
(VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
+ (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
+ (bc_v4f32 (v4i32 immAllZerosV)))),
+ (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
+ (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
+ (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
+ (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
+ (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
- (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+ (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
(VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
- (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+ (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
(v4f64 immAllZerosV))),
- (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+ (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
- (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
+ (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
(VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
+def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
+ (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
+ (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
+ (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
+ (v2f64 immAllZerosV))),
+ (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
+ (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
+ (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
+ (bc_v2i64 (v4i32 immAllZerosV)))),
+ (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;
+
+def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
+ (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
+ VR128:$mask)>;
//===----------------------------------------------------------------------===//
// Variable Bit Shifts