Operand memopr, ComplexPattern mem_cpat,
Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
- def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
- def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
// AVX intrinsic form of vcvtsd2ss whose second source comes from memory:
// $src2 is an sdmem operand matched through sse_load_f64 and the def is
// scheduled as a load (WriteCvtF2FLd, ReadAfterLd). A memory source operand
// requires the MRMSrcMem format, as used by the sibling Int_VCVTSS2SDrm
// definition; this def previously declared MRMSrcReg.
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
// AVX intrinsic form of vcvtss2sd whose second source comes from memory:
// $src2 is an ssmem operand matched through sse_load_f32 and passed, together
// with the register operand $src1, to int_x86_sse2_cvtss2sd. Scheduled as a
// load (WriteCvtF2FLd, ReadAfterLd).
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
def : Pat<(Intr (load addr:$src)),
(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
addr:$src), VR128))>;
- def : Pat<(Intr mem_cpat:$src),
+ def : Pat<(Intr mem_cpat:$src),
(!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
X86MemOperand x86memop, Operand vec_memop,
ComplexPattern mem_cpat,
Intrinsic Intr, SDNode OpNode, Domain d,
- OpndItins itins, Predicate target, string Suffix> {
+ OpndItins itins, string Suffix> {
let hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let isCodeGenOnly = 1 in {
- // todo: uncomment when all r_Int forms will be added to X86InstrInfo.cpp
- //def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
- // (ins VR128:$src1, VR128:$src2),
- // !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- // []>, Sched<[itins.Sched.Folded]>;
+ def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, vec_memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
+ []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
- let Predicates = [target] in {
+ let Predicates = [UseAVX] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
mem_cpat:$src)>;
- // todo: use r_Int form when it will be ready
- //def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int)
- // (VT (IMPLICIT_DEF)), VR128:$src)>;
+ }
+ let Predicates = [HasAVX] in {
def : Pat<(Intr VR128:$src),
- (vt (COPY_TO_REGCLASS(
- !cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
- (ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;
+ (!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
+ VR128:$src)>;
+
def : Pat<(Intr mem_cpat:$src),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
}
- let Predicates = [target, OptForSize] in
+ let Predicates = [UseAVX, OptForSize] in
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, SSEPackedDouble, itins, UseAVX, "SD">,
+ OpNode, SSEPackedDouble, itins, "SD">,
XD, VEX_4V, VEX_LIG;
}
// There is no f64 version of the reciprocal approximation instructions.
+// TODO: We should add *scalar* op patterns for these just like we have for
+// the binops above. If the binop and unop patterns could all be unified
+// that would be even better.
+
+// Selection patterns for a scalar unary intrinsic whose result is merged back
+// into a destination vector.
+//   Intr          - the scalar unary intrinsic being matched.
+//   OpcPrefix     - instruction name prefix; "r_Int" (and a leading "V" for
+//                   the AVX variants) is appended to find the instruction.
+//   Move          - the scalar-move node that merges the result into $dst.
+//   VT            - vector value type of both operands and of the result.
+//   BasePredicate - predicate guarding the non-AVX move pattern.
+multiclass scalar_unary_math_patterns<Intrinsic Intr, string OpcPrefix,
+                                      SDNode Move, ValueType VT,
+                                      Predicate BasePredicate> {
+  // Non-AVX form: Move($dst, Intr($src)) becomes the r_Int instruction.
+  let Predicates = [BasePredicate] in
+  def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
+            (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+
+  // With SSE 4.1, blendi is preferred to movs*, so the same merge may show
+  // up as a blend with immediate 1; match that shape as well.
+  let Predicates = [UseSSE41] in
+  def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
+            (!cast<I>(OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+
+  // AVX: both shapes again, selecting the "V"-prefixed r_Int instruction.
+  let Predicates = [HasAVX] in {
+    def : Pat<(VT (Move VT:$dst, (Intr VT:$src))),
+              (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+
+    def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))),
+              (!cast<I>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>;
+  }
+}
+
+// Single-precision unary intrinsics: merged with X86Movss on v4f32, base
+// predicate UseSSE1.
+defm : scalar_unary_math_patterns<int_x86_sse_rcp_ss, "RCPSS",
+                                  X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<int_x86_sse_rsqrt_ss, "RSQRTSS",
+                                  X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<int_x86_sse_sqrt_ss, "SQRTSS",
+                                  X86Movss, v4f32, UseSSE1>;
+// Double-precision square root: merged with X86Movsd on v2f64, base
+// predicate UseSSE2.
+defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD",
+                                  X86Movsd, v2f64, UseSSE2>;
+
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
int_x86_avx2_psad_bw, SSE_PMADD, 1>;
+// Select the X86psadbw node to the PSADBW instruction of each encoding level.
+// Every pattern passes the sources to the instruction in swapped order
+// ($src2, $src1); presumably this relies on psadbw being commutative --
+// TODO confirm.
+let Predicates = [HasAVX2] in
+  def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1),
+                              (v32i8 VR256:$src2))),
+            (VPSADBWYrr VR256:$src2, VR256:$src1)>;
+
+let Predicates = [HasAVX] in
+  def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+                              (v16i8 VR128:$src2))),
+            (VPSADBWrr VR128:$src2, VR128:$src1)>;
+
+// Legacy encoding. Guarded with UseSSE2 like the other non-VEX patterns in
+// this file, so that it does not also match when the HasAVX pattern above
+// applies; without a predicate the choice between the two identical v16i8
+// patterns depended only on their definition order.
+let Predicates = [UseSSE2] in
+  def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
+                              (v16i8 VR128:$src2))),
+            (PSADBWrr VR128:$src2, VR128:$src1)>;
+
+
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
//===---------------------------------------------------------------------===//
// Store / copy the lower 64 bits of an XMM register.
//
// When the AVX predicate holds, lower the int_x86_sse2_storel_dq intrinsic
// (address $dst, VR128 value $src) to the VMOVPQI2QImr store.
-let Predicates = [UseAVX] in
+let Predicates = [HasAVX] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
(VMOVPQI2QImr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
OpndItins SSEItins, OpndItins AVXItins,
OpndItins AVX2Itins> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
- let Predicates = [HasAVX] in
+ let Predicates = [HasAVX, NoVLX] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, AVXItins>, VEX;
- let Predicates = [HasAVX2] in
+ let Predicates = [HasAVX2, NoVLX] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, AVX2Itins>, VEX, VEX_L;
}
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
// Instantiate the AVX2 pmovx pattern sets: VPMOVSX with X86vsext and VPMOVZX
// with X86vzext.
// NOTE(review): NoVLX presumably keeps these patterns from applying on
// subtargets where the AVX-512VL forms are available -- confirm.
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
// Instantiate the 128-bit AVX pmovx pattern sets: VPMOVSX with X86vsext and
// the extloadi32i16 load fragment, VPMOVZX with X86vzext and loadi16_anyext.
// NOTE(review): NoVLX presumably keeps these patterns from applying on
// subtargets where the AVX-512VL forms are available -- confirm.
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}