let Predicates = [UseAVX] in {
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+ (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
(CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
- (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
+ (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
- (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+ (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+ (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
+ (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
+let Predicates = [UseSSE41] in {
+ // If we're inserting an element from a load or a null pshuf of a load,
+ // fold the load into the insertps instruction.
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd (v4f32
+ (scalar_to_vector (loadf32 addr:$src2))), (i8 0)),
+ imm:$src3)),
+ (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd
+ (loadv4f32 addr:$src2), (i8 0)), imm:$src3)),
+ (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+}
+
+let Predicates = [UseAVX] in {
+ // If we're inserting an element from a vbroadcast of a load, fold the
+ // load into the X86insertps instruction.
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
+ (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
+ (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
+ (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
+ (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
multiclass pclmul_alias<string asm, int immop> {
def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
- (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>;
def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
- (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>;
def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
- (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop),
+ 0>;
def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
- (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop),
+ 0>;
}
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
+class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT,
+ PatFrag ld_frag, SchedWrite Sched> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+ Sched<[Sched]>, VEX {
+ let mayLoad = 1;
+}
+
// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
Intrinsic Int, SchedWrite Sched> :
[(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcast_ss, WriteLoad>;
- def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256,
- WriteFShuffleLd>, VEX_L;
+ def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+ f32mem, v4f32, loadf32, WriteLoad>;
+ def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+ f32mem, v8f32, loadf32,
+ WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256,
- WriteFShuffleLd>, VEX_L;
+def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+ v4f64, loadf64, WriteFShuffleLd>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256,
WriteFShuffleLd>, VEX_L;
}
let Predicates = [HasAVX] in {
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDYrm addr:$src)>;
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
-
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {