// Memory-source form of cvttsd2si (opcode 0x2C, MRMSrcMem) writing an R32.
// Selected for the int_x86_sse2_cvttsd2si intrinsic when its operand is a
// load from $src; the change below makes that load explicitly typed as v2f64
// (loadv2f64) instead of an untyped (load ...).
def Int_CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f128mem:$src),
"cvttsd2si {$src, $dst|$dst, $src}",
[(set R32:$dst, (int_x86_sse2_cvttsd2si
- (load addr:$src)))]>;
+ (loadv2f64 addr:$src)))]>;
def CVTSD2SIrr: SDI<0x2D, MRMSrcReg, (ops R32:$dst, VR128:$src),
"cvtsd2si {$src, $dst|$dst, $src}",
// Memory-source form of cvtsd2si (opcode 0x2D, MRMSrcMem) writing an R32.
// Selected for the int_x86_sse2_cvtsd2si intrinsic applied to a load from
// $src; the load is now matched as loadv2f64 rather than an untyped load.
def CVTSD2SIrm: SDI<0x2D, MRMSrcMem, (ops R32:$dst, f128mem:$src),
"cvtsd2si {$src, $dst|$dst, $src}",
[(set R32:$dst, (int_x86_sse2_cvtsd2si
- (load addr:$src)))]>;
+ (loadv2f64 addr:$src)))]>;
// Comparison instructions
let isTwoAddress = 1 in {
// Memory-source form of cvtdq2ps (opcode 0x5B, TB prefix, requires SSE2).
// Selected for int_x86_sse2_cvtdq2ps applied to a load from $src. The integer
// vector load is now matched as a v2i64 load bit-converted to v4i32
// (bc_v4i32 (loadv2i64 ...)) rather than an untyped load.
def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
"cvtdq2ps {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (load addr:$src)))]>,
+ (bc_v4i32 (loadv2i64 addr:$src))))]>,
TB, Requires<[HasSSE2]>;
// SSE2 instructions with XS prefix
// Memory-source form of cvtdq2pd (opcode 0xE6, XS prefix, requires SSE2).
// Selected for int_x86_sse2_cvtdq2pd applied to a load from $src, now matched
// as (bc_v4i32 (loadv2i64 ...)) rather than an untyped load.
// NOTE(review): the operand is declared i64mem while the pattern matches a
// full v2i64 load -- confirm that folding the wider load here is intended.
def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
"cvtdq2pd {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (load addr:$src)))]>,
+ (bc_v4i32 (loadv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops VR64:$dst, VR128:$src),
// Memory-source form of cvtps2dq (opcode 0x5B, MRMSrcMem).
// Selected for int_x86_sse2_cvtps2dq applied to a load from $src; the load is
// now matched as loadv4f32 rather than an untyped load.
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
"cvtps2dq {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (load addr:$src)))]>;
+ (loadv4f32 addr:$src)))]>;
// SSE2 packed instructions with XS prefix
def CVTTPS2DQrr : I<0x5B, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"cvttps2dq {$src, $dst|$dst, $src}",
// Memory-source form of cvttps2dq (opcode 0x5B, XS prefix, requires SSE2).
// Selected for int_x86_sse2_cvttps2dq applied to a load from $src; the load
// is now matched as loadv4f32 rather than an untyped load.
def CVTTPS2DQrm : I<0x5B, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
"cvttps2dq {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (load addr:$src)))]>,
+ (loadv4f32 addr:$src)))]>,
XS, Requires<[HasSSE2]>;
// SSE2 packed instructions with XD prefix
// Memory-source form of cvtpd2dq (opcode 0xE6, XD prefix, requires SSE2).
// Selected for int_x86_sse2_cvtpd2dq applied to a load from $src; the load is
// now matched as loadv2f64 rather than an untyped load.
def CVTPD2DQrm : I<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
"cvtpd2dq {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (load addr:$src)))]>,
+ (loadv2f64 addr:$src)))]>,
XD, Requires<[HasSSE2]>;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (ops VR128:$dst, VR128:$src),
"cvttpd2dq {$src, $dst|$dst, $src}",
// Memory-source form of cvttpd2dq (opcode 0xE6, MRMSrcMem).
// Selected for int_x86_sse2_cvttpd2dq applied to a load from $src; the load
// is now matched as loadv2f64 rather than an untyped load.
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
"cvttpd2dq {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (load addr:$src)))]>;
+ (loadv2f64 addr:$src)))]>;
// SSE2 instructions without OpSize prefix
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
// Memory-source form of cvtps2pd (opcode 0x5A, TB prefix, requires SSE2).
// Selected for int_x86_sse2_cvtps2pd applied to a load from $src; the load is
// now matched as loadv4f32 rather than an untyped load.
// The format is MRMSrcMem because $src is a memory operand: every other *rm
// definition in this file uses MRMSrcMem, while MRMSrcReg is used only by the
// *rr forms whose source is a VR128 register. The previous MRMSrcReg here did
// not agree with the operand list.
// NOTE(review): the operand is declared f64mem while the pattern matches a
// full v4f32 load -- confirm that folding the wider load here is intended.
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
"cvtps2pd {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))]>,
+ (loadv4f32 addr:$src)))]>,
TB, Requires<[HasSSE2]>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops VR128:$dst, VR128:$src),
// Memory-source form of cvtpd2ps (opcode 0x5A).
// Selected for int_x86_sse2_cvtpd2ps applied to a load from $src; the load is
// now matched as loadv2f64 rather than an untyped load.
// The format is MRMSrcMem because $src is a memory operand (f128mem): every
// other *rm definition in this file uses MRMSrcMem, while MRMSrcReg is used
// only by the *rr forms whose source is a VR128 register. The previous
// MRMSrcReg here did not agree with the operand list.
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops VR128:$dst, f128mem:$src),
"cvtpd2ps {$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (load addr:$src)))]>;
+ (loadv2f64 addr:$src)))]>;
// Arithmetic
let isTwoAddress = 1 in {
"movdqa {$src, $dst|$dst, $src}", []>;
// movdqa load form (opcode 0x6F, MRMSrcMem): loads an i128mem operand into a
// VR128 register. Its selection pattern changes from a v4i32 load to a v2i64
// load, so v2i64 becomes the integer vector type this instruction matches
// directly.
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
"movdqa {$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv4i32 addr:$src))]>;
+ [(set VR128:$dst, (loadv2i64 addr:$src))]>;
// movdqa store form (opcode 0x7F, MRMDestMem): stores a VR128 register to an
// i128mem operand. Its selection pattern changes from storing a v4i32 value
// to storing a v2i64 value, mirroring the MOVDQArm load pattern.
def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
"movdqa {$src, $dst|$dst, $src}",
- [(store (v4i32 VR128:$src), addr:$dst)]>;
+ [(store (v2i64 VR128:$src), addr:$dst)]>;
// 128-bit Integer Arithmetic
let isTwoAddress = 1 in {
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
"pshufd {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
- (load addr:$src1), (undef),
+ (bc_v4i32 (loadv2i64 addr:$src1)),
+ (undef),
PSHUFD_shuffle_mask:$src2)))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
(ops VR128:$dst, i128mem:$src1, i8imm:$src2),
"pshufhw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
- (load addr:$src1), (undef),
+ (bc_v8i16 (loadv2i64 addr:$src1)),
+ (undef),
PSHUFHW_shuffle_mask:$src2)))]>,
XS, Requires<[HasSSE2]>;
(ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
"pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
- (load addr:$src1), (undef),
+ (bc_v8i16 (loadv2i64 addr:$src1)),
+ (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>;
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpcklbw {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (loadv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpcklwd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (loadv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckldq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (loadv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpcklqdq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v2i64 (vector_shuffle VR128:$src1,
+ (loadv2i64 addr:$src2),
UNPCKL_shuffle_mask)))]>;
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckhbw {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v16i8 (vector_shuffle VR128:$src1,
+ (bc_v16i8 (loadv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckhwd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v8i16 (vector_shuffle VR128:$src1,
+ (bc_v8i16 (loadv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckhdq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v4i32 (vector_shuffle VR128:$src1,
+ (bc_v4i32 (loadv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
(ops VR128:$dst, VR128:$src1, i128mem:$src2),
"punpckhqdq {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+ (v2i64 (vector_shuffle VR128:$src1,
+ (loadv2i64 addr:$src2),
UNPCKH_shuffle_mask)))]>;
}
// Memory-source form of pextrw (opcode 0xC5, MRMSrcMem) writing an R32.
// Selected for X86pextrw with an immediate index ($src2) and a vector operand
// loaded from $src1. The vector load is now matched as a v2i64 load
// bit-converted to v8i16 instead of a direct loadv8i16.
def PEXTRWm : PDIi8<0xC5, MRMSrcMem,
(ops R32:$dst, i128mem:$src1, i32i8imm:$src2),
"pextrw {$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set R32:$dst, (X86pextrw (loadv8i16 addr:$src1),
+ [(set R32:$dst, (X86pextrw
+ (bc_v8i16 (loadv2i64 addr:$src1)),
(i32 imm:$src2)))]>;
let isTwoAddress = 1 in {
// All-ones vector constants are selected as V_SETALLONES: the v2i64 case is
// gated on SSE2 and the v4f32 case on SSE1.
def : Pat<(v2i64 immAllOnesV), (v2i64 (V_SETALLONES))>, Requires<[HasSSE2]>;
def : Pat<(v4f32 immAllOnesV), (v4f32 (V_SETALLONES))>, Requires<[HasSSE1]>;
// The four load patterns below are deleted by this patch. The v2i64 case is
// now covered by MOVDQArm's own (loadv2i64 ...) pattern; the v16i8 / v8i16 /
// v4i32 cases are presumably handled by matching those loads as bit-converted
// v2i64 loads elsewhere -- TODO confirm against the lowering code.
-// Load 128-bit integer vector values.
-def : Pat<(v16i8 (load addr:$src)), (MOVDQArm addr:$src)>,
- Requires<[HasSSE2]>;
-def : Pat<(v8i16 (load addr:$src)), (MOVDQArm addr:$src)>,
- Requires<[HasSSE2]>;
-def : Pat<(v4i32 (load addr:$src)), (MOVDQArm addr:$src)>,
- Requires<[HasSSE2]>;
-def : Pat<(v2i64 (load addr:$src)), (MOVDQArm addr:$src)>,
- Requires<[HasSSE2]>;
-
// Store 128-bit integer vector values.
// A store of a v16i8 value held in VR128 is selected as MOVDQAmr (SSE2 only).
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// A store of a v4i32 value held in VR128 is selected as MOVDQAmr (SSE2 only).
// The separate v2i64 store pattern is deleted because MOVDQAmr's own pattern
// now matches a v2i64 store directly.
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
// Scalar to v8i16 / v16i8. The source may be an R32, but only the lower 8 or
// 16 bits matter.
(v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
Requires<[HasSSE1]>;
// The three patterns below are deleted by this patch: the pshufd / pshufhw /
// pshuflw memory-form instruction patterns now match
// (bc_* (loadv2i64 addr:$src1)) themselves, so these extra Pats are redundant.
// Note that the deleted PSHUFLWmi pattern passed PSHUFHW_shuffle_mask in its
// result while matching PSHUFLW_shuffle_mask in its source.
-// Special pshuf* cases: folding (bit_convert (loadv2i64 addr)).
-def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (loadv2i64 addr:$src1)), (undef),
- PSHUFD_shuffle_mask:$src2)),
- (PSHUFDmi addr:$src1, PSHUFD_shuffle_mask:$src2)>,
- Requires<[HasSSE2]>;
-def : Pat<(v8i16 (vector_shuffle (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
- PSHUFHW_shuffle_mask:$src2)),
- (PSHUFHWmi addr:$src1, PSHUFHW_shuffle_mask:$src2)>,
- Requires<[HasSSE2]>;
-def : Pat<(v8i16 (vector_shuffle (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
- PSHUFLW_shuffle_mask:$src2)),
- (PSHUFLWmi addr:$src1, PSHUFHW_shuffle_mask:$src2)>,
- Requires<[HasSSE2]>;
-
-
// Special unary SHUFPSrr case.
// FIXME: when we want non two-address code, then we should use PSHUFD?
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
PSHUFD_binary_shuffle_mask:$sm),
(v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
PSHUFD_binary_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
// Binary v4i32 shuffle whose second operand comes from memory: selected as
// SHUFPSrm with the same PSHUFD_binary_shuffle_mask (SSE2 only). The memory
// operand is now matched as a v2i64 load bit-converted to v4i32 instead of an
// untyped (load ...).
-def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
- PSHUFD_binary_shuffle_mask:$sm),
+def : Pat<(vector_shuffle (v4i32 VR128:$src1),
+ (bc_v4i32 (loadv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm),
(v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
PSHUFD_binary_shuffle_mask:$sm))>, Requires<[HasSSE2]>;