(VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
}
+//===----------------------------------------------------------------------===//
+// VMOVHPS/PD VMOVLPS/PD Instructions
+// All patterns were taken from the SSE implementation.
+//===----------------------------------------------------------------------===//
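+// The load forms move 64 bits from memory into the high (VMOVH*) or low
+// (VMOVL*) half of an XMM register, passing the other half through from
+// $src1; the store forms write the corresponding half back to memory.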
+multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1,
+ (_.VT (bitconvert
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+}
+
+defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
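+// Note: EVEX_CD8<32, CD8VT2> (a tuple of two f32 elements) and
+// EVEX_CD8<64, CD8VT1> (one f64) both denote an 8-byte memory granule, so
+// the EVEX compressed displacement scales a one-byte disp by 8 (disp8*8).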
+
+let Predicates = [HasAVX512] in {
+ // VMOVHPS patterns
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVHPD patterns
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPS patterns
+ def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+}
+
+let mayStore = 1 in {
+def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
+ (bc_v2f64 (v4f32 VR128X:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128X:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+}
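+// Note: like their VEX counterparts, these forms take no {k} mask operand;
+// the EVEX encoding and access to %xmm16-%xmm31 are what is new here.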
+let Predicates = [HasAVX512] in {
+ // VMOVHPD patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
+ (iPTR 0))), addr:$dst),
+ (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
+ // VMOVLPS patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
string base_opc, InstrItinClass itin> {
- defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Predicates = [UseAVX] in
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
itin>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $dst|$dst, $src2}",
itin>;
}
}
let SchedRW = [WriteStore] in {
+let Predicates = [UseAVX] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
+}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
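+// UseAVX is hasAVX() && !hasAVX512(), so these VEX-encoded patterns now
+// yield to the EVEX-encoded VMOV*Z128 forms whenever AVX-512 is available.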
// Shuffle with VMOVLPS
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
+let Predicates = [UseAVX] in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// VMOVHPS patterns
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
vmovd %xmm5, -516(%rdx)
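+
+// The tests below cover the new EVEX-encoded vmovlps/vmovlpd/vmovhps/vmovhpd
+// forms, including registers %xmm16-%xmm31, which are only reachable via EVEX.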
+// CHECK: vmovlps (%rcx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39]
+ vmovlps (%rcx), %xmm20, %xmm7
+
+// CHECK: vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xb1,0x5c,0x00,0x12,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
+
+// CHECK: vmovlps 1016(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x7f]
+ vmovlps 1016(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps 1024(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0x00,0x04,0x00,0x00]
+ vmovlps 1024(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps -1024(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x80]
+ vmovlps -1024(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps -1032(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0xf8,0xfb,0xff,0xff]
+ vmovlps -1032(%rdx), %xmm20, %xmm7
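+
+// With EVEX disp8*N compression (N = 8 bytes for these instructions),
+// 1016 = 127*8 encodes as the single byte 0x7f and -1024 = -128*8 as 0x80,
+// while 1024 and -1032 fall back to a full 32-bit displacement, as the
+// encodings above show.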
+
+// CHECK: vmovlps %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x19]
+ vmovlps %xmm27, (%rcx)
+
+// CHECK: vmovlps %xmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovlps %xmm27, 291(%rax,%r14,8)
+
+// CHECK: vmovlps %xmm27, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x7f]
+ vmovlps %xmm27, 1016(%rdx)
+
+// CHECK: vmovlps %xmm27, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0x00,0x04,0x00,0x00]
+ vmovlps %xmm27, 1024(%rdx)
+
+// CHECK: vmovlps %xmm27, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x80]
+ vmovlps %xmm27, -1024(%rdx)
+
+// CHECK: vmovlps %xmm27, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff]
+ vmovlps %xmm27, -1032(%rdx)
+
+// CHECK: vmovlpd (%rcx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x29]
+ vmovlpd (%rcx), %xmm6, %xmm29
+
+// CHECK: vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x21,0xcd,0x08,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
+
+// CHECK: vmovlpd 1016(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x7f]
+ vmovlpd 1016(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd 1024(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0x00,0x04,0x00,0x00]
+ vmovlpd 1024(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd -1024(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x80]
+ vmovlpd -1024(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd -1032(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0xf8,0xfb,0xff,0xff]
+ vmovlpd -1032(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x09]
+ vmovlpd %xmm25, (%rcx)
+
+// CHECK: vmovlpd %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x13,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovlpd %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vmovlpd %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x7f]
+ vmovlpd %xmm25, 1016(%rdx)
+
+// CHECK: vmovlpd %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0x00,0x04,0x00,0x00]
+ vmovlpd %xmm25, 1024(%rdx)
+
+// CHECK: vmovlpd %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x80]
+ vmovlpd %xmm25, -1024(%rdx)
+
+// CHECK: vmovlpd %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovlpd %xmm25, -1032(%rdx)
+
+// CHECK: vmovhps (%rcx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x21]
+ vmovhps (%rcx), %xmm17, %xmm20
+
+// CHECK: vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xa1,0x74,0x00,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
+
+// CHECK: vmovhps 1016(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x7f]
+ vmovhps 1016(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps 1024(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0x00,0x04,0x00,0x00]
+ vmovhps 1024(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps -1024(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x80]
+ vmovhps -1024(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps -1032(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0xf8,0xfb,0xff,0xff]
+ vmovhps -1032(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x11]
+ vmovhps %xmm18, (%rcx)
+
+// CHECK: vmovhps %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x17,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovhps %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vmovhps %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x7f]
+ vmovhps %xmm18, 1016(%rdx)
+
+// CHECK: vmovhps %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0x00,0x04,0x00,0x00]
+ vmovhps %xmm18, 1024(%rdx)
+
+// CHECK: vmovhps %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x80]
+ vmovhps %xmm18, -1024(%rdx)
+
+// CHECK: vmovhps %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0xf8,0xfb,0xff,0xff]
+ vmovhps %xmm18, -1032(%rdx)
+
+// CHECK: vmovhpd (%rcx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x19]
+ vmovhpd (%rcx), %xmm28, %xmm19
+
+// CHECK: vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x16,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
+
+// CHECK: vmovhpd 1016(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x7f]
+ vmovhpd 1016(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd 1024(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0x00,0x04,0x00,0x00]
+ vmovhpd 1024(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd -1024(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x80]
+ vmovhpd -1024(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd -1032(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0xf8,0xfb,0xff,0xff]
+ vmovhpd -1032(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x09]
+ vmovhpd %xmm25, (%rcx)
+
+// CHECK: vmovhpd %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x17,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovhpd %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vmovhpd %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x7f]
+ vmovhpd %xmm25, 1016(%rdx)
+
+// CHECK: vmovhpd %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0x00,0x04,0x00,0x00]
+ vmovhpd %xmm25, 1024(%rdx)
+
+// CHECK: vmovhpd %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x80]
+ vmovhpd %xmm25, -1024(%rdx)
+
+// CHECK: vmovhpd %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovhpd %xmm25, -1032(%rdx)
+