From fcb6d5f29c05c94b39b0a29e3d93b706d8434bac Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Tue, 10 Nov 2015 07:09:07 +0000 Subject: [PATCH] AVX512 : Implemented encoding and DAG lowering for VMOVHPS/PD and VMOVLPS/PD instructions. Differential Revision: http://reviews.llvm.org/D14492 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 109 +++++++++++++++++ lib/Target/X86/X86InstrSSE.td | 15 ++- test/CodeGen/X86/avx-isa-check.ll | 38 ++++++ test/CodeGen/X86/exedeps-movq.ll | 18 +++ test/MC/X86/avx512-encodings.s | 192 ++++++++++++++++++++++++++++++ 5 files changed, 367 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8cb6babd35f..1dde04d4f7e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4309,6 +4309,115 @@ let Predicates = [HasAVX512] in { (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>; } +//===----------------------------------------------------------------------===// +// VMOVHPS/PD VMOVLPS/PD Instructions +// All patterns were taken from the SSE implementation. 
+//===----------------------------------------------------------------------===// +multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + def rm : AVX512, EVEX_4V; +} + +defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; +defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; + +let Predicates = [HasAVX512] in { + // VMOVHPS patterns + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVHPD patterns + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPS patterns + def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPD patterns + def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, + (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; +} + +let mayStore = 1 in { 
+def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), + (bc_v2f64 (v4f32 VR128X:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128X:$src), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +} +let Predicates = [HasAVX512] in { + // VMOVHPD patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), + (iPTR 0))), addr:$dst), + (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; + // VMOVLPS patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + // VMOVLPD patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; +} 
//===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e4ff9b34345..1407be2f60c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1172,12 +1172,13 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, string base_opc, InstrItinClass itin> { - defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; -let Constraints = "$src1 = $dst" in - defm NAME : sse12_mov_hilo_packed_base; } @@ -1188,6 +1189,7 @@ let AddedComplexity = 20 in { } let SchedRW = [WriteStore] in { +let Predicates = [UseAVX] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1198,6 +1200,7 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +}// UseAVX def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1210,7 +1213,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // Shuffle with VMOVLPS def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), (VMOVLPSrm VR128:$src1, addr:$src2)>; @@ -1297,6 +1300,7 @@ let AddedComplexity = 20 in { let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
+let Predicates = [UseAVX] in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -1308,6 +1312,7 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +} // UseAVX def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -1321,7 +1326,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // VMOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), diff --git a/test/CodeGen/X86/avx-isa-check.ll b/test/CodeGen/X86/avx-isa-check.ll index 02b4f37f96a..d295ffd3048 100644 --- a/test/CodeGen/X86/avx-isa-check.ll +++ b/test/CodeGen/X86/avx-isa-check.ll @@ -344,3 +344,41 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 ret <16 x i16> %shuffle } +define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { + %a = load double, double* %ptr + %v = insertelement <2 x double> undef, double %a, i32 0 + %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle +} + +define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) { + %a = load double, double* %ptr + %v = insertelement <2 x double> undef, double %a, i32 0 + %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle +} + +define void @store_floats(<4 x float> %x, i64* %p) { + %a = fadd <4 x float> %x, %x + %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> + %c = bitcast <2 x float> %b to i64 + store i64 %c, i64* %p + ret void +} + +define void 
@store_double(<2 x double> %x, i64* %p) { + %a = fadd <2 x double> %x, %x + %b = extractelement <2 x double> %a, i32 0 + %c = bitcast double %b to i64 + store i64 %c, i64* %p + ret void +} + +define void @store_h_double(<2 x double> %x, i64* %p) { + %a = fadd <2 x double> %x, %x + %b = extractelement <2 x double> %a, i32 1 + %c = bitcast double %b to i64 + store i64 %c, i64* %p + ret void +} + diff --git a/test/CodeGen/X86/exedeps-movq.ll b/test/CodeGen/X86/exedeps-movq.ll index a5873be6f27..ae147accc3a 100644 --- a/test/CodeGen/X86/exedeps-movq.ll +++ b/test/CodeGen/X86/exedeps-movq.ll @@ -66,3 +66,21 @@ define void @store_int(<4 x i32> %x, <2 x float>* %p) { ret void } +define void @store_h_double(<2 x double> %x, i64* %p) { +; SSE-LABEL: store_h_double: +; SSE: # BB#0: +; SSE-NEXT: addpd %xmm0, %xmm0 +; SSE-NEXT: movhpd %xmm0, (%rdi) +; SSE-NEXT: retq +; +; AVX-LABEL: store_h_double: +; AVX: # BB#0: +; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovhpd %xmm0, (%rdi) +; AVX-NEXT: retq + %a = fadd <2 x double> %x, %x + %b = extractelement <2 x double> %a, i32 1 + %c = bitcast double %b to i64 + store i64 %c, i64* %p + ret void +} diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index dc0ee7e671b..1d7ae1c7a38 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -18297,3 +18297,195 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff] vmovd %xmm5, -516(%rdx) +// CHECK: vmovlps (%rcx), %xmm20, %xmm7 +// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39] + vmovlps (%rcx), %xmm20, %xmm7 + +// CHECK: vmovlps 291(%rax,%r14,8), %xmm20, %xmm7 +// CHECK: encoding: [0x62,0xb1,0x5c,0x00,0x12,0xbc,0xf0,0x23,0x01,0x00,0x00] + vmovlps 291(%rax,%r14,8), %xmm20, %xmm7 + +// CHECK: vmovlps 1016(%rdx), %xmm20, %xmm7 +// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x7f] + vmovlps 1016(%rdx), %xmm20, %xmm7 + +// CHECK: vmovlps 1024(%rdx), %xmm20, %xmm7 +// CHECK: 
encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0x00,0x04,0x00,0x00] + vmovlps 1024(%rdx), %xmm20, %xmm7 + +// CHECK: vmovlps -1024(%rdx), %xmm20, %xmm7 +// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x80] + vmovlps -1024(%rdx), %xmm20, %xmm7 + +// CHECK: vmovlps -1032(%rdx), %xmm20, %xmm7 +// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0xf8,0xfb,0xff,0xff] + vmovlps -1032(%rdx), %xmm20, %xmm7 + +// CHECK: vmovlps %xmm27, (%rcx) +// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x19] + vmovlps %xmm27, (%rcx) + +// CHECK: vmovlps %xmm27, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00] + vmovlps %xmm27, 291(%rax,%r14,8) + +// CHECK: vmovlps %xmm27, 1016(%rdx) +// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x7f] + vmovlps %xmm27, 1016(%rdx) + +// CHECK: vmovlps %xmm27, 1024(%rdx) +// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0x00,0x04,0x00,0x00] + vmovlps %xmm27, 1024(%rdx) + +// CHECK: vmovlps %xmm27, -1024(%rdx) +// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x80] + vmovlps %xmm27, -1024(%rdx) + +// CHECK: vmovlps %xmm27, -1032(%rdx) +// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff] + vmovlps %xmm27, -1032(%rdx) + +// CHECK: vmovlpd (%rcx), %xmm6, %xmm29 +// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x29] + vmovlpd (%rcx), %xmm6, %xmm29 + +// CHECK: vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29 +// CHECK: encoding: [0x62,0x21,0xcd,0x08,0x12,0xac,0xf0,0x23,0x01,0x00,0x00] + vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29 + +// CHECK: vmovlpd 1016(%rdx), %xmm6, %xmm29 +// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x7f] + vmovlpd 1016(%rdx), %xmm6, %xmm29 + +// CHECK: vmovlpd 1024(%rdx), %xmm6, %xmm29 +// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0x00,0x04,0x00,0x00] + vmovlpd 1024(%rdx), %xmm6, %xmm29 + +// CHECK: vmovlpd -1024(%rdx), %xmm6, %xmm29 +// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x80] + vmovlpd -1024(%rdx), %xmm6, %xmm29 + +// CHECK: vmovlpd -1032(%rdx), %xmm6, 
%xmm29 +// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0xf8,0xfb,0xff,0xff] + vmovlpd -1032(%rdx), %xmm6, %xmm29 + +// CHECK: vmovlpd %xmm25, (%rcx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x09] + vmovlpd %xmm25, (%rcx) + +// CHECK: vmovlpd %xmm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x13,0x8c,0xf0,0x23,0x01,0x00,0x00] + vmovlpd %xmm25, 291(%rax,%r14,8) + +// CHECK: vmovlpd %xmm25, 1016(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x7f] + vmovlpd %xmm25, 1016(%rdx) + +// CHECK: vmovlpd %xmm25, 1024(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0x00,0x04,0x00,0x00] + vmovlpd %xmm25, 1024(%rdx) + +// CHECK: vmovlpd %xmm25, -1024(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x80] + vmovlpd %xmm25, -1024(%rdx) + +// CHECK: vmovlpd %xmm25, -1032(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0xf8,0xfb,0xff,0xff] + vmovlpd %xmm25, -1032(%rdx) + +// CHECK: vmovhps (%rcx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x21] + vmovhps (%rcx), %xmm17, %xmm20 + +// CHECK: vmovhps 291(%rax,%r14,8), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xa1,0x74,0x00,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00] + vmovhps 291(%rax,%r14,8), %xmm17, %xmm20 + +// CHECK: vmovhps 1016(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x7f] + vmovhps 1016(%rdx), %xmm17, %xmm20 + +// CHECK: vmovhps 1024(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0x00,0x04,0x00,0x00] + vmovhps 1024(%rdx), %xmm17, %xmm20 + +// CHECK: vmovhps -1024(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x80] + vmovhps -1024(%rdx), %xmm17, %xmm20 + +// CHECK: vmovhps -1032(%rdx), %xmm17, %xmm20 +// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0xf8,0xfb,0xff,0xff] + vmovhps -1032(%rdx), %xmm17, %xmm20 + +// CHECK: vmovhps %xmm18, (%rcx) +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x11] + vmovhps %xmm18, (%rcx) + +// CHECK: vmovhps %xmm18, 
291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x17,0x94,0xf0,0x23,0x01,0x00,0x00] + vmovhps %xmm18, 291(%rax,%r14,8) + +// CHECK: vmovhps %xmm18, 1016(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x7f] + vmovhps %xmm18, 1016(%rdx) + +// CHECK: vmovhps %xmm18, 1024(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0x00,0x04,0x00,0x00] + vmovhps %xmm18, 1024(%rdx) + +// CHECK: vmovhps %xmm18, -1024(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x80] + vmovhps %xmm18, -1024(%rdx) + +// CHECK: vmovhps %xmm18, -1032(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0xf8,0xfb,0xff,0xff] + vmovhps %xmm18, -1032(%rdx) + +// CHECK: vmovhpd (%rcx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x19] + vmovhpd (%rcx), %xmm28, %xmm19 + +// CHECK: vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x16,0x9c,0xf0,0x23,0x01,0x00,0x00] + vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19 + +// CHECK: vmovhpd 1016(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x7f] + vmovhpd 1016(%rdx), %xmm28, %xmm19 + +// CHECK: vmovhpd 1024(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0x00,0x04,0x00,0x00] + vmovhpd 1024(%rdx), %xmm28, %xmm19 + +// CHECK: vmovhpd -1024(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x80] + vmovhpd -1024(%rdx), %xmm28, %xmm19 + +// CHECK: vmovhpd -1032(%rdx), %xmm28, %xmm19 +// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0xf8,0xfb,0xff,0xff] + vmovhpd -1032(%rdx), %xmm28, %xmm19 + +// CHECK: vmovhpd %xmm25, (%rcx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x09] + vmovhpd %xmm25, (%rcx) + +// CHECK: vmovhpd %xmm25, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x17,0x8c,0xf0,0x23,0x01,0x00,0x00] + vmovhpd %xmm25, 291(%rax,%r14,8) + +// CHECK: vmovhpd %xmm25, 1016(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x7f] + vmovhpd %xmm25, 1016(%rdx) + +// CHECK: 
vmovhpd %xmm25, 1024(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0x00,0x04,0x00,0x00] + vmovhpd %xmm25, 1024(%rdx) + +// CHECK: vmovhpd %xmm25, -1024(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x80] + vmovhpd %xmm25, -1024(%rdx) + +// CHECK: vmovhpd %xmm25, -1032(%rdx) +// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff] + vmovhpd %xmm25, -1032(%rdx) + -- 2.34.1