multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1> {
- let isCommutable = 1 in
+ bit Is2Addr = 1,
+ bit rr_hasSideEffects = 0> {
+ let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+ def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ (iPTR 0))), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
def : Pat<(X86Movlps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ let mayLoad = 1 in
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
- }
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ let mayLoad = 1 in
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ }
}
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ (VPSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ (VPSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
let Predicates = [HasAVX2] in {
def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
- (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
- (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
- (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>;
+ (VPSLLDQYri VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
- (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>;
+ (VPSRLDQYri VR256:$src1, imm:$src2)>;
}
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ (PSLLDQri VR128:$src1, imm:$src2)>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ (PSRLDQri VR128:$src1, imm:$src2)>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
//===---------------------------------------------------------------------===//
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(VPCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(VPCMPEQBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
(VPCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPCMPEQWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
(VPCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPCMPEQDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
(VPCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(VPCMPGTBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
(VPCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPCMPGTWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
(VPCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPCMPGTDrm VR128:$src1, addr:$src2)>;
}
VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d,
VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
+
+ def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)),
+ (VPCMPEQBYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v32i8 (X86pcmpeqb VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)))),
+ (VPCMPEQBYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)),
+ (VPCMPEQWYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86pcmpeqw VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)))),
+ (VPCMPEQWYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)),
+ (VPCMPEQDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86pcmpeqd VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VPCMPEQDYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)),
+ (VPCMPGTBYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v32i8 (X86pcmpgtb VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)))),
+ (VPCMPGTBYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)),
+ (VPCMPGTWYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86pcmpgtw VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)))),
+ (VPCMPGTWYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)),
+ (VPCMPGTDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86pcmpgtd VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VPCMPGTDYrm VR256:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
let Predicates = [HasSSE2] in {
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpeqb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(PCMPEQBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
(PCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpeqw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(PCMPEQWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
(PCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpeqd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(PCMPEQDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
(PCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v16i8 (X86pcmpgtb VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
(PCMPGTBrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
(PCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v8i16 (X86pcmpgtw VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
(PCMPGTWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
(PCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
}
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
-let Predicates = [HasAVX],
- ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, TB, OpSize, VEX_4V;
- defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, TB, OpSize, VEX_4V;
-}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
- ExeDomain = SSEPackedDouble in {
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, TB, XD, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, TB, OpSize, VEX_4V;
+ }
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+ let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
f128mem>, TB, XD;
+ let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
f128mem>, TB, OpSize;
}
}
let Predicates = [HasAVX] in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ let ExeDomain = SSEPackedSingle in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ }
}
//===---------------------------------------------------------------------===//
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
int_x86_avx2_psign_d>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>;
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(PSIGNDrr128 VR128:$src1, VR128:$src2)>;
}
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(VPSHUFBrm128 VR128:$src, addr:$mask)>;
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)),
(VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
}
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNBrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNWrr256 VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)),
+ (VPSIGNDrr256 VR256:$src1, VR256:$src2)>;
+}
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
+ let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
+ }
}
multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ }
}
let Predicates = [HasAVX] in
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
addr:$dst)]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+ }
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
-defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src3))]>, OpSize;
}
-let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let Predicates = [HasAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+let ExeDomain = SSEPackedSingle in {
+ let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+ let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+}
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag32, PatFrag mem_frag64,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
+let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm : SS4AIi8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
+ OpSize;
+} // ExeDomain = SSEPackedSingle
+let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
[(set RC:$dst,
(V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
-}
-
-multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
- RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_AVX : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TA, OpSize, Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+} // ExeDomain = SSEPackedDouble
}
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
+let ExeDomain = GenericDomain in {
// Intrinsic operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
-}
-
-multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, reg.
- def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+} // ExeDomain = GenericDomain
}
// FP round - roundss, roundps, roundsd, roundpd
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
-
- // Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
- VEX;
- defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
- VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
+ "ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
+ "ptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
OpSize;
}
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
int_x86_avx2_pmaxu_w>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
int_x86_avx2_pmul_dq>, VEX_4V;
+
+ def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, VR256:$src2)),
+ (VPCMPEQQYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, (memop addr:$src2))),
+ (VPCMPEQQYrm VR256:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in {
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE41] in {
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+}
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
+ let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
VR128, memopv16i8, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
VR128, memopv16i8, i128mem>;
}
+ let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv16i8, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
VR128, memopv16i8, i128mem>;
}
}
let Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
memopv16i8, int_x86_sse41_blendvpd>;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_pd_256>;
+} // ExeDomain = SSEPackedDouble
+let ExeDomain = SSEPackedSingle in {
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_avx_blendv_ps_256>;
+} // ExeDomain = SSEPackedSingle
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+}
+
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
}
}
+let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
let Predicates = [HasAVX2] in {
defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>,
VEX_4V;
+
+ def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, VR256:$src2)),
+ (VPCMPGTQYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, (memop addr:$src2))),
+ (VPCMPGTQYrm VR256:$src1, addr:$src2)>;
}
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (PCMPGTQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (PCMPGTQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasSSE42] in {
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
}
let Predicates = [HasAVX],
- Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
//===----------------------------------------------------------------------===//
// Carry-less Multiplication instructions
+let neverHasSideEffects = 1 in {
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
+let mayLoad = 1 in
def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
+let mayLoad = 1 in
def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
+}
multiclass pclmul_alias<string asm, int immop> {
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, VEX;
-class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
- Intrinsic Int> :
- AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (Int VR128:$src))]>, VEX;
+// AVX2 adds register forms
+class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ Intrinsic Int> :
+ AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int VR128:$src))]>, VEX;
-def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcast_ss>;
-def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256>;
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcast_ss>;
+ def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcast_ss_256>;
+}
+let ExeDomain = SSEPackedDouble in
def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
-let Predicates = [HasAVX2] in {
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
+ int_x86_avx2_vbroadcast_ss_ps>;
+ def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
+ int_x86_avx2_vbroadcast_ss_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
+
+let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
int_x86_avx2_vbroadcasti128>;
-def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128,
- int_x86_avx2_vbroadcast_ss_ps>;
-def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256,
- int_x86_avx2_vbroadcast_ss_ps_256>;
-def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
-}
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDrm addr:$src)>;
-
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
+let neverHasSideEffects = 1 in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+}
def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
+let neverHasSideEffects = 1 in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
(ins f128mem:$dst, VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+}
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256,
- PatFrag pf128, PatFrag pf256> {
+ Intrinsic IntSt, Intrinsic IntSt256> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
+let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256,
- memopv4f32, memopv8f32>;
+ int_x86_avx_maskstore_ps_256>;
+let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256,
- memopv2f64, memopv4f64>;
+ int_x86_avx_maskstore_pd_256>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
[(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
}
-defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
- int_x86_avx_vpermilvar_ps,
- int_x86_avx_vpermil_ps>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
- int_x86_avx_vpermilvar_ps_256,
- int_x86_avx_vpermil_ps_256>;
-defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2f64, memopv2i64,
- int_x86_avx_vpermilvar_pd,
- int_x86_avx_vpermil_pd>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4f64, memopv4i64,
- int_x86_avx_vpermilvar_pd_256,
- int_x86_avx_vpermil_pd_256>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+ defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+}
def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
+let neverHasSideEffects = 1 in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+let mayLoad = 1 in
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+}
def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
-//
+//===----------------------------------------------------------------------===//
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
let Predicates = [HasAVX, HasF16C] in {
- def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
+ def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set RC:$dst, (Int VR128:$src))]>,
+ T8, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+}
+}
+
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+let Predicates = [HasAVX, HasF16C] in {
+ def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ TA, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
+}
+
+defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
+
+//===----------------------------------------------------------------------===//
+// AVX2 Instructions
+//===----------------------------------------------------------------------===//
+
+/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ let isCommutable = 1 in
+ def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ VEX_4V;
+ def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ VEX_4V;
+}
+
+let isCommutable = 0 in {
+defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
+ VR128, memopv16i8, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
+ VR256, memopv32i8, i256mem>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPBROADCAST - Load from memory and broadcast to all elements of the
+// destination operand
+//
+multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+}
+
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+ int_x86_avx2_pbroadcastb_128,
+ int_x86_avx2_pbroadcastb_256>;
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+ int_x86_avx2_pbroadcastw_128,
+ int_x86_avx2_pbroadcastw_256>;
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+ int_x86_avx2_pbroadcastd_128,
+ int_x86_avx2_pbroadcastd_256>;
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+ int_x86_avx2_pbroadcastq_128,
+ int_x86_avx2_pbroadcastq_256>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBrm addr:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQYrm addr:$src)>;
+}
+
+// AVX1 broadcast patterns
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// VPERM - Permute instructions
+//
+
+multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
+ VEX_4V;
+}
+
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+
+multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
+ def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
+ VEX;
+}
+
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
+ VEX_W;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
+ VEX_W;
+
+//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>,
+ VEX_4V;
+def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2),
+ imm:$src3))]>,
+ VEX_4V;
+
+//===----------------------------------------------------------------------===//
+// VINSERTI128 - Insert packed integer values
+//
+def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
+ VEX_4V;
+def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
+ imm:$src3))]>, VEX_4V;
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTI128 - Extract packed integer values
+//
+def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ VEX;
+let neverHasSideEffects = 1, mayStore = 1 in
+def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+
+//===----------------------------------------------------------------------===//
+// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
+//
+multiclass avx2_pmovmask<string OpcodeStr,
+ Intrinsic IntLd128, Intrinsic IntLd256,
+ Intrinsic IntSt128, Intrinsic IntSt256> {
+ def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
+ def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+ def mr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
+ int_x86_avx2_maskload_d,
+ int_x86_avx2_maskload_d_256,
+ int_x86_avx2_maskstore_d,
+ int_x86_avx2_maskstore_d_256>;
+defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
+ int_x86_avx2_maskload_q,
+ int_x86_avx2_maskload_q_256,
+ int_x86_avx2_maskstore_q,
+ int_x86_avx2_maskstore_q_256>, VEX_W;
+
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>,
+ VEX_4V;
+}
+
+multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ VEX_4V;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d,
+ int_x86_avx2_psllv_d_256>;
+defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q,
+ int_x86_avx2_psllv_q_256>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d,
+ int_x86_avx2_psrlv_d_256>;
+defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q,
+ int_x86_avx2_psrlv_q_256>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d,
+ int_x86_avx2_psrav_d_256>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
+ (VPSLLVDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
+ (VPSLLVQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
+ (VPSRLVDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))),
+ (VPSRLVQrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))),
+ (VPSRAVDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
+ (VPSLLVDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
+ (VPSLLVQYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
+ (VPSRLVDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))),
+ (VPSRLVQYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))),
+ (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i32 (shl (v4i32 VR128:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSLLVDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
+ (VPSLLVQrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (srl (v4i32 VR128:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRLVDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))),
+ (VPSRLVQrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (sra (v4i32 VR128:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (VPSRAVDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (shl (v8i32 VR256:$src1),
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VPSLLVDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
+ (VPSLLVQYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (srl (v8i32 VR256:$src1),
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VPSRLVDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))),
+ (VPSRLVQYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (sra (v8i32 VR256:$src1),
+ (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VPSRAVDYrm VR256:$src1, addr:$src2)>;
}