[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), // match X86vzmovl applied directly to a v4i32 load from addr:$src
+ (MOVZDI2PDIrm addr:$src)>; // select MOVZDI2PDIrm on the same address. NOTE(review): presumably this narrows the matched full-vector load to the 32-bit load MOVZDI2PDIrm performs - confirm
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), // same as the loadv4i32 form, but the load is a v4f32 load wrapped in bc_v4i32
+ (MOVZDI2PDIrm addr:$src)>; // selected to the same MOVZDI2PDIrm memory form
+
// MOVZQI2PQIrm: `movq` with an i64mem source operand and a VR128 destination,
// available only when HasSSE2 holds. The selection pattern has the same shape
// as the 32-bit definition preceding it: the loaded i64 is wrapped in
// scalar_to_vector to form a v2i64, and X86vzmovl is applied to that vector.
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))]>, XS,
Requires<[HasSSE2]>;
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), // match X86vzmovl applied directly to a v2i64 load from addr:$src
+ (MOVZQI2PQIrm addr:$src)>; // select MOVZQI2PQIrm (movq from i64mem) on the same address. NOTE(review): the matched load is a full vector while the instruction operand is i64mem - confirm the narrowing is intended
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), // same as the loadv2i64 form, but the load is a v4f32 load wrapped in bc_v2i64
+ (MOVZQI2PQIrm addr:$src)>; // selected to the same MOVZQI2PQIrm memory form
// A v2i64 X86vzload of addr:$src is selected as MOVZQI2PQIrm on that address.
def : Pat<(v2i64 (X86vzload addr:$src)),
          (MOVZQI2PQIrm addr:$src)>;
}
--- /dev/null
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep xorp
+
+define void @t1() nounwind { ; loads a <4 x float>, masks it so only element 0 keeps its bits, and stores the result
+ %tmp298.i.i = load <4 x float>* null, align 16 ; 16-byte-aligned vector load through a null pointer
+ %tmp304.i.i = bitcast <4 x float> %tmp298.i.i to <4 x i32> ; view the loaded value as <4 x i32> so an integer mask can be applied
+ %tmp305.i.i = and <4 x i32> %tmp304.i.i, < i32 -1, i32 0, i32 0, i32 0 > ; mask is all-ones in element 0 and zero in elements 1-3
+ store <4 x i32> %tmp305.i.i, <4 x i32>* null, align 16 ; store the masked vector back through the null pointer
+ unreachable ; no return; the function ends here
+}