bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
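+  // Cache the subtarget feature checks and whether the store's memory operand
+  // carries a non-temporal hint; the switch below prefers the MOVNT* family of
+  // opcodes for non-temporal stores when the target supports them.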
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasAVX = Subtarget->hasAVX();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
- case MVT::i32: Opc = X86::MOV32mr; break;
- case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::i32:
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
+ break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ Opc = X86ScalarSSEf32 ?
+ (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
break;
case MVT::f64:
Opc = X86ScalarSSEf64 ?
- (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
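+  // The vector MOVNT* stores require a 16-byte-aligned memory operand, so the
+  // non-temporal opcodes are only selected on the aligned paths below.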
case MVT::v4f32:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
- else
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ else
+ Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ } else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
--- /dev/null
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
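+; Scalar non-temporal stores should be lowered to MOVNTI (movntil / movntiq)
+; when SSE2 is available; aligned 16-byte vector stores should use
+; MOVNTPS/MOVNTPD/MOVNTDQ, or their VEX-encoded forms when AVX is enabled.
+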
+define void @test_nti32(i32* nocapture %ptr, i32 %X) {
+; ALL-LABEL: test_nti32:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movntil %esi, (%rdi)
+; ALL-NEXT: retq
+entry:
+ store i32 %X, i32* %ptr, align 4, !nontemporal !1
+ ret void
+}
+
+define void @test_nti64(i64* nocapture %ptr, i64 %X) {
+; ALL-LABEL: test_nti64:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movntiq %rsi, (%rdi)
+; ALL-NEXT: retq
+entry:
+ store i64 %X, i64* %ptr, align 8, !nontemporal !1
+ ret void
+}
+
+define void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) {
+; SSE2-LABEL: test_nt4xfloat:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movntps %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_nt4xfloat:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntps %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) {
+; SSE2-LABEL: test_nt2xdouble:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movntpd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_nt2xdouble:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) {
+; SSE2-LABEL: test_nt2xi64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movntdq %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: test_nt2xi64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntdq %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
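+; The !nontemporal metadata node must hold a single i32 constant equal to 1.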
+!1 = !{i32 1}