bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
return SelectShiftedRegister(N, true, Reg, Shift);
}
+ bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
return true;
}
+/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
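+/// For example, with Size == 8 the encodable offsets are the multiples of 8
+/// in the range [-512, 504] (a signed 7-bit immediate scaled by 8).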
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // stp x1, x2, [x0]
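+ // No attempt is made to fold a constant offset into the STNP itself; the
+ // add/sub that materializes the address is always emitted separately.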
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+// Patterns for nontemporal/no-allocate stores.
+// We have to resort to tricks to turn a single-input store into a store pair,
+// because there is no single-input nontemporal store, only STNP.
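+// For example, on little-endian, a 128-bit nontemporal store becomes:
+//    mov d1, v0[1]
+//    stnp d0, d1, [x0]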
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR128:$Rt),
+ (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+ (CPYi64 FPR128:$Rt, (i64 1)),
+ GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
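+// A 64-bit nontemporal store is likewise split into two 32-bit halves and
+// emitted as an STNP of two S registers; SUBREG_TO_REG widens the 64-bit
+// source to 128 bits so the high half can be copied out as lane 1.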
+class NTStore64Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR64:$Rt),
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+ (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
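+// An i64 GPR nontemporal store is split the same way: the low half is the
+// bottom W register, and the high half is extracted with UBFM (LSR #32).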
+def : Pat<(nontemporalstore GPR64:$Rt,
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+} // AddedComplexity = 15
+} // Predicates = [IsLE]
+
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
--- /dev/null
+; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false | FileCheck %s
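+; Nontemporal stores have no single-input form, so they are lowered to STNP
+; pairs, splitting the value and materializing any nonzero address offset
+; with a separate add/sub.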
+
+define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i64:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #16
+; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1]
+; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d1, d[[HI1]], [x[[PTR]]]
+; CHECK-NEXT: stnp d0, d[[HI0]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i32:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
+; CHECK-LABEL: test_stnp_v8i16:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
+; CHECK-LABEL: test_stnp_v16i8:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 {
+; CHECK-LABEL: test_stnp_v2i32:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i16:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 {
+; CHECK-LABEL: test_stnp_v8i8:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v1f64:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
+; CHECK-LABEL: test_stnp_v1i64:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64:
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x0]
+; CHECK-NEXT: ret
+ store i64 %v, i64* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64_offset:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #16
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1
+ store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #16
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1
+ store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #8
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1
+ store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #8
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1
+ store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64_offset:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #8
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i64, i64* %p, i32 1
+ store i64 %v, i64* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64_offset_neg:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #8
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i64, i64* %p, i32 -1
+ store i64 %v, i64* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+!0 = !{ i32 1 }
+
+attributes #0 = { nounwind }