SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;
-def STDPrefetch : SDTypeProfile<0, 3, [ // prefetch
+def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch
SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
]>;
-def STDMemBarrier : SDTypeProfile<0, 5, [ // memory barier
+def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier
SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
SDTCisInt<0>
]>;
-def STDAtomic3 : SDTypeProfile<1, 3, [
+def SDTAtomic3 : SDTypeProfile<1, 3, [
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1>
]>;
-def STDAtomic2 : SDTypeProfile<1, 2, [
+def SDTAtomic2 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
-def prefetch : SDNode<"ISD::PREFETCH" , STDPrefetch,
+def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
+def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier,
[SDNPHasChain, SDNPSideEffect]>;
-def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , STDAtomic3,
+def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , STDAtomic2,
+def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
+def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , STDAtomic2,
+def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2,
+def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2,
+def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2,
+def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", STDAtomic2,
+def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2,
+def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2,
+def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2,
+def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2,
+def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
// ARM architectures.
def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
- // ARM v5TE+ and Thumb2 has preload instructions.
- if (Subtarget->isThumb2() ||
- (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))
- setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
DAG.getConstant(DMBOpt, MVT::i32));
}
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre v5TE and Thumb1 does not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Flavor = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ if (Flavor != 3) {
+ if (!Subtarget->hasV7Ops())
+ return Op.getOperand(0);
+ else if (Flavor == 2 && !Subtarget->hasMPExtension())
+ return Op.getOperand(0);
+ }
+
+ if (Subtarget->isThumb())
+ // Invert the bits.
+ Flavor = ~Flavor & 0x3;
+
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(Flavor, MVT::i32));
+}
+
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
MEMBARRIER, // Memory barrier (DMB)
MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMPRELOAD : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
[SDNPHasChain]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPRELOAD,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
AssemblerPredicate;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
// Preload signals the memory system of possible future data/instruction access.
// These are for disassembly only.
-//
-// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
multiclass APreLoad<bits<2> data_read, string opc> {
- def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary,
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$addr"),
- [(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> {
+ [(ARMPreload addrmode_imm12:$addr, (i32 data_read))]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-26} = 0b111101;
let Inst{11-0} = addr{11-0}; // imm12
}
- def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary,
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$shift"),
- [(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> {
+ [(ARMPreload ldst_so_reg:$shift, (i32 data_read))]> {
bits<4> Rt;
bits<17> shift;
let Inst{31-26} = 0b111101;
}
}
-defm PLD : APreLoad<3, "pld">;
-defm PLDW : APreLoad<2, "pldw">;
-defm PLI : APreLoad<1, "pli">;
+defm PLD : APreLoad<3, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<2, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, "pli">, Requires<[IsARM,HasV7]>;
def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
"setend\t$end",
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
// data/instruction access. These are for disassembly only.
-//
-// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass T2Ipl<bit instr, bit write, bits<2> data_read, string opc> {
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<2> instr_write, string opc> {
- def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc,
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
"\t$addr",
- [(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> {
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
+ let Inst{24} = instr_write{1};
let Inst{23} = 1; // U = 1
let Inst{22} = 0;
- let Inst{21} = write;
+ let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
}
- def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc,
+ def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
"\t$addr",
- [(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> {
+ [(ARMPreload t2addrmode_imm8:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
+ let Inst{24} = instr_write{1};
let Inst{23} = 0; // U = 0
let Inst{22} = 0;
- let Inst{21} = write;
+ let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-8} = 0b1100;
}
- def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc,
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
"\t$addr",
- [(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> {
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
+ let Inst{24} = instr_write{1};
let Inst{23} = 0; // add = TRUE for T1
let Inst{22} = 0;
- let Inst{21} = write;
+ let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-6} = 0000000;
}
let isCodeGenOnly = 1 in
- def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc,
+ def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_Preload, opc,
"\t$addr",
[]> {
let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
+ let Inst{24} = instr_write{1};
let Inst{23} = ?; // add = (U == 1)
let Inst{22} = 0;
- let Inst{21} = write;
+ let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{19-16} = 0b1111; // Rn = 0b1111
let Inst{15-12} = 0b1111;
}
}
-defm t2PLD : T2Ipl<0, 0, 3, "pld">;
-defm t2PLDW : T2Ipl<0, 1, 2, "pldw">;
-defm t2PLI : T2Ipl<1, 0, 1, "pli">;
+defm t2PLD : T2Ipl<0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<2, "pli">, Requires<[IsThumb2,HasV7]>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
def IIC_iStore_d_ru : InstrItinClass;
def IIC_iStore_m : InstrItinClass<0>; // micro-coded
def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_LSPipe]>], [2]>,
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
InstrStage<1, [A9_AGU], 0>,
InstrStage<2, [A9_LSUnit]>], [2]>,
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
+ , HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
, stackAlignment(4)
/// over 16-bit ones.
bool Pref32BitThumb;
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool hasMPExtension() const { return HasMPExtension; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2
-; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -march=thumb -mattr=+v7a,+mp | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM
; rdar://8601536
define void @t1(i8* %ptr) nounwind {