From: Evan Cheng Date: Wed, 3 Nov 2010 06:34:55 +0000 (+0000) Subject: Fix preload instruction isel. Only v7 supports pli, and only v7 with mp extension... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=dfed19fe2c34c1209108afa58e8ab014ffd894e2;p=oota-llvm.git Fix preload instruction isel. Only v7 supports pli, and only v7 with mp extension supports pldw. Add subtarget attribute to denote mp extension support and legalize illegal ones to nothing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118160 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index de2f50495ca..7315e52ef0a 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -183,18 +183,18 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; -def STDPrefetch : SDTypeProfile<0, 3, [ // prefetch +def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1> ]>; -def STDMemBarrier : SDTypeProfile<0, 5, [ // memory barier +def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>, SDTCisInt<0> ]>; -def STDAtomic3 : SDTypeProfile<1, 3, [ +def SDTAtomic3 : SDTypeProfile<1, 3, [ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; -def STDAtomic2 : SDTypeProfile<1, 2, [ +def SDTAtomic2 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; @@ -374,35 +374,35 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; -def prefetch : SDNode<"ISD::PREFETCH" , STDPrefetch, +def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; -def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier, +def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier, [SDNPHasChain, SDNPSideEffect]>; -def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , STDAtomic3, +def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , STDAtomic2, +def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2, +def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , STDAtomic2, +def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2, +def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2, +def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2, +def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", STDAtomic2, +def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2, +def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2, +def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2, +def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2, +def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; // Do not use ld, st directly. Use load, extload, sextload, zextload, store, diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 0ebdd75244e..eef152f2741 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -64,6 +64,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; +// Multiprocessing extension. +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; // ARM architectures. def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b816e66f7d3..6bd8503bb47 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -598,10 +598,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); - // ARM v5TE+ and Thumb2 has preload instructions. - if (Subtarget->isThumb2() || - (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())) - setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { @@ -777,6 +774,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; + case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -2060,6 +2059,31 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DAG.getConstant(DMBOpt, MVT::i32)); } +static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // ARM pre v5TE and Thumb1 does not have preload instructions. + if (!(Subtarget->isThumb2() || + (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) + // Just preserve the chain. + return Op.getOperand(0); + + DebugLoc dl = Op.getDebugLoc(); + unsigned Flavor = cast(Op.getOperand(3))->getZExtValue(); + if (Flavor != 3) { + if (!Subtarget->hasV7Ops()) + return Op.getOperand(0); + else if (Flavor == 2 && !Subtarget->hasMPExtension()) + return Op.getOperand(0); + } + + if (Subtarget->isThumb()) + // Invert the bits. + Flavor = ~Flavor & 0x3; + + return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(Flavor, MVT::i32)); +} + static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *FuncInfo = MF.getInfo(); @@ -3842,6 +3866,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index bd2fa8eb47e..8504b83d243 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -83,6 +83,8 @@ namespace llvm { MEMBARRIER, // Memory barrier (DMB) MEMBARRIER_MCR, // Memory barrier (MCR) + + PRELOAD, // Preload VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 71557c9864e..c98d45bf43d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,6 +62,8 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMPRELOAD : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; + def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -130,6 +132,8 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain]>; +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPRELOAD, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; @@ -159,6 +163,8 @@ def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, AssemblerPredicate; +def HasMP : Predicate<"Subtarget->hasMPExtension()">, + AssemblerPredicate; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate; @@ -988,14 +994,11 @@ def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt", // Preload signals the memory system of possible future data/instruction access. // These are for disassembly only. -// -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. multiclass APreLoad data_read, string opc> { - def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary, + def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, !strconcat(opc, "\t$addr"), - [(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> { + [(ARMPreload addrmode_imm12:$addr, (i32 data_read))]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; @@ -1009,9 +1012,9 @@ multiclass APreLoad data_read, string opc> { let Inst{11-0} = addr{11-0}; // imm12 } - def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary, + def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, !strconcat(opc, "\t$shift"), - [(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> { + [(ARMPreload ldst_so_reg:$shift, (i32 data_read))]> { bits<4> Rt; bits<17> shift; let Inst{31-26} = 0b111101; @@ -1025,9 +1028,9 @@ multiclass APreLoad data_read, string opc> { } } -defm PLD : APreLoad<3, "pld">; -defm PLDW : APreLoad<2, "pldw">; -defm PLI : APreLoad<1, "pli">; +defm PLD : APreLoad<3, "pld">, Requires<[IsARM]>; +defm PLDW : APreLoad<2, "pldw">, Requires<[IsARM,HasV7,HasMP]>; +defm PLI : APreLoad<1, "pli">, Requires<[IsARM,HasV7]>; def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, "setend\t$end", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 44724111bb3..b105bc7eed6 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1171,67 +1171,66 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. These are for disassembly only. -// -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -multiclass T2Ipl data_read, string opc> { +// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), +// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1). +multiclass T2Ipl instr_write, string opc> { - def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc, + def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_imm12:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 1; // U = 1 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; } - def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc, + def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_imm8:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 0; // U = 0 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-8} = 0b1100; } - def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc, + def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_so_reg:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 0; // add = TRUE for T1 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-6} = 0000000; } let isCodeGenOnly = 1 in - def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc, + def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_Preload, opc, "\t$addr", []> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = ?; // add = (U == 1) let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{19-16} = 0b1111; // Rn = 0b1111 let Inst{15-12} = 0b1111; } } -defm t2PLD : T2Ipl<0, 0, 3, "pld">; -defm t2PLDW : T2Ipl<0, 1, 2, "pldw">; -defm t2PLI : T2Ipl<1, 0, 1, "pli">; +defm t2PLD : T2Ipl<0, "pld">, Requires<[IsThumb2]>; +defm t2PLDW : T2Ipl<1, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; +defm t2PLI : T2Ipl<2, "pli">, Requires<[IsThumb2,HasV7]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index c35cadb12ca..173961875e4 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -90,6 +90,7 @@ def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; def IIC_iStore_m : InstrItinClass<0>; // micro-coded def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 94b22c9113c..fc62faa09a3 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -225,6 +225,10 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData, InstrStage<2, [A8_LSPipe]>], [2]>, + // + // Preload + InstrItinData], [2, 2]>, + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index d5ab5dcc87b..bc325b1ec44 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -402,6 +402,10 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_AGU], 0>, InstrStage<2, [A9_LSUnit]>], [2]>, + // + // Preload + InstrItinData], [1, 1]>, + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 787bc305dfb..08331724f1e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -51,6 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) , stackAlignment(4) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ca9921eba13..551b2f99ffd 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -106,6 +106,10 @@ protected: /// over 16-bit ones. bool Pref32BitThumb; + /// HasMPExtension - True if the subtarget supports Multiprocessing + /// extension (ARMv7 only). + bool HasMPExtension; + /// FPOnlySP - If true, the floating point unit only supports single /// precision. bool FPOnlySP; @@ -176,6 +180,7 @@ protected: bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } + bool hasMPExtension() const { return HasMPExtension; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index dbc1002e508..1e4ff30af78 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2 -; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -march=thumb -mattr=+v7a,+mp | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM ; rdar://8601536 define void @t1(i8* %ptr) nounwind {