//===-- ARMFastISel.cpp - ARM FastISel implementation --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

extern cl::opt<bool> EnableARMLongCalls;

namespace {
  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;

class ARMFastISel : public FastISel {

    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
    /// make the right decision when generating code for different targets.
    const ARMSubtarget *Subtarget;
    const TargetMachine &TM;
    const TargetInstrInfo &TII;
    const TargetLowering &TLI;
    ARMFunctionInfo *AFI;

    // Convenience variables to avoid some queries.
    bool isThumb2;
    LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }
    // Code from FastISel.cpp.
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm1, uint64_t Imm2);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                               const LoadInst *LI);

  #include "ARMGenFastISel.inc"
    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                     unsigned Alignment = 0, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                      unsigned Alignment = 0);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
    unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    unsigned getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags, bool useAM3);
};

} // end anonymous namespace
#include "ARMGenCallingConv.inc"
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // Thumb2 functions and non-NEON instructions were already handled via
  // isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
       AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    if (MCID.OpInfo[i].isPredicate())
      return true;

  return false;
}
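// Illustration (a sketch; the AddDefault* helpers come from
// ARMBaseInstrInfo.h): AddDefaultPred appends the two predicate operands
// (ARMCC::AL plus a zero predicate register), AddDefaultCC appends a zero
// CCR operand, and AddDefaultT1CC appends a CPSR def. AddOptionalDefs below
// picks among these based on the instruction description.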
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, we know
  // we're not predicable but add it anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? CPSR is true iff the optional def is
  // CPSR; all other optional defs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  } else {
    // The instruction defines its result only implicitly; emit it and then
    // copy the implicit def into the requested result register.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}
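// Note: only the small set of VFP3 immediates (an 8-bit sign/exponent/
// mantissa form, covering values like 0.5, 1.0, or 2.0) can be
// materialized with a single FCONSTS/FCONSTD below; everything else is
// loaded from the constant pool.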
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}
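// Example for the MVN path below: for C == -2 the code computes
// Imm = ~(-2) == 1, which is a legal so_imm, and emits "mvn r0, #1"
// (yielding 0xFFFFFFFE) instead of a constant pool load.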
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ImmReg)
                    .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), ImmReg)
                      .addImm(Imm));
      return ImmReg;
    }
  }

  // Load from constant pool. For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}
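// The PC-relative adjustment (PCAdj) below accounts for the pipeline: a
// read of PC observes the address of the current instruction plus 8 in ARM
// mode and plus 4 in Thumb mode.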
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: Need more magic for ARM PIC.
  if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // Use movw+movt when possible, it avoids constant pool entries.
  // Darwin targets don't support movt with Reloc::Static, see
  // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
  // static movt relocations.
  if (Subtarget->useMovt() &&
      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
    unsigned Opc;
    switch (RelocM) {
    case Reloc::PIC_:
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
      break;
    case Reloc::DynamicNoPIC:
      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
      break;
    default:
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
      break;
    }
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg).addGlobalAddress(GV));
  } else {
    // MachineConstantPool wants an explicit alignment.
    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
    if (Align == 0) {
      // TODO: Figure out if this is correct.
      Align = TD.getTypeAllocSize(GV->getType());
    }

    // Grab index.
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
      (Subtarget->isThumb() ? 4 : 8);
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addConstantPoolIndex(Idx);
      if (RelocM == Reloc::PIC_)
        MIB.addImm(Id);
    } else {
      // The extra immediate is for addrmode2.
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                    DestReg)
        .addConstantPoolIndex(Idx)
        .addImm(0);
    }
    AddOptionalDefs(MIB);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
    MachineInstrBuilder MIB;
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(SI->second)
                            .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
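// Worked example for the GEP folding below (illustrative): given
//   %p = getelementptr { i32, i32 }* %s, i32 0, i32 1
// the struct-index path adds SL->getElementOffset(1) == 4 to TmpOffset, so
// a load of %p becomes a single reg+4 access off the register holding %s.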
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
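// Offset ranges handled below: plain integer loads/stores take an unsigned
// imm12 (0..4095), the AM3 forms (halfword and signed byte) take +/-imm8,
// and the VFP forms take an 8-bit word offset. Offsets outside these
// ranges get folded into the base register with an ADD.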
void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ?
      (const TargetRegisterClass*)&ARM::tGPRRegClass :
      (const TargetRegisterClass*)&ARM::GPRRegClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                            .addFrameIndex(Addr.Base.FI)
                            .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
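// Two encoding details below: addrmode5 immediates are expressed in words,
// so a byte offset of 8 is emitted as 2; and in the AM3 form a negative
// offset is encoded as (0x100 | -Offset), bit 8 being the subtract flag.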
void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags, bool useAM3) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
                                  MachinePointerInfo::getFixedStack(FI, Offset),
                                  Flags,
                                  MFI.getObjectSize(FI),
                                  MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
                              unsigned Alignment, bool isZExt, bool allocReg) {
  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i16:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::i32:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = &ARM::GPRRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = &ARM::GPRRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned loads need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert (ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load. Now we must move from the GPR to the FP register.
  if (needVMOV) {
    unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVSR), MoveReg)
                    .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
    return false;
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
                               unsigned Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb2 ?
        (const TargetRegisterClass*)&ARM::tGPRRegClass :
        (const TargetRegisterClass*)&ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && Alignment < 4) {
        unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::VMOVRS), MoveReg)
                        .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && Alignment < 4)
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
    return false;
  return true;
}
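// Predicate mapping examples: ICMP_ULT maps to ARMCC::LO (unsigned lower)
// and FCMP_OLT to ARMCC::MI, matching the NZCV flags that VCMP/FMSTAT
// produce. FCMP_ONE and FCMP_UEQ would each need two conditions, so they
// fall back to ARMCC::AL, which callers treat as "unhandled".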
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}
bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0) return false;

  unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
                  .addReg(AddrReg));
  return true;
}
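// Negative immediates are compared below with CMN rather than CMP: the CMP
// encodings cannot hold a negative operand, but "cmn r0, #5" computes
// r0 + 5 and sets the same flags "cmp r0, #-5" would.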
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcVT = TLI.getValueType(Ty, true);
  if (!SrcVT.isSimple()) return false;

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // than a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
        (ARM_AM::getSOImmVal(Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
      break;
    case MVT::f64:
      isICmp = false;
      CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      needsExt = true;
    // Intentional fall-through.
    case MVT::i32:
      if (isThumb2) {
        if (!UseImm)
          CmpOpc = ARM::t2CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
      } else {
        if (!UseImm)
          CmpOpc = ARM::CMPrr;
        else
          CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
      }
      break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0) return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0) return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0) return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0) return false;
    }
  }

  if (!UseImm) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(CmpOpc))
                    .addReg(SrcReg1).addReg(SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
      .addReg(SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));
  return true;
}
bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  const TargetRegisterClass *RC = isThumb2 ?
    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
          .addReg(ZeroReg).addImm(1)
          .addImm(ARMPred).addReg(ARM::CPSR);

  UpdateValueMap(I, DestReg);
  return true;
}
bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::DPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(&ARM::SPRRegClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(Src->getType(), true);
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0) return false;

  // Handle sign-extension.
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    EVT DestVT = MVT::i32;
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
                           /*isZExt*/!isSigned);
    if (SrcReg == 0) return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}
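// For the immediate form below, a negative constant is stored bit-inverted
// (Imm = ~Imm) so that MVNCCi can reproduce the original value, since
// ~(~x) == x; non-negative constants use MOVCCi directly.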
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
    assert (VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
  }

  unsigned Op2Reg = 0;
  if (!UseImm) {
    Op2Reg = getRegForValue(I->getOperand(2));
    if (Op2Reg == 0) return false;
  }

  unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(0));

  unsigned MovCCOpc;
  if (!UseImm) {
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    if (!isNegativeImm) {
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    } else {
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
    }
  }
  unsigned ResultReg = createResultReg(RC);
  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
      .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
      .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  else if (VT == MVT::i16)
    LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  else if (VT == MVT::i32)
    LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  else if (VT == MVT::i64)
    LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  else if (VT == MVT::i128)
    LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
      break;
    case ISD::OR:
      Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
      break;
    case ISD::SUB:
      Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
      break;
  }

  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather than materializing the value in a register.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(SrcReg1).addReg(SrcReg2));
  UpdateValueMap(I, ResultReg);
  return true;
}
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}
// Call Handling Code
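// Calling-convention dispatch in brief: AAPCS-VFP ("hard float") passes FP
// arguments in s0-s15/d0-d7, while plain AAPCS and APCS pass everything in
// r0-r3 and on the stack. The assignment function returned below drives
// the argument loop in ProcessCallArgs.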
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    // Ignore fastcc. Silence compiler warnings.
    (void)RetFastCC_ARM_APCS;
    (void)FastCC_ARM_APCS;
    // Fallthrough
  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          TM.Options.FloatABIType == FloatABI::Hard)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}
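// Custom-lowered f64 arguments below are split with VMOVRRD, which copies a
// double-precision VFP register into a pair of core registers (e.g. d0 into
// r0/r1), as the soft-float convention requires.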
1841 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1842 SmallVectorImpl<unsigned> &ArgRegs,
1843 SmallVectorImpl<MVT> &ArgVTs,
1844 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1845 SmallVectorImpl<unsigned> &RegArgs,
1847 unsigned &NumBytes) {
1848 SmallVector<CCValAssign, 16> ArgLocs;
1849 CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
1850 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
1852 // Check that we can handle all of the arguments. If we can't, then bail out
1853 // now before we add code to the MBB.
1854 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1855 CCValAssign &VA = ArgLocs[i];
1856 MVT ArgVT = ArgVTs[VA.getValNo()];
1858 // We don't handle NEON/vector parameters yet.
1859 if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
1862 // Now copy/store arg to correct locations.
1863 if (VA.isRegLoc() && !VA.needsCustom()) {
1865 } else if (VA.needsCustom()) {
1866 // TODO: We need custom lowering for vector (v2f64) args.
1867 if (VA.getLocVT() != MVT::f64 ||
1868 // TODO: Only handle register args for now.
1869 !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
1872 switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
1881 if (!Subtarget->hasVFP2())
1885 if (!Subtarget->hasVFP2())
1892 // At the point, we are able to handle the call's arguments in fast isel.
1894 // Get a count of how many bytes are to be pushed on the stack.
1895 NumBytes = CCInfo.getNextStackOffset();
1897 // Issue CALLSEQ_START
1898 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1899 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1900 TII.get(AdjStackDown))
1903 // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
      assert(Arg != 0 && "Failed to emit a sext");
      ArgVT = DestVT;
      break;
    }
    case CCValAssign::AExt:
      // Intentional fall-through. Handle AExt and ZExt.
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
      assert(Arg != 0 && "Failed to emit a zext");
      ArgVT = DestVT;
      break;
    }
    case CCValAssign::BCvt: {
      unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                               /*TODO: Kill=*/false);
      assert(BC != 0 && "Failed to emit a bitcast!");
      Arg = BC;
      ArgVT = VA.getLocVT();
      break;
    }
    default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg()).addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}

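/// FinishCall - Emit CALLSEQ_END and copy the call's result, if any, out of
/// its physical return register(s) into a fresh virtual register.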
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      EVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

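/// SelectRet - Lower a 'ret' instruction, copying the return value (if any)
/// into its ABI-assigned register before emitting the return.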
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT RVVT = TLI.getValueType(RV->getType());
    EVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext. Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
        if (SrcReg == 0) return false;
      }
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}

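/// ARMSelectCallOp - Pick the call opcode: a register-indirect (BLX-style)
/// call when the callee address is in a register, a direct (BL-style) call
/// otherwise.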
unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  if (UseReg)
    return isThumb2 ? ARM::tBLXr : ARM::BLX;
  else
    return isThumb2 ? ARM::tBL : ARM::BL;
}

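/// getLibcallReg - Materialize the address of the named external symbol
/// (e.g. a libcall) into a register by synthesizing an external global and
/// reusing the global-value materialization path.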
unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
                                       GlobalValue::ExternalLinkage, 0, Name);
  return ARMMaterializeGV(GV, TLI.getValueType(GV->getType()));
}

// A quick function that will emit a call for a named libcall, using the
// operands of the Instruction I as the passed arguments. We can assume that
// we can emit a call for any libcall we can produce. This is an abridged
// version of the full call infrastructure since we won't need to worry about
// things like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  unsigned CalleeReg = 0;
  if (EnableARMLongCalls) {
    CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));
  if (isThumb2) {
    // Thumb2 call instructions take their predicate operands before the
    // callee operand, so add the predicate explicitly first.
    AddDefaultPred(MIB);
    if (EnableARMLongCalls)
      MIB.addReg(CalleeReg);
    else
      MIB.addExternalSymbol(TLI.getLibcallName(Call));
  } else {
    if (EnableARMLongCalls)
      MIB.addReg(CalleeReg);
    else
      MIB.addExternalSymbol(TLI.getLibcallName(Call));

    // ARM-mode call instructions take their predicate operands after the
    // callee operand.
    AddDefaultPred(MIB);
  }

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

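/// SelectCall - Lower a call to a global or computed function address. When
/// IntrMemName is non-null, the call lowers a memory intrinsic to the named
/// libcall and the intrinsic's last two operands are skipped.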
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = 0) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Callee)) return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8 && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CS.arg_size();
  Args.reserve(arg_size);
  ArgRegs.reserve(arg_size);
  ArgVTs.reserve(arg_size);
  ArgFlags.reserve(arg_size);
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last two arguments, which shouldn't be passed to the underlying function.
    if (IntrMemName && e - i <= 2)
      break;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || EnableARMLongCalls) UseReg = true;

  unsigned CalleeReg = 0;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(IntrMemName);
    else
      CalleeReg = getRegForValue(Callee);

    if (CalleeReg == 0) return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                    DL, TII.get(CallOpc));
  if (isThumb2) {
    // Thumb2 call instructions take their predicate operands before the
    // callee operand, so add the predicate explicitly first.
    AddDefaultPred(MIB);
    if (UseReg)
      MIB.addReg(CalleeReg);
    else if (!IntrMemName)
      MIB.addGlobalAddress(GV, 0, 0);
    else
      MIB.addExternalSymbol(IntrMemName, 0);
  } else {
    if (UseReg)
      MIB.addReg(CalleeReg);
    else if (!IntrMemName)
      MIB.addGlobalAddress(GV, 0, 0);
    else
      MIB.addExternalSymbol(IntrMemName, 0);

    // ARM-mode call instructions take their predicate operands after the
    // callee operand.
    AddDefaultPred(MIB);
  }

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(CC));

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

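/// ARMIsMemCpySmall - Decide whether a memcpy of Len bytes is small enough to
/// expand inline as a short sequence of loads and stores.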
bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  return Len <= 16;
}

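/// ARMTryEmitSmallMemCpy - Expand a small memcpy inline as integer load/store
/// pairs, widest chunks first, advancing the source and destination offsets
/// as it goes.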
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
                                        uint64_t Len) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else {
      assert(Len == 1);
      VT = MVT::i8;
    }

    bool RV;
    unsigned ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, ResultReg, Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    unsigned Size = VT.getSizeInBits() / 8;
    Len -= Size;
    Dest.Offset += Size;
    Src.Offset += Size;
  }

  return true;
}

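/// SelectIntrinsicCall - Lower the intrinsics this fast path understands:
/// frameaddress, memcpy, memmove, memset, and trap.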
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
    MFI->setFrameAddressIsTaken(true);

    unsigned LdrOpc;
    const TargetRegisterClass *RC;
    if (isThumb2) {
      LdrOpc = ARM::t2LDRi12;
      RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
    } else {
      LdrOpc = ARM::LDRi12;
      RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
    }

    const ARMBaseRegisterInfo *RegInfo =
        static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
    unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
    unsigned SrcReg = FramePtr;

    // Recursively load frame address:
    //   ldr r0, [fp]
    //   ldr r0, [r0]
    //   ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(LdrOpc), DestReg)
                      .addReg(SrcReg).addImm(0));
      SrcReg = DestReg;
    }
    UpdateValueMap(&I, SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a
      // call if possible.
      uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
            !ARMComputeAddress(MTI.getRawSource(), Src))
          return false;
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
          return true;
      }
    }

    if (!MTI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
    return SelectCall(&I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    if (!MSI.getLength()->getType()->isIntegerTy(32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(&I, "memset");
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
    return true;
  }
  }
}

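/// SelectTrunc - Truncates are a no-op here: the high bits of the source
/// register are simply left undefined, so the source register can be reused.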
bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(Op->getType(), true);
  DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  UpdateValueMap(I, SrcReg);
  return true;
}

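/// ARMEmitIntExt - Emit a zero- or sign-extension of SrcReg from SrcVT to
/// DestVT, returning the result register (0 on failure). i1 zero-extension
/// uses an AND with 1; i8/i16 use the UXT/SXT instruction family.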
unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return 0;

  unsigned Opc;
  bool isBoolZext = false;
  if (!SrcVT.isSimple()) return 0;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i16:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
    else
      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
    break;
  case MVT::i8:
    if (!Subtarget->hasV6Ops()) return 0;
    if (isZExt)
      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
    else
      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
    break;
  case MVT::i1:
    if (isZExt) {
      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      isBoolZext = true;
      break;
    }
    return 0;
  }

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
        .addReg(SrcReg);
  if (isBoolZext)
    MIB.addImm(1);
  AddOptionalDefs(MIB);
  return ResultReg;
}

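/// SelectIntExt - Lower zext/sext instructions by promoting the source value
/// to a legal integer type via ARMEmitIntExt.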
bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers.
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(SrcTy, true);
  DestVT = TLI.getValueType(DestTy, true);

  bool isZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (ResultReg == 0) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

// TODO: SoftFP support.
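/// TargetSelectInstruction - Dispatch on the IR opcode to the matching
/// Select* routine; returning false hands the instruction back to the
/// SelectionDAG-based instruction selector.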
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    default: break;
  }
  return false;
}

/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  bool isZExt = true;
  switch (MI->getOpcode()) {
    default: return false;
    case ARM::SXTH:
    case ARM::t2SXTH:
      isZExt = false;
      // Fallthrough: only the signedness differs from the UXT cases.
    case ARM::UXTH:
    case ARM::t2UXTH:
      if (VT != MVT::i16)
        return false;
      break;
    case ARM::SXTB:
    case ARM::t2SXTB:
      isZExt = false;
      // Fallthrough.
    case ARM::UXTB:
    case ARM::t2UXTB:
      if (VT != MVT::i8)
        return false;
      break;
  }
  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  unsigned ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MI->eraseFromParent();
  return true;
}

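/// createFastISel - Entry point for the target-independent FastISel machinery
/// to construct the ARM FastISel instance for a function.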
namespace llvm {
  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-iOS.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // iOS only for now, and not Thumb1-only subtargets.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
      return new ARMFastISel(funcInfo);
    return 0;
  }
}