1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the ARM-specific support for the FastISel class. Some
11 // of the target-specific code is generated by tablegen in the file
12 // ARMGenFastISel.inc, which is #included here.
14 //===----------------------------------------------------------------------===//
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMCallingConv.h"
19 #include "ARMRegisterInfo.h"
20 #include "ARMTargetMachine.h"
21 #include "ARMSubtarget.h"
22 #include "llvm/CallingConv.h"
23 #include "llvm/DerivedTypes.h"
24 #include "llvm/GlobalVariable.h"
25 #include "llvm/Instructions.h"
26 #include "llvm/IntrinsicInst.h"
27 #include "llvm/Module.h"
28 #include "llvm/CodeGen/Analysis.h"
29 #include "llvm/CodeGen/FastISel.h"
30 #include "llvm/CodeGen/FunctionLoweringInfo.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineModuleInfo.h"
33 #include "llvm/CodeGen/MachineConstantPool.h"
34 #include "llvm/CodeGen/MachineFrameInfo.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/Support/CallSite.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/GetElementPtrTypeIterator.h"
40 #include "llvm/Target/TargetData.h"
41 #include "llvm/Target/TargetInstrInfo.h"
42 #include "llvm/Target/TargetLowering.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetOptions.h"
// Command-line flag gating the experimental ARM fast-isel path.
// Off by default and hidden from -help; flip with -arm-fast-isel.
48 EnableARMFastISel("arm-fast-isel",
49 cl::desc("Turn on experimental ARM fast-isel support"),
50 cl::init(false), cl::Hidden);
// ARMFastISel - ARM implementation of the FastISel interface. Selects a
// restricted subset of IR instructions (loads/stores, branches, compares,
// FP conversions, simple binary ops, sdiv-via-libcall) straight to machine
// instructions; anything it cannot handle is rejected so the caller can
// fall back to the full SelectionDAG path.
54 class ARMFastISel : public FastISel {
56 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
57 /// make the right decision when generating code for different targets.
58 const ARMSubtarget *Subtarget;
59 const TargetMachine &TM;
60 const TargetInstrInfo &TII;
61 const TargetLowering &TLI;
62 const ARMFunctionInfo *AFI;
64 // Convenience variable to avoid checking all the time.
// (isThumb; initialized from AFI->isThumbFunction() in the constructor.)
68 explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
70 TM(funcInfo.MF->getTarget()),
71 TII(*TM.getInstrInfo()),
72 TLI(*TM.getTargetLowering()) {
73 Subtarget = &TM.getSubtarget<ARMSubtarget>();
74 AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
75 isThumb = AFI->isThumbFunction();
78 // Code from FastISel.cpp.
// Overrides of the generic FastEmitInst_* emitters so every emitted
// instruction gets its optional predicate/CC-def operands appended
// (see AddOptionalDefs below).
79 virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
80 const TargetRegisterClass *RC);
81 virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
82 const TargetRegisterClass *RC,
83 unsigned Op0, bool Op0IsKill);
84 virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
85 const TargetRegisterClass *RC,
86 unsigned Op0, bool Op0IsKill,
87 unsigned Op1, bool Op1IsKill);
88 virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
89 const TargetRegisterClass *RC,
90 unsigned Op0, bool Op0IsKill,
92 virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
93 const TargetRegisterClass *RC,
94 unsigned Op0, bool Op0IsKill,
95 const ConstantFP *FPImm);
96 virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
97 const TargetRegisterClass *RC,
99 virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
100 const TargetRegisterClass *RC,
101 unsigned Op0, bool Op0IsKill,
102 unsigned Op1, bool Op1IsKill,
104 virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
105 unsigned Op0, bool Op0IsKill,
108 // Backend specific FastISel code.
109 virtual bool TargetSelectInstruction(const Instruction *I);
110 virtual unsigned TargetMaterializeConstant(const Constant *C);
112 #include "ARMGenFastISel.inc"
114 // Instruction selection routines.
// Each returns true on successful selection, false to trigger the
// SelectionDAG fallback for that instruction.
116 virtual bool SelectLoad(const Instruction *I);
117 virtual bool SelectStore(const Instruction *I);
118 virtual bool SelectBranch(const Instruction *I);
119 virtual bool SelectCmp(const Instruction *I);
120 virtual bool SelectFPExt(const Instruction *I);
121 virtual bool SelectFPTrunc(const Instruction *I);
122 virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
123 virtual bool SelectSIToFP(const Instruction *I);
124 virtual bool SelectFPToSI(const Instruction *I);
125 virtual bool SelectSDiv(const Instruction *I);
// Utility routines: type legality checks, load/store emission helpers,
// and constant materialization.
129 bool isTypeLegal(const Type *Ty, EVT &VT);
130 bool isLoadTypeLegal(const Type *Ty, EVT &VT);
131 bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
132 bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
133 bool ARMLoadAlloca(const Instruction *I, EVT VT);
134 bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT);
135 bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
136 unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
137 unsigned ARMMaterializeInt(const Constant *C);
138 unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
139 unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
141 // Call handling routines.
143 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
144 bool ARMEmitLibcall(const Instruction *I, Function *F);
146 // OptionalDef handling routines.
148 bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
149 const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
152 } // end anonymous namespace
154 #include "ARMGenCallingConv.inc"
156 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
157 // we don't care about implicit defs here, just places we'll need to add a
158 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
// Returns true when MI's descriptor carries an optional def; scans the
// explicit def operands to decide whether that def is ARM::CPSR.
159 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
160 const TargetInstrDesc &TID = MI->getDesc();
161 if (!TID.hasOptionalDef())
164 // Look to see if our OptionalDef is defining CPSR or CCR.
165 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
166 const MachineOperand &MO = MI->getOperand(i);
167 if (!MO.isReg() || !MO.isDef()) continue;
168 if (MO.getReg() == ARM::CPSR)
174 // If the machine is predicable go ahead and add the predicate operands, if
175 // it needs default CC operands add those.
// Returns MIB unchanged so call sites can keep chaining .addReg()/.addImm()
// after the optional operands have been appended.
176 const MachineInstrBuilder &
177 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
178 MachineInstr *MI = &*MIB;
180 // Do we use a predicate?
181 if (TII.isPredicable(MI))
184 // Do we optionally set a predicate? Preds is size > 0 iff the predicate
185 // defines CPSR. All other OptionalDefines in ARM are the CCR register.
187 if (DefinesOptionalPredicate(MI, &CPSR)) {
// FastEmitInst_ - Emit a zero-operand instruction of the given opcode into
// a fresh virtual register of class RC, appending ARM's optional
// predicate/CC operands. Returns the result register.
196 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
197 const TargetRegisterClass* RC) {
198 unsigned ResultReg = createResultReg(RC);
199 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
201 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
// FastEmitInst_r - Emit a one-register-operand instruction. If the
// instruction has an explicit def, define ResultReg directly; otherwise
// the result lands in the instruction's first implicit def and is copied
// into ResultReg so callers always get a virtual register.
205 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
206 const TargetRegisterClass *RC,
207 unsigned Op0, bool Op0IsKill) {
208 unsigned ResultReg = createResultReg(RC);
209 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
211 if (II.getNumDefs() >= 1)
212 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
213 .addReg(Op0, Op0IsKill * RegState::Kill));
215 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
216 .addReg(Op0, Op0IsKill * RegState::Kill));
217 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
218 TII.get(TargetOpcode::COPY), ResultReg)
219 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_rr - Emit a two-register-operand instruction; same
// explicit-def vs. implicit-def+COPY structure as FastEmitInst_r.
224 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
225 const TargetRegisterClass *RC,
226 unsigned Op0, bool Op0IsKill,
227 unsigned Op1, bool Op1IsKill) {
228 unsigned ResultReg = createResultReg(RC);
229 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
231 if (II.getNumDefs() >= 1)
232 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
233 .addReg(Op0, Op0IsKill * RegState::Kill)
234 .addReg(Op1, Op1IsKill * RegState::Kill));
236 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
237 .addReg(Op0, Op0IsKill * RegState::Kill)
238 .addReg(Op1, Op1IsKill * RegState::Kill));
239 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
240 TII.get(TargetOpcode::COPY), ResultReg)
241 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_ri - Emit a register + immediate instruction (the .addImm
// calls are elided from this listing); same def-handling structure as
// FastEmitInst_r.
246 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
247 const TargetRegisterClass *RC,
248 unsigned Op0, bool Op0IsKill,
250 unsigned ResultReg = createResultReg(RC);
251 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
253 if (II.getNumDefs() >= 1)
254 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
255 .addReg(Op0, Op0IsKill * RegState::Kill)
258 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
259 .addReg(Op0, Op0IsKill * RegState::Kill)
261 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
262 TII.get(TargetOpcode::COPY), ResultReg)
263 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_rf - Emit a register + FP-immediate instruction; same
// def-handling structure as FastEmitInst_r.
268 unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
269 const TargetRegisterClass *RC,
270 unsigned Op0, bool Op0IsKill,
271 const ConstantFP *FPImm) {
272 unsigned ResultReg = createResultReg(RC);
273 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
275 if (II.getNumDefs() >= 1)
276 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
277 .addReg(Op0, Op0IsKill * RegState::Kill)
280 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
281 .addReg(Op0, Op0IsKill * RegState::Kill)
283 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
284 TII.get(TargetOpcode::COPY), ResultReg)
285 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_rri - Emit a register + register + immediate instruction;
// same def-handling structure as FastEmitInst_r.
290 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
291 const TargetRegisterClass *RC,
292 unsigned Op0, bool Op0IsKill,
293 unsigned Op1, bool Op1IsKill,
295 unsigned ResultReg = createResultReg(RC);
296 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
298 if (II.getNumDefs() >= 1)
299 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
300 .addReg(Op0, Op0IsKill * RegState::Kill)
301 .addReg(Op1, Op1IsKill * RegState::Kill)
304 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
305 .addReg(Op0, Op0IsKill * RegState::Kill)
306 .addReg(Op1, Op1IsKill * RegState::Kill)
308 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
309 TII.get(TargetOpcode::COPY), ResultReg)
310 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_i - Emit an immediate-only instruction; same def-handling
// structure as FastEmitInst_r.
315 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
316 const TargetRegisterClass *RC,
318 unsigned ResultReg = createResultReg(RC);
319 const TargetInstrDesc &II = TII.get(MachineInstOpcode);
321 if (II.getNumDefs() >= 1)
322 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
325 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
327 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
328 TII.get(TargetOpcode::COPY), ResultReg)
329 .addReg(II.ImplicitDefs[0]));
// FastEmitInst_extractsubreg - Extract subregister Idx of virtual register
// Op0 into a fresh register of RetVT's class via a COPY.
334 unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
335 unsigned Op0, bool Op0IsKill,
337 unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
338 assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
339 "Cannot yet extract from physregs");
340 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
341 DL, TII.get(TargetOpcode::COPY), ResultReg)
342 .addReg(Op0, getKillRegState(Op0IsKill), Idx));
346 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
347 // checks from the various callers.
// ARMMoveToFPReg - Move SrcReg into a VFP single-precision register via
// VMOVRS. Returns 0 (failure) for f64 since 64-bit isn't handled yet.
348 unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
349 if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0;
351 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
352 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
353 TII.get(ARM::VMOVRS), MoveReg)
// ARMMoveToIntReg - Move SrcReg from a VFP register into a core register
// via VMOVSR. Returns 0 (failure) for i64, which needs two registers.
358 unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
359 if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0;
361 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
362 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
363 TII.get(ARM::VMOVSR), MoveReg)
368 // For double width floating point we need to materialize two constants
369 // (the high and the low) into integer registers then use a move to get
370 // the combined constant into an FP reg.
// ARMMaterializeFP - Materialize the FP constant CFP into a register:
// use FCONSTS/FCONSTD when the value is encodable as a VFP3 immediate,
// otherwise load it from the constant pool with VLDRS/VLDRD.
371 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
372 const APFloat Val = CFP->getValueAPF();
373 bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64;
375 // This checks to see if we can use VFP3 instructions to materialize
376 // a constant, otherwise we have to go through the constant pool.
377 if (TLI.isFPImmLegal(Val, VT)) {
378 unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
379 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
380 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
386 // Require VFP2 for loading fp constants.
// NOTE: returns false here although the function returns unsigned; the
// value converts to 0, which callers treat as "no register".
387 if (!Subtarget->hasVFP2()) return false;
389 // MachineConstantPool wants an explicit alignment.
390 unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
392 // TODO: Figure out if this is correct.
// Overwrites the preferred alignment with the alloc size unconditionally.
393 Align = TD.getTypeAllocSize(CFP->getType());
395 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
396 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
397 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
399 // The extra reg is for addrmode5.
400 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
402 .addConstantPoolIndex(Idx)
407 // TODO: Verify 64-bit.
// ARMMaterializeInt - Materialize the integer constant C by loading it
// from the constant pool (t2LDRpci in Thumb2, a pci load via addrmode2
// otherwise). Returns the destination register.
408 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C) {
409 // MachineConstantPool wants an explicit alignment.
410 unsigned Align = TD.getPrefTypeAlignment(C->getType());
412 // TODO: Figure out if this is correct.
// Overwrites the preferred alignment with the alloc size unconditionally.
413 Align = TD.getTypeAllocSize(C->getType());
415 unsigned Idx = MCP.getConstantPoolIndex(C, Align);
416 unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
419 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
420 TII.get(ARM::t2LDRpci))
421 .addReg(DestReg).addConstantPoolIndex(Idx));
423 // The extra reg and immediate are for addrmode2.
424 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
426 .addReg(DestReg).addConstantPoolIndex(Idx)
427 .addReg(0).addImm(0));
// TargetMaterializeConstant - FastISel hook: materialize constant C into a
// register. Dispatches FP constants to ARMMaterializeFP and everything
// else to ARMMaterializeInt; returns 0 for non-simple types.
432 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
433 EVT VT = TLI.getValueType(C->getType(), true);
435 // Only handle simple types.
436 if (!VT.isSimple()) return 0;
438 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
439 return ARMMaterializeFP(CFP, VT);
440 return ARMMaterializeInt(C);
// isTypeLegal - Compute Ty's EVT into VT (out-param) and return true iff
// the target can hold it directly in a register.
443 bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
444 VT = TLI.getValueType(Ty, true);
446 // Only handle simple types.
447 if (VT == MVT::Other || !VT.isSimple()) return false;
449 // Handle all legal types, i.e. a register that will directly hold this
451 return TLI.isTypeLegal(VT);
// isLoadTypeLegal - Like isTypeLegal, but additionally accepts i8/i16,
// which can be loaded with a sign/zero-extending load.
454 bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
455 if (isTypeLegal(Ty, VT)) return true;
457 // If this is a type than can be sign or zero-extended to a basic operation
458 // go ahead and accept it now.
459 if (VT == MVT::i8 || VT == MVT::i16)
465 // Computes the Reg+Offset to get to an object.
// ARMComputeRegOffset - Resolve Obj to a base register (Reg, out-param)
// plus byte offset (Offset, out-param), folding the offset into the
// register when it may be too large for the load/store encoding.
// Returns false when the address can't be computed (cross-block value,
// unsupported address space, alloca, global, or unmaterializable value).
466 bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
468 // Some boilerplate from the X86 FastISel.
469 const User *U = NULL;
470 unsigned Opcode = Instruction::UserOp1;
471 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
472 // Don't walk into other basic blocks; it's possible we haven't
473 // visited them yet, so the instructions may not yet be assigned
474 // virtual registers.
475 if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
477 Opcode = I->getOpcode();
479 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
480 Opcode = C->getOpcode();
// Address spaces above 255 have special argument-lowering semantics that
// fast-isel doesn't model, so reject them.
484 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
485 if (Ty->getAddressSpace() > 255)
486 // Fast instruction selection doesn't support the special
493 case Instruction::Alloca: {
// Allocas are handled by ARMLoadAlloca/ARMStoreAlloca before reaching here.
494 assert(false && "Alloca should have been handled earlier!");
499 // FIXME: Handle global variables.
500 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
505 // Try to get this in a register if nothing else has worked.
506 Reg = getRegForValue(Obj);
507 if (Reg == 0) return false;
509 // Since the offset may be too large for the load instruction
510 // get the reg+offset into a register.
511 // TODO: Verify the additions work, otherwise we'll need to add the
512 // offset instead of 0 to the instructions and do all sorts of operand
514 // TODO: Optimize this somewhat.
516 ARMCC::CondCodes Pred = ARMCC::AL;
517 unsigned PredReg = 0;
// ARM vs. Thumb2 use different add-immediate emitters; both fold the
// offset into Reg in place.
520 emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
521 Reg, Reg, Offset, Pred, PredReg,
522 static_cast<const ARMBaseInstrInfo&>(TII));
524 assert(AFI->isThumb2Function());
525 emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
526 Reg, Reg, Offset, Pred, PredReg,
527 static_cast<const ARMBaseInstrInfo&>(TII));
// ARMLoadAlloca - If the load's pointer operand is a static alloca, load
// directly from its frame index via loadRegFromStackSlot and map the
// result. Returns true on success, false when the operand isn't a
// frame-index alloca (caller falls through to the generic path).
533 bool ARMFastISel::ARMLoadAlloca(const Instruction *I, EVT VT) {
534 Value *Op0 = I->getOperand(0);
536 // Verify it's an alloca.
537 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
538 DenseMap<const AllocaInst*, int>::iterator SI =
539 FuncInfo.StaticAllocaMap.find(AI);
541 if (SI != FuncInfo.StaticAllocaMap.end()) {
542 TargetRegisterClass* RC = TLI.getRegClassFor(VT);
543 unsigned ResultReg = createResultReg(RC);
544 TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
545 ResultReg, SI->second, RC,
546 TM.getRegisterInfo());
547 UpdateValueMap(I, ResultReg);
// ARMEmitLoad - Emit a load of VT from [Reg + Offset] into ResultReg
// (out-param). Picks the opcode by type and Thumb-ness; the three emit
// forms below differ only in operand order/count for the thumb, FP, and
// ARM addressing modes.
554 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
555 unsigned Reg, int Offset) {
557 assert(VT.isSimple() && "Non-simple types are invalid here!");
559 bool isFloat = false;
560 switch (VT.getSimpleVT().SimpleTy) {
562 assert(false && "Trying to emit for an unhandled type!");
565 Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
569 Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
573 Opc = isThumb ? ARM::tLDR : ARM::LDR;
585 ResultReg = createResultReg(TLI.getRegClassFor(VT));
587 // TODO: Fix the Addressing modes so that these can share some code.
588 // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
589 // The thumb addressing mode has operands swapped from the arm addressing
590 // mode, the floating point one only has two operands.
592 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
593 TII.get(Opc), ResultReg)
594 .addReg(Reg).addImm(Offset));
596 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
597 TII.get(Opc), ResultReg)
598 .addReg(Reg).addImm(Offset).addReg(0));
600 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
601 TII.get(Opc), ResultReg)
602 .addReg(Reg).addReg(0).addImm(Offset));
// SelectLoad - Select an IR load: try the frame-index fast path
// (ARMLoadAlloca) first, else compute a Reg+Offset address and emit the
// load, then record the result in the value map.
606 bool ARMFastISel::SelectLoad(const Instruction *I) {
607 // Verify we have a legal type before going any further.
609 if (!isLoadTypeLegal(I->getType(), VT))
612 // If we're an alloca we know we have a frame index and can emit the load
613 // directly in short order.
614 if (ARMLoadAlloca(I, VT))
617 // Our register and offset with innocuous defaults.
621 // See if we can handle this as Reg + Offset
622 if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
626 if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
628 UpdateValueMap(I, ResultReg);
// ARMStoreAlloca - If the store's pointer operand is a static alloca,
// spill SrcReg straight to its frame index via storeRegToStackSlot.
// Returns true on success, false when the target isn't a frame-index
// alloca (caller falls through to the generic path).
632 bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT){
633 Value *Op1 = I->getOperand(1);
635 // Verify it's an alloca.
636 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
637 DenseMap<const AllocaInst*, int>::iterator SI =
638 FuncInfo.StaticAllocaMap.find(AI);
640 if (SI != FuncInfo.StaticAllocaMap.end()) {
641 TargetRegisterClass* RC = TLI.getRegClassFor(VT);
642 assert(SrcReg != 0 && "Nothing to store!");
643 TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
644 SrcReg, true /*isKill*/, SI->second, RC,
645 TM.getRegisterInfo());
// ARMEmitStore - Emit a store of SrcReg (type VT) to [DstReg + Offset].
// Opcode selection mirrors ARMEmitLoad; f32/f64 require VFP2. Returns
// false for types with no store handling.
652 bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
653 unsigned DstReg, int Offset) {
655 bool isFloat = false;
656 switch (VT.getSimpleVT().SimpleTy) {
657 default: return false;
659 case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
660 case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
661 case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
663 if (!Subtarget->hasVFP2()) return false;
668 if (!Subtarget->hasVFP2()) return false;
674 // The thumb addressing mode has operands swapped from the arm addressing
675 // mode, the floating point one only has two operands.
677 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
678 TII.get(StrOpc), SrcReg)
679 .addReg(DstReg).addImm(Offset));
681 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
682 TII.get(StrOpc), SrcReg)
683 .addReg(DstReg).addImm(Offset).addReg(0));
686 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
687 TII.get(StrOpc), SrcReg)
688 .addReg(DstReg).addReg(0).addImm(Offset));
// SelectStore - Select an IR store: materialize the stored value, try the
// frame-index fast path (ARMStoreAlloca), else compute a Reg+Offset
// address and emit the store.
693 bool ARMFastISel::SelectStore(const Instruction *I) {
694 Value *Op0 = I->getOperand(0);
697 // Yay type legalization
699 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
702 // Get the value to be stored into a register.
703 SrcReg = getRegForValue(Op0);
707 // If we're an alloca we know we have a frame index and can emit the store
709 if (ARMStoreAlloca(I, SrcReg, VT))
712 // Our register and offset with innocuous defaults.
716 // See if we can handle this as Reg + Offset
717 if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
720 if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
// getComparePred - Map an IR integer/FP compare predicate to the ARM
// condition code used after CMP/VCMP+FMSTAT. Predicates that need two
// compares (ONE, UEQ) are unhandled; callers treat ARMCC::AL as the
// "unsupported" sentinel (see SelectBranch) -- presumably the elided
// default returns AL.
725 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
727 // Needs two compares...
728 case CmpInst::FCMP_ONE:
729 case CmpInst::FCMP_UEQ:
731 assert(false && "Unhandled CmpInst::Predicate!");
733 case CmpInst::ICMP_EQ:
734 case CmpInst::FCMP_OEQ:
736 case CmpInst::ICMP_SGT:
737 case CmpInst::FCMP_OGT:
739 case CmpInst::ICMP_SGE:
740 case CmpInst::FCMP_OGE:
742 case CmpInst::ICMP_UGT:
743 case CmpInst::FCMP_UGT:
745 case CmpInst::FCMP_OLT:
747 case CmpInst::ICMP_ULE:
748 case CmpInst::FCMP_OLE:
750 case CmpInst::FCMP_ORD:
752 case CmpInst::FCMP_UNO:
754 case CmpInst::FCMP_UGE:
756 case CmpInst::ICMP_SLT:
757 case CmpInst::FCMP_ULT:
759 case CmpInst::ICMP_SLE:
760 case CmpInst::FCMP_ULE:
762 case CmpInst::FCMP_UNE:
763 case CmpInst::ICMP_NE:
765 case CmpInst::ICMP_UGE:
767 case CmpInst::ICMP_ULT:
// SelectBranch - Select a conditional branch whose condition is a CmpInst
// with a directly-representable predicate: emit Bcc/t2Bcc to the true
// block, an unconditional branch to the false block, and register the
// true successor. Non-compare conditions are rejected.
772 bool ARMFastISel::SelectBranch(const Instruction *I) {
773 const BranchInst *BI = cast<BranchInst>(I);
774 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
775 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
777 // Simple branch support.
778 // TODO: Hopefully we've already handled the condition since we won't
779 // have left an update in the value map. See the TODO below in SelectCMP.
780 Value *Cond = BI->getCondition();
781 unsigned CondReg = getRegForValue(Cond);
782 if (CondReg == 0) return false;
784 ARMCC::CondCodes ARMPred = ARMCC::NE;
785 CmpInst *CI = dyn_cast<CmpInst>(Cond);
786 if (!CI) return false;
788 // Get the compare predicate.
789 ARMPred = getComparePred(CI->getPredicate());
791 // We may not handle every CC for now.
// ARMCC::AL is getComparePred's "unsupported predicate" sentinel.
792 if (ARMPred == ARMCC::AL) return false;
794 unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
795 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
796 .addMBB(TBB).addImm(ARMPred).addReg(CondReg);
797 FastEmitBranch(FBB, DL);
798 FuncInfo.MBB->addSuccessor(TBB);
// SelectCmp - Select an integer or FP compare. Emits CMPrr/t2CMPrr for
// integers, VCMPES/VCMPED + FMSTAT for floats (copying the FP status
// flags into the usual CC register), and maps the result register.
802 bool ARMFastISel::SelectCmp(const Instruction *I) {
803 const CmpInst *CI = cast<CmpInst>(I);
806 const Type *Ty = CI->getOperand(0)->getType();
807 if (!isTypeLegal(Ty, VT))
810 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
811 if (isFloat && !Subtarget->hasVFP2())
816 switch (VT.getSimpleVT().SimpleTy) {
817 default: return false;
818 // TODO: Verify compares.
820 CmpOpc = ARM::VCMPES;
821 DestReg = ARM::FPSCR;
824 CmpOpc = ARM::VCMPED;
825 DestReg = ARM::FPSCR;
828 CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
833 unsigned Arg1 = getRegForValue(CI->getOperand(0));
834 if (Arg1 == 0) return false;
836 unsigned Arg2 = getRegForValue(CI->getOperand(1));
837 if (Arg2 == 0) return false;
839 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
840 .addReg(Arg1).addReg(Arg2));
842 // For floating point we need to move the result to a comparison register
843 // that we can then use for branches.
845 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
846 TII.get(ARM::FMSTAT)));
848 // Update the value to the implicit def reg.
849 UpdateValueMap(I, DestReg);
// SelectFPExt - Select fpext float -> double as a single VCVTDS into a
// fresh DPR register. Requires VFP2; rejects every other type pairing.
853 bool ARMFastISel::SelectFPExt(const Instruction *I) {
854 // Make sure we have VFP and that we're extending float to double.
855 if (!Subtarget->hasVFP2()) return false;
857 Value *V = I->getOperand(0);
858 if (!I->getType()->isDoubleTy() ||
859 !V->getType()->isFloatTy()) return false;
861 unsigned Op = getRegForValue(V);
862 if (Op == 0) return false;
864 unsigned Result = createResultReg(ARM::DPRRegisterClass);
865 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
866 TII.get(ARM::VCVTDS), Result)
868 UpdateValueMap(I, Result);
// SelectFPTrunc - Select fptrunc double -> float as a single VCVTSD into
// a fresh SPR register. Mirror image of SelectFPExt.
872 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
873 // Make sure we have VFP and that we're truncating double to float.
874 if (!Subtarget->hasVFP2()) return false;
876 Value *V = I->getOperand(0);
877 if (!I->getType()->isFloatTy() ||
878 !V->getType()->isDoubleTy()) return false;
880 unsigned Op = getRegForValue(V);
881 if (Op == 0) return false;
883 unsigned Result = createResultReg(ARM::SPRRegisterClass);
884 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
885 TII.get(ARM::VCVTSD), Result)
887 UpdateValueMap(I, Result);
// SelectSIToFP - Select sitofp: move the integer into an FP register
// (VSITOS/VSITOD operate fp-reg to fp-reg), then emit the conversion.
// Requires VFP2; f64 currently fails inside ARMMoveToFPReg.
891 bool ARMFastISel::SelectSIToFP(const Instruction *I) {
892 // Make sure we have VFP.
893 if (!Subtarget->hasVFP2()) return false;
896 const Type *Ty = I->getType();
897 if (!isTypeLegal(Ty, DstVT))
900 unsigned Op = getRegForValue(I->getOperand(0));
901 if (Op == 0) return false;
903 // The conversion routine works on fp-reg to fp-reg and the operand above
904 // was an integer, move it to the fp registers if possible.
905 unsigned FP = ARMMoveToFPReg(DstVT, Op);
906 if (FP == 0) return false;
909 if (Ty->isFloatTy()) Opc = ARM::VSITOS;
910 else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
913 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
914 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
917 UpdateValueMap(I, ResultReg);
// SelectFPToSI - Select fptosi: emit VTOSIZS/VTOSIZD (result lands in an
// FP register), then move it into a core register with ARMMoveToIntReg.
// Requires VFP2.
921 bool ARMFastISel::SelectFPToSI(const Instruction *I) {
922 // Make sure we have VFP.
923 if (!Subtarget->hasVFP2()) return false;
926 const Type *RetTy = I->getType();
927 if (!isTypeLegal(RetTy, DstVT))
930 unsigned Op = getRegForValue(I->getOperand(0));
931 if (Op == 0) return false;
934 const Type *OpTy = I->getOperand(0)->getType();
935 if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
936 else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
938 EVT OpVT = TLI.getValueType(OpTy, true);
// The conversion result uses the source's register class; it is moved to
// an integer register below.
940 unsigned ResultReg = createResultReg(TLI.getRegClassFor(OpVT));
941 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
945 // This result needs to be in an integer register, but the conversion only
946 // takes place in fp-regs.
947 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
948 if (IntReg == 0) return false;
950 UpdateValueMap(I, IntReg);
// SelectBinaryOp - Select the FP binary ops this pass reaches (FADD/FSUB/
// FMUL per TargetSelectInstruction) using the scalar VFP instructions
// VADD/VSUB/VMUL in single or double precision. Requires VFP2 for floats.
954 bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
955 EVT VT = TLI.getValueType(I->getType(), true);
957 // We can get here in the case when we want to use NEON for our fp
958 // operations, but can't figure out how to. Just use the vfp instructions
960 // FIXME: It'd be nice to use NEON instructions.
961 const Type *Ty = I->getType();
962 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
963 if (isFloat && !Subtarget->hasVFP2())
966 unsigned Op1 = getRegForValue(I->getOperand(0));
967 if (Op1 == 0) return false;
969 unsigned Op2 = getRegForValue(I->getOperand(1));
970 if (Op2 == 0) return false;
973 bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 ||
974 VT.getSimpleVT().SimpleTy == MVT::i64;
976 default: return false;
978 Opc = is64bit ? ARM::VADDD : ARM::VADDS;
981 Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
984 Opc = is64bit ? ARM::VMULD : ARM::VMULS;
987 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
988 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
989 TII.get(Opc), ResultReg)
990 .addReg(Op1).addReg(Op2));
991 UpdateValueMap(I, ResultReg);
995 // Call Handling Code
997 // This is largely taken directly from CCAssignFnForNode - we don't support
998 // varargs in FastISel so that part has been removed.
999 // TODO: We may not support all of this.
// CCAssignFnForCall - Pick the calling-convention assignment function for
// CC: Return selects the return-value table instead of the argument
// table. C/Fast dispatch on ABI (AAPCS-VFP for hard-float VFP2, else
// AAPCS or APCS); the ARM_* conventions map directly.
1000 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
1003 llvm_unreachable("Unsupported calling convention");
1004 case CallingConv::C:
1005 case CallingConv::Fast:
1006 // Use target triple & subtarget features to do actual dispatch.
1007 if (Subtarget->isAAPCS_ABI()) {
1008 if (Subtarget->hasVFP2() &&
1009 FloatABIType == FloatABI::Hard)
1010 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1012 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1014 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1015 case CallingConv::ARM_AAPCS_VFP:
1016 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1017 case CallingConv::ARM_AAPCS:
1018 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1019 case CallingConv::ARM_APCS:
1020 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1024 // A quick function that will emit a call for a named libcall in F with the
1025 // vector of passed arguments for the Instruction in I. We can assume that we
1026 // can emit a call for any libcall we can produce. This is an abridged version
1027 // of the full call infrastructure since we won't need to worry about things
1028 // like computed function pointers or strange arguments at call sites.
1029 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
// Steps: gather args from I, run them through the CC assigner, COPY
// register args into place, emit BL/BLr9 (or the thumb variants), then
// COPY the single return value out and mark unused physreg defs dead.
// NOTE(review): the arg-gathering loop below iterates over Args, which is
// only reserve()d (size 0) at that point -- so no arguments are ever
// collected; looks like it should iterate I->getNumOperands(). Confirm
// against the in-tree revision before relying on this path.
1031 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, Function *F) {
1032 CallingConv::ID CC = F->getCallingConv();
1034 // Handle *simple* calls for now.
1035 const Type *RetTy = F->getReturnType();
1037 if (RetTy->isVoidTy())
1038 RetVT = MVT::isVoid;
1039 else if (!isTypeLegal(RetTy, RetVT))
1042 assert(!F->isVarArg() && "Vararg libcall?!");
1044 // Abridged from the X86 FastISel call selection mechanism
1045 SmallVector<Value*, 8> Args;
1046 SmallVector<unsigned, 8> ArgRegs;
1047 SmallVector<EVT, 8> ArgVTs;
1048 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1049 Args.reserve(I->getNumOperands());
1050 ArgRegs.reserve(I->getNumOperands());
1051 ArgVTs.reserve(I->getNumOperands());
1052 ArgFlags.reserve(I->getNumOperands());
1053 for (unsigned i = 0; i < Args.size(); ++i) {
1054 Value *Op = I->getOperand(i);
1055 unsigned Arg = getRegForValue(Op);
1056 if (Arg == 0) return false;
1058 const Type *ArgTy = Op->getType();
1060 if (!isTypeLegal(ArgTy, ArgVT)) return false;
1062 ISD::ArgFlagsTy Flags;
1063 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1064 Flags.setOrigAlign(OriginalAlignment);
1067 ArgRegs.push_back(Arg);
1068 ArgVTs.push_back(ArgVT);
1069 ArgFlags.push_back(Flags);
1072 SmallVector<CCValAssign, 16> ArgLocs;
1073 CCState CCInfo(CC, false, TM, ArgLocs, F->getContext());
1074 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
1076 // Process the args.
1077 SmallVector<unsigned, 4> RegArgs;
1078 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1079 CCValAssign &VA = ArgLocs[i];
1080 unsigned Arg = ArgRegs[VA.getValNo()];
1081 EVT ArgVT = ArgVTs[VA.getValNo()];
1083 // Should we ever have to promote?
1084 switch (VA.getLocInfo()) {
1085 case CCValAssign::Full: break;
1087 assert(false && "Handle arg promotion for libcalls?");
1091 // Now copy/store arg to correct locations.
// Stack-located args are not handled here (register args only).
1092 if (VA.isRegLoc()) {
1093 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1094 VA.getLocReg()).addReg(Arg);
1095 RegArgs.push_back(VA.getLocReg());
1102 // Issue the call, BLr9 for darwin, BL otherwise.
1103 MachineInstrBuilder MIB;
1106 CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLr9 : ARM::tBL;
1108 CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
1109 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
1110 .addGlobalAddress(F, 0, 0);
1112 // Add implicit physical register uses to the call.
1113 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1114 MIB.addReg(RegArgs[i]);
1116 // Now the return value.
1117 SmallVector<unsigned, 4> UsedRegs;
1118 if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
1119 SmallVector<CCValAssign, 16> RVLocs;
1120 CCState CCInfo(CC, false, TM, RVLocs, F->getContext());
1121 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
1123 // Copy all of the result registers out of their specified physreg.
1124 assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1125 EVT CopyVT = RVLocs[0].getValVT();
1126 TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1128 unsigned ResultReg = createResultReg(DstRC);
1129 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1130 ResultReg).addReg(RVLocs[0].getLocReg());
1131 UsedRegs.push_back(RVLocs[0].getLocReg());
1133 // Finally update the result.
1134 UpdateValueMap(I, ResultReg);
1137 // Set all unused physreg defs as dead.
1138 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
// SelectSDiv - Select signed division. Targets with hardware divide bail
// out (the tablegen path should have matched); otherwise synthesize a
// Function for the __divsi3-family runtime routine by width and emit it
// as a libcall via ARMEmitLibcall.
1143 bool ARMFastISel::SelectSDiv(const Instruction *I) {
1145 const Type *Ty = I->getType();
1146 if (!isTypeLegal(Ty, VT))
1149 // If we have integer div support we should have selected this automagically.
1150 // In case we have a real miss go ahead and return false and we'll pick
1152 if (Subtarget->hasDivide()) return false;
1154 // Otherwise emit a libcall.
1155 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1157 LC = RTLIB::SDIV_I16;
1158 else if (VT == MVT::i32)
1159 LC = RTLIB::SDIV_I32;
1160 else if (VT == MVT::i64)
1161 LC = RTLIB::SDIV_I64;
1162 else if (VT == MVT::i128)
1163 LC = RTLIB::SDIV_I128;
1164 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1166 // Binary operand with all the same type.
1167 std::vector<const Type*> ArgTys;
1168 ArgTys.push_back(Ty);
1169 ArgTys.push_back(Ty);
1170 const FunctionType *FTy = FunctionType::get(Ty, ArgTys, false);
// NOTE(review): Function::Create inserts nothing into a Module here --
// confirm the callee declaration is resolved/uniqued later; otherwise
// this leaks a detached Function per selected sdiv.
1171 Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage,
1172 TLI.getLibcallName(LC));
1173 if (Subtarget->isAAPCS_ABI())
1174 F->setCallingConv(CallingConv::ARM_AAPCS);
1176 F->setCallingConv(I->getParent()->getParent()->getCallingConv());
1178 return ARMEmitLibcall(I, F);
1181 // TODO: SoftFP support.
// TargetSelectInstruction - FastISel entry point: dispatch the IR opcode
// to the matching Select* routine. Returning false defers the
// instruction to SelectionDAG. Thumb-1 functions are rejected entirely.
1182 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
1183 // No Thumb-1 for now.
1184 if (isThumb && !AFI->isThumb2Function()) return false;
1186 switch (I->getOpcode()) {
1187 case Instruction::Load:
1188 return SelectLoad(I);
1189 case Instruction::Store:
1190 return SelectStore(I);
1191 case Instruction::Br:
1192 return SelectBranch(I);
1193 case Instruction::ICmp:
1194 case Instruction::FCmp:
1195 return SelectCmp(I);
1196 case Instruction::FPExt:
1197 return SelectFPExt(I);
1198 case Instruction::FPTrunc:
1199 return SelectFPTrunc(I);
1200 case Instruction::SIToFP:
1201 return SelectSIToFP(I);
1202 case Instruction::FPToSI:
1203 return SelectFPToSI(I);
1204 case Instruction::FAdd:
1205 return SelectBinaryOp(I, ISD::FADD);
1206 case Instruction::FSub:
1207 return SelectBinaryOp(I, ISD::FSUB);
1208 case Instruction::FMul:
1209 return SelectBinaryOp(I, ISD::FMUL);
1210 case Instruction::SDiv:
1211 return SelectSDiv(I);
// createFastISel - Factory hooked into the ARM target: returns an
// ARMFastISel only when the -arm-fast-isel flag is set (the null return
// on the elided path disables fast-isel for ARM by default).
1218 llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
1219 if (EnableARMFastISel) return new ARMFastISel(funcInfo);