1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the X86-specific support for the FastISel class. Much
11 // of the target-specific code is generated by tablegen in the file
12 // X86GenFastISel.inc, which is #included here.
14 //===----------------------------------------------------------------------===//
17 #include "X86InstrBuilder.h"
18 #include "X86ISelLowering.h"
19 #include "X86RegisterInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/Instructions.h"
23 #include "llvm/DerivedTypes.h"
24 #include "llvm/CodeGen/FastISel.h"
25 #include "llvm/CodeGen/MachineConstantPool.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 class X86FastISel : public FastISel {
31 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
32 /// make the right decision when generating code for different targets.
33 const X86Subtarget *Subtarget;
// Construct over the shared FastISel state: the function being compiled,
// the value->vreg map, and the IR-block->machine-block map.
36 explicit X86FastISel(MachineFunction &mf,
37 DenseMap<const Value *, unsigned> &vm,
38 DenseMap<const BasicBlock *, MachineBasicBlock *> &bm)
39 : FastISel(mf, vm, bm) {
40 Subtarget = &TM.getSubtarget<X86Subtarget>();
// Entry point called by the FastISel driver for each instruction; the
// X86Select* helpers below return true when they emitted code for I.
43 virtual bool TargetSelectInstruction(Instruction *I);
// Tablegen-generated simple-pattern selectors are spliced in here.
45 #include "X86GenFastISel.inc"
// Materialize a constant address into a register (result via Op0).
48 bool X86SelectConstAddr(Value *V, unsigned &Op0);
50 bool X86SelectLoad(Instruction *I);
52 bool X86SelectStore(Instruction *I);
54 bool X86SelectCmp(Instruction *I);
56 bool X86SelectZExt(Instruction *I);
58 bool X86SelectBranch(Instruction *I);
60 bool X86SelectShift(Instruction *I);
62 bool X86SelectSelect(Instruction *I);
// Returns the vreg holding the constant loaded from the constant pool,
// or 0 on failure.
64 unsigned TargetSelectConstantPoolLoad(Constant *C, MachineConstantPool* MCP);
67 /// X86SelectConstAddr - Select and emit code to materialize constant address.
69 bool X86FastISel::X86SelectConstAddr(Value *V,
// The out-parameter (Op0, per the declaration at original line 48) receives
// the register holding the materialized address.
71 // FIXME: Only GlobalAddress for now.
72 GlobalValue *GV = dyn_cast<GlobalValue>(V);
// Globals that are accessed indirectly (e.g. via a GOT/stub) need an extra
// pointer-sized load of the stub address.
76 if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) {
77 // Issue load from stub if necessary.
// Pointer-width register class: GR32 on x86-32, GR64 on x86-64.
79 const TargetRegisterClass *RC = NULL;
80 if (TLI.getPointerTy() == MVT::i32) {
82 RC = X86::GR32RegisterClass;
85 RC = X86::GR64RegisterClass;
87 Op0 = createResultReg(RC);
90 addFullAddress(BuildMI(MBB, TII.get(Opc), Op0), AM);
91 // Prevent loading GV stub multiple times in same MBB.
92 LocalValueMap[V] = Op0;
97 /// X86SelectStore - Select and emit code to implement store instructions.
98 bool X86FastISel::X86SelectStore(Instruction* I) {
// Type of the value being stored (operand 0 of a StoreInst).
// NOTE(review): unlike X86SelectLoad below, getMVT is called here without
// /*HandleUnknown=*/true, so getMVT may assert on an unsupported type
// instead of returning MVT::Other - verify against the full source.
99 MVT VT = MVT::getMVT(I->getOperand(0)->getType());
100 if (VT == MVT::Other || !VT.isSimple())
101 // Unhandled type. Halt "fast" selection and bail.
// Pointers are stored as the target's native pointer type.
105 VT = TLI.getPointerTy();
106 // We only handle legal types. For example, on x86-32 the instruction
107 // selector contains all of the 64-bit instructions from x86-64,
108 // under the assumption that i64 won't be used if the target doesn't
110 if (!TLI.isTypeLegal(VT))
// Materialize the value to be stored into a register.
112 unsigned Op0 = getRegForValue(I->getOperand(0));
114 // Unhandled operand. Halt "fast" selection and bail.
// Operand 1 of a StoreInst is the destination address.
117 Value *V = I->getOperand(1);
118 unsigned Op1 = getRegForValue(V);
120 // Handle constant load address.
121 if (!isa<Constant>(V) || !X86SelectConstAddr(V, Op1))
122 // Unhandled operand. Halt "fast" selection and bail.
126 // Get opcode and regclass of the output for the given load instruction.
128 const TargetRegisterClass *RC = NULL;
// Select the store opcode/register class per value width (the Opc
// assignments are on lines elided from this listing).
129 switch (VT.getSimpleVT()) {
130 default: return false;
133 RC = X86::GR8RegisterClass;
137 RC = X86::GR16RegisterClass;
141 RC = X86::GR32RegisterClass;
144 // Must be in x86-64 mode.
146 RC = X86::GR64RegisterClass;
// f32: SSE scalar register if SSE1 is available, else x87 stack register.
149 if (Subtarget->hasSSE1()) {
151 RC = X86::FR32RegisterClass;
154 RC = X86::RFP32RegisterClass;
// f64: SSE2 scalar register if available, else x87 stack register.
158 if (Subtarget->hasSSE2()) {
160 RC = X86::FR64RegisterClass;
163 RC = X86::RFP64RegisterClass;
// f80 always goes through the x87 stack.
168 RC = X86::RFP80RegisterClass;
174 // Address is in register.
// NOTE(review): this path sets AM.GV via cast<GlobalValue>, so it assumes
// the address was a global constant; the register-address case is on
// elided lines - confirm in the full source.
177 AM.GV = cast<GlobalValue>(V);
178 addFullAddress(BuildMI(MBB, TII.get(Opc)), AM).addReg(Op0);
182 /// X86SelectLoad - Select and emit code to implement load instructions.
184 bool X86FastISel::X86SelectLoad(Instruction *I) {
// Result type of the load; HandleUnknown=true makes getMVT return
// MVT::Other for types it cannot represent instead of asserting.
185 MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true);
186 if (VT == MVT::Other || !VT.isSimple())
187 // Unhandled type. Halt "fast" selection and bail.
// Pointer results are loaded as the target's native pointer type.
191 VT = TLI.getPointerTy();
192 // We only handle legal types. For example, on x86-32 the instruction
193 // selector contains all of the 64-bit instructions from x86-64,
194 // under the assumption that i64 won't be used if the target doesn't
196 if (!TLI.isTypeLegal(VT))
// Operand 0 of a LoadInst is the source address.
199 Value *V = I->getOperand(0);
200 unsigned Op0 = getRegForValue(V);
202 // Handle constant load address.
203 if (!isa<Constant>(V) || !X86SelectConstAddr(V, Op0))
204 // Unhandled operand. Halt "fast" selection and bail.
208 // Get opcode and regclass of the output for the given load instruction.
210 const TargetRegisterClass *RC = NULL;
// Select the load opcode/register class per result width (the Opc
// assignments are on lines elided from this listing).
211 switch (VT.getSimpleVT()) {
212 default: return false;
215 RC = X86::GR8RegisterClass;
219 RC = X86::GR16RegisterClass;
223 RC = X86::GR32RegisterClass;
226 // Must be in x86-64 mode.
228 RC = X86::GR64RegisterClass;
// f32: SSE scalar register if SSE1 is available, else x87 stack register.
231 if (Subtarget->hasSSE1()) {
233 RC = X86::FR32RegisterClass;
236 RC = X86::RFP32RegisterClass;
// f64: SSE2 scalar register if available, else x87 stack register.
240 if (Subtarget->hasSSE2()) {
242 RC = X86::FR64RegisterClass;
245 RC = X86::RFP64RegisterClass;
// f80 always goes through the x87 stack.
250 RC = X86::RFP80RegisterClass;
254 unsigned ResultReg = createResultReg(RC);
257 // Address is in register.
// NOTE(review): AM.GV is set via cast<GlobalValue>, so this path assumes a
// global-constant address; the register-address case is on elided lines.
260 AM.GV = cast<GlobalValue>(V);
261 addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
// Record the defining vreg so later uses of I reuse it.
262 UpdateValueMap(I, ResultReg);
// X86SelectCmp - Select and emit code for integer and floating-point
// compares, producing the i1 result in an 8-bit register via SETcc.
266 bool X86FastISel::X86SelectCmp(Instruction *I) {
267 CmpInst *CI = cast<CmpInst>(I);
269 MVT VT = TLI.getValueType(I->getOperand(0)->getType());
270 if (!TLI.isTypeLegal(VT))
273 unsigned Op0Reg = getRegForValue(CI->getOperand(0));
274 if (Op0Reg == 0) return false;
275 unsigned Op1Reg = getRegForValue(CI->getOperand(1));
276 if (Op1Reg == 0) return false;
// Pick the flag-setting compare: CMP for integers, UCOMIS[SD] (unordered
// compare, sets ZF/PF/CF) for scalar floats.
279 switch (VT.getSimpleVT()) {
280 case MVT::i8: Opc = X86::CMP8rr; break;
281 case MVT::i16: Opc = X86::CMP16rr; break;
282 case MVT::i32: Opc = X86::CMP32rr; break;
283 case MVT::i64: Opc = X86::CMP64rr; break;
284 case MVT::f32: Opc = X86::UCOMISSrr; break;
285 case MVT::f64: Opc = X86::UCOMISDrr; break;
286 default: return false;
289 unsigned ResultReg = createResultReg(&X86::GR8RegClass);
290 switch (CI->getPredicate()) {
// OEQ has no single SETcc: it is "equal AND ordered", i.e. ZF=1 and PF=0,
// so combine SETE with SETNP.
291 case CmpInst::FCMP_OEQ: {
292 unsigned EReg = createResultReg(&X86::GR8RegClass);
293 unsigned NPReg = createResultReg(&X86::GR8RegClass);
294 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
295 BuildMI(MBB, TII.get(X86::SETEr), EReg);
296 BuildMI(MBB, TII.get(X86::SETNPr), NPReg);
297 BuildMI(MBB, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
// UNE is the dual: "not equal OR unordered", i.e. ZF=0 or PF=1.
300 case CmpInst::FCMP_UNE: {
301 unsigned NEReg = createResultReg(&X86::GR8RegClass);
302 unsigned PReg = createResultReg(&X86::GR8RegClass);
303 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
304 BuildMI(MBB, TII.get(X86::SETNEr), NEReg);
305 BuildMI(MBB, TII.get(X86::SETPr), PReg);
306 BuildMI(MBB, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
309 case CmpInst::FCMP_OGT:
310 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
311 BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
313 case CmpInst::FCMP_OGE:
314 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
315 BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
// OLT/OLE swap the compare operands so the "above" conditions (which are
// false on unordered) can be reused for less-than tests.
317 case CmpInst::FCMP_OLT:
318 BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
319 BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
321 case CmpInst::FCMP_OLE:
322 BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
323 BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
325 case CmpInst::FCMP_ONE:
326 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
327 BuildMI(MBB, TII.get(X86::SETNEr), ResultReg);
// ORD/UNO test only the parity flag, which UCOMIS sets on NaN operands.
329 case CmpInst::FCMP_ORD:
330 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
331 BuildMI(MBB, TII.get(X86::SETNPr), ResultReg);
333 case CmpInst::FCMP_UNO:
334 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
335 BuildMI(MBB, TII.get(X86::SETPr), ResultReg);
337 case CmpInst::FCMP_UEQ:
338 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
339 BuildMI(MBB, TII.get(X86::SETEr), ResultReg);
// UGT/UGE swap operands and use the "below" conditions, which are true on
// unordered inputs.
341 case CmpInst::FCMP_UGT:
342 BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
343 BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
345 case CmpInst::FCMP_UGE:
346 BuildMI(MBB, TII.get(Opc)).addReg(Op1Reg).addReg(Op0Reg);
347 BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
349 case CmpInst::FCMP_ULT:
350 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
351 BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
353 case CmpInst::FCMP_ULE:
354 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
355 BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
// Integer predicates map 1:1 onto SETcc condition codes: unsigned uses
// A/AE/B/BE (carry-based), signed uses G/GE/L/LE (sign/overflow-based).
357 case CmpInst::ICMP_EQ:
358 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
359 BuildMI(MBB, TII.get(X86::SETEr), ResultReg);
361 case CmpInst::ICMP_NE:
362 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
363 BuildMI(MBB, TII.get(X86::SETNEr), ResultReg);
365 case CmpInst::ICMP_UGT:
366 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
367 BuildMI(MBB, TII.get(X86::SETAr), ResultReg);
369 case CmpInst::ICMP_UGE:
370 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
371 BuildMI(MBB, TII.get(X86::SETAEr), ResultReg);
373 case CmpInst::ICMP_ULT:
374 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
375 BuildMI(MBB, TII.get(X86::SETBr), ResultReg);
377 case CmpInst::ICMP_ULE:
378 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
379 BuildMI(MBB, TII.get(X86::SETBEr), ResultReg);
381 case CmpInst::ICMP_SGT:
382 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
383 BuildMI(MBB, TII.get(X86::SETGr), ResultReg);
385 case CmpInst::ICMP_SGE:
386 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
387 BuildMI(MBB, TII.get(X86::SETGEr), ResultReg);
389 case CmpInst::ICMP_SLT:
390 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
391 BuildMI(MBB, TII.get(X86::SETLr), ResultReg);
393 case CmpInst::ICMP_SLE:
394 BuildMI(MBB, TII.get(Opc)).addReg(Op0Reg).addReg(Op1Reg);
395 BuildMI(MBB, TII.get(X86::SETLEr), ResultReg);
401 UpdateValueMap(I, ResultReg);
// X86SelectZExt - Select zero-extension; only the i1 -> i8 case is
// handled here (it is a no-op at the register level).
405 bool X86FastISel::X86SelectZExt(Instruction *I) {
406 // Special-case hack: The only i1 values we know how to produce currently
407 // set the upper bits of an i8 value to zero.
408 if (I->getType() == Type::Int8Ty &&
409 I->getOperand(0)->getType() == Type::Int1Ty) {
410 unsigned ResultReg = getRegForValue(I->getOperand(0));
411 if (ResultReg == 0) return false;
// Reuse the operand's register directly - no instruction is emitted.
412 UpdateValueMap(I, ResultReg);
// X86SelectBranch - Select a conditional branch: test the i1 condition
// register against itself and emit JNE/JMP to the two successors.
419 bool X86FastISel::X86SelectBranch(Instruction *I) {
420 BranchInst *BI = cast<BranchInst>(I);
421 // Unconditional branches are selected by tablegen-generated code.
422 unsigned OpReg = getRegForValue(BI->getCondition());
423 if (OpReg == 0) return false;
424 MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
425 MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
// TEST reg,reg sets ZF iff the condition byte is zero; JNE -> true block,
// fall-through JMP -> false block.
427 BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
428 BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
429 BuildMI(MBB, TII.get(X86::JMP)).addMBB(FalseMBB);
// Keep the CFG consistent with the emitted branches.
431 MBB->addSuccessor(TrueMBB);
432 MBB->addSuccessor(FalseMBB);
437 bool X86FastISel::X86SelectShift(Instruction *I) {
440 const TargetRegisterClass *RC = NULL;
441 if (I->getType() == Type::Int8Ty) {
443 RC = &X86::GR8RegClass;
444 switch (I->getOpcode()) {
445 case Instruction::LShr: Opc = X86::SHL8rCL; break;
446 case Instruction::AShr: Opc = X86::SAR8rCL; break;
447 case Instruction::Shl: Opc = X86::SHR8rCL; break;
448 default: return false;
450 } else if (I->getType() == Type::Int16Ty) {
452 RC = &X86::GR16RegClass;
453 switch (I->getOpcode()) {
454 case Instruction::LShr: Opc = X86::SHL16rCL; break;
455 case Instruction::AShr: Opc = X86::SAR16rCL; break;
456 case Instruction::Shl: Opc = X86::SHR16rCL; break;
457 default: return false;
459 } else if (I->getType() == Type::Int32Ty) {
461 RC = &X86::GR32RegClass;
462 switch (I->getOpcode()) {
463 case Instruction::LShr: Opc = X86::SHL32rCL; break;
464 case Instruction::AShr: Opc = X86::SAR32rCL; break;
465 case Instruction::Shl: Opc = X86::SHR32rCL; break;
466 default: return false;
468 } else if (I->getType() == Type::Int64Ty) {
470 RC = &X86::GR64RegClass;
471 switch (I->getOpcode()) {
472 case Instruction::LShr: Opc = X86::SHL64rCL; break;
473 case Instruction::AShr: Opc = X86::SAR64rCL; break;
474 case Instruction::Shl: Opc = X86::SHR64rCL; break;
475 default: return false;
481 unsigned Op0Reg = getRegForValue(I->getOperand(0));
482 if (Op0Reg == 0) return false;
483 unsigned Op1Reg = getRegForValue(I->getOperand(1));
484 if (Op1Reg == 0) return false;
485 TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
486 unsigned ResultReg = createResultReg(RC);
487 BuildMI(MBB, TII.get(Opc), ResultReg).addReg(Op0Reg);
488 UpdateValueMap(I, ResultReg);
// X86SelectSelect - Select the IR `select` instruction using TEST + CMOVNE.
492 bool X86FastISel::X86SelectSelect(Instruction *I) {
493 const Type *Ty = I->getOperand(1)->getType();
// Treat pointer selects as integer selects of pointer width.
494 if (isa<PointerType>(Ty))
495 Ty = TLI.getTargetData()->getIntPtrType();
498 const TargetRegisterClass *RC = NULL;
// Only 16/32/64-bit integer CMOVs are handled here; other types fall
// through (there is no 8-bit CMOVcc instruction on x86).
499 if (Ty == Type::Int16Ty) {
500 Opc = X86::CMOVNE16rr;
501 RC = &X86::GR16RegClass;
502 } else if (Ty == Type::Int32Ty) {
503 Opc = X86::CMOVNE32rr;
504 RC = &X86::GR32RegClass;
505 } else if (Ty == Type::Int64Ty) {
506 Opc = X86::CMOVNE64rr;
507 RC = &X86::GR64RegClass;
// Materialize condition and both select arms.
512 unsigned Op0Reg = getRegForValue(I->getOperand(0));
513 if (Op0Reg == 0) return false;
514 unsigned Op1Reg = getRegForValue(I->getOperand(1));
515 if (Op1Reg == 0) return false;
516 unsigned Op2Reg = getRegForValue(I->getOperand(2));
517 if (Op2Reg == 0) return false;
// TEST sets ZF iff the condition is zero; CMOVNE then picks the
// true-value when the condition was nonzero.
519 BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
520 unsigned ResultReg = createResultReg(RC);
521 BuildMI(MBB, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
522 UpdateValueMap(I, ResultReg);
// TargetSelectInstruction - Dispatch to the X86-specific selector for each
// instruction opcode; returns true when code was emitted for I.
527 X86FastISel::TargetSelectInstruction(Instruction *I) {
528 switch (I->getOpcode()) {
529 case Instruction::Load:
531 return X86SelectLoad(I);
532 case Instruction::Store:
533 return X86SelectStore(I);
// Both integer and FP compares share one selector.
534 case Instruction::ICmp:
535 case Instruction::FCmp:
536 return X86SelectCmp(I);
537 case Instruction::ZExt:
538 return X86SelectZExt(I);
539 case Instruction::Br:
540 return X86SelectBranch(I);
// All three shift flavors share one selector.
541 case Instruction::LShr:
542 case Instruction::AShr:
543 case Instruction::Shl:
544 return X86SelectShift(I);
545 case Instruction::Select:
546 return X86SelectSelect(I);
// TargetSelectConstantPoolLoad - Materialize a constant by loading it from
// the constant pool. Returns the vreg holding the value, or 0 on failure
// (note the `return false` statements below yield 0 for this unsigned
// return type).
552 unsigned X86FastISel::TargetSelectConstantPoolLoad(Constant *C,
553 MachineConstantPool* MCP) {
// Reuse a previously materialized register for this constant, if any.
554 unsigned CPLoad = getRegForValue(C);
558 // Can't handle PIC-mode yet.
559 if (TM.getRelocationModel() == Reloc::PIC_)
562 MVT VT = MVT::getMVT(C->getType(), /*HandleUnknown=*/true);
563 if (VT == MVT::Other || !VT.isSimple())
564 // Unhandled type. Halt "fast" selection and bail.
// Pointer constants are loaded at native pointer width.
568 VT = TLI.getPointerTy();
569 // We only handle legal types. For example, on x86-32 the instruction
570 // selector contains all of the 64-bit instructions from x86-64,
571 // under the assumption that i64 won't be used if the target doesn't
573 if (!TLI.isTypeLegal(VT))
576 // Get opcode and regclass of the output for the given load instruction.
578 const TargetRegisterClass *RC = NULL;
// Same width-based opcode/class selection as X86SelectLoad (the Opc
// assignments are on lines elided from this listing).
579 switch (VT.getSimpleVT()) {
580 default: return false;
583 RC = X86::GR8RegisterClass;
587 RC = X86::GR16RegisterClass;
591 RC = X86::GR32RegisterClass;
594 // Must be in x86-64 mode.
596 RC = X86::GR64RegisterClass;
// f32: SSE scalar register if SSE1 is available, else x87 stack register.
599 if (Subtarget->hasSSE1()) {
601 RC = X86::FR32RegisterClass;
604 RC = X86::RFP32RegisterClass;
// f64: SSE2 scalar register if available, else x87 stack register.
608 if (Subtarget->hasSSE2()) {
610 RC = X86::FR64RegisterClass;
613 RC = X86::RFP64RegisterClass;
// f80 always goes through the x87 stack.
618 RC = X86::RFP80RegisterClass;
622 unsigned ResultReg = createResultReg(RC);
// Globals can be materialized directly as addresses instead of being
// placed in the constant pool.
623 if (isa<GlobalValue>(C)) {
624 if (X86SelectConstAddr(C, ResultReg))
// Otherwise, add the constant to the pool and load it via a
// constant-pool-indexed address.
631 unsigned MCPOffset = MCP->getConstantPoolIndex(C, 0);
632 addConstantPoolReference(BuildMI(MBB, TII.get(Opc), ResultReg), MCPOffset);
633 UpdateValueMap(C, ResultReg);
// X86::createFastISel - Factory entry point used by the X86 target to
// construct the X86-specific FastISel instance.
638 llvm::FastISel *X86::createFastISel(MachineFunction &mf,
639 DenseMap<const Value *, unsigned> &vm,
640 DenseMap<const BasicBlock *, MachineBasicBlock *> &bm) {
641 return new X86FastISel(mf, vm, bm);