lib/Target/ARM/Thumb2SizeReduction.cpp

   1 //===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #define DEBUG_TYPE "t2-reduce-size"
  11 #include "ARM.h"
  12 #include "ARMBaseRegisterInfo.h"
  13 #include "ARMBaseInstrInfo.h"
  14 #include "Thumb2InstrInfo.h"
  15 #include "llvm/CodeGen/MachineInstr.h"
  16 #include "llvm/CodeGen/MachineInstrBuilder.h"
  17 #include "llvm/CodeGen/MachineFunctionPass.h"
  18 #include "llvm/Support/Compiler.h"
  19 #include "llvm/Support/Debug.h"
  20 #include "llvm/ADT/DenseMap.h"
  21 #include "llvm/ADT/Statistic.h"
  22 using namespace llvm;
  23
  24 STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
  25 STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
  26
  27 namespace {
  /// ReduceEntry - One row of the wide-to-narrow mapping table. A row names a
  /// 32-bit opcode, the 16-bit opcode(s) it may be rewritten to, and the
  /// constraints that have to hold for each rewrite. A narrow opcode of 0
  /// means that form is not available for the wide opcode.
  struct ReduceEntry {
    unsigned WideOpc;      // Wide (32-bit) opcode this row applies to
    unsigned NarrowOpc1;   // 16-bit opcode for the non-two-address form
    unsigned NarrowOpc2;   // 16-bit opcode for the two-address form
    uint8_t  Imm1Limit;    // Width in bits of the immediate field (NarrowOpc1);
                           // 0 places no bound on immediates
    uint8_t  Imm2Limit;    // Width in bits of the immediate field (NarrowOpc2);
                           // 0 means operand 2 is a register, not an immediate
    unsigned LowRegs1 : 1; // NarrowOpc1 is only usable with low registers
    unsigned LowRegs2 : 1; // NarrowOpc2 is only usable with low registers
    unsigned PredCC1  : 1; // NarrowOpc1: 0 - sets CPSR exactly when it is not
                           //                 predicated.
                           //             1 - has no cc field, never sets CPSR.
    unsigned PredCC2  : 1; // Same as PredCC1, but for NarrowOpc2
    unsigned Special  : 1; // Row needs special handling (currently skipped)
  };
  43
  44   static const ReduceEntry ReduceTable[] = {
  45     // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
  46     { ARM::t2ADCrr, ARM::tADC,    0,             0,   0,    1,   0,  0,0, 0 },
  47     // FIXME: t2ADDS variants.
  48     { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
  49     { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
  50     { ARM::t2ANDrr, ARM::tAND,    0,             0,   0,    1,   0,  0,0, 0 },
  51     { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
  52     { ARM::t2ASRrr, ARM::tASRrr,  0,             0,   0,    1,   0,  0,0, 0 },
  53     { ARM::t2BICrr, ARM::tBIC,    0,             0,   0,    1,   0,  0,0, 0 },
  54     { ARM::t2CMNrr, ARM::tCMN,    0,             0,   0,    1,   0,  1,0, 0 },
  55     { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  1,0, 0 },
  56     { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  1,0, 0 },
  57     { ARM::t2CMPzri,ARM::tCMPzi8, 0,             8,   0,    1,   0,  1,0, 0 },
  58     { ARM::t2CMPzrr,ARM::tCMPzhir,0,             0,   0,    0,   0,  1,0, 0 },
  59     { ARM::t2EORrr, ARM::tEOR,    0,             0,   0,    1,   0,  0,0, 0 },
  60     { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
  61     { ARM::t2LSLrr, ARM::tLSLrr,  0,             0,   0,    1,   0,  0,0, 0 },
  62     { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
  63     { ARM::t2LSRrr, ARM::tLSRrr,  0,             0,   0,    1,   0,  0,0, 0 },
  64     { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
  65     // FIXME: Do we need the 16-bit 'S' variant?
  66     // FIXME: t2MOVcc
  67     { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
  68     { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    1,   0,  0,0, 0 },
  69     { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
  70     { ARM::t2ORRrr, ARM::tORR,    0,             0,   0,    1,   0,  0,0, 0 },
  71     { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  0,0, 0 },
  72     { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  0,0, 0 },
  73     { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  0,0, 0 },
  74     { ARM::t2RORrr, ARM::tROR,    0,             0,   0,    1,   0,  0,0, 0 },
  75     // FIXME: T2RSBri immediate must be zero. Also need entry for T2RSBS
  76     //{ ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0 },
  77     { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
  78     { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
  79     { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
  80     { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
  81     { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  1,0, 0 },
  82     { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
  83     { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 }
  84   };
  85
  86   class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass {
  87   public:
  88     static char ID;
  89     Thumb2SizeReduce();
  90
  91     const TargetInstrInfo *TII;
  92
  93     virtual bool runOnMachineFunction(MachineFunction &MF);
  94
  95     virtual const char *getPassName() const {
  96       return "Thumb2 instruction size reduction pass";
  97     }
  98
  99   private:
 100     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
 101     DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 102
 103     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
 104     /// instruction.
 105     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 106                        const ReduceEntry &Entry,
 107                        bool LiveCPSR);
 108
 109     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
 110     /// non-two-address instruction.
 111     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 112                         const ReduceEntry &Entry,
 113                         bool LiveCPSR);
 114
 115     /// ReduceMBB - Reduce width of instructions in the specified basic block.
 116     bool ReduceMBB(MachineBasicBlock &MBB);
 117   };
 118   char Thumb2SizeReduce::ID = 0;
 119 }
 120
 121 Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
 122   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
 123     unsigned FromOpc = ReduceTable[i].WideOpc;
 124     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
 125       assert(false && "Duplicated entries?");
 126   }
 127 }
 128
 129 static bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
 130                             bool is2Addr, ARMCC::CondCodes Pred,
 131                             bool LiveCPSR, bool &HasCC, bool &CCDead) {
 132   if ((is2Addr  && Entry.PredCC2 == 0) ||
 133       (!is2Addr && Entry.PredCC1 == 0)) {
 134     if (Pred == ARMCC::AL) {
 135       // Not predicated, must set CPSR.
 136       if (!HasCC) {
 137         // Original instruction was not setting CPSR, but CPSR is not
 138         // currently live anyway. It's ok to set it. The CPSR def is
 139         // dead though.
 140         if (!LiveCPSR) {
 141           HasCC = true;
 142           CCDead = true;
 143           return true;
 144         }
 145         return false;
 146       }
 147     } else {
 148       // Predicated, must not set CPSR.
 149       if (HasCC)
 150         return false;
 151     }
 152   } else {
 153     // 16-bit instruction does not set CPSR.
 154     if (HasCC)
 155       return false;
 156   }
 157
 158   return true;
 159 }
 160
 161 bool
 162 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 163                                 const ReduceEntry &Entry,
 164                                 bool LiveCPSR) {
 165   const TargetInstrDesc &TID = MI->getDesc();
 166   unsigned Reg0 = MI->getOperand(0).getReg();
 167   unsigned Reg1 = MI->getOperand(1).getReg();
 168   if (Reg0 != Reg1)
 169     return false;
 170   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
 171     return false;
 172   if (Entry.Imm2Limit) {
 173     unsigned Imm = MI->getOperand(2).getImm();
 174     unsigned Limit = (1 << Entry.Imm2Limit) - 1;
 175     if (Imm > Limit)
 176       return false;
 177   } else {
 178     unsigned Reg2 = MI->getOperand(2).getReg();
 179     if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
 180       return false;
 181   }
 182
 183   // Check if it's possible / necessary to transfer the predicate.
 184   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
 185   unsigned PredReg = 0;
 186   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
 187   bool SkipPred = false;
 188   if (Pred != ARMCC::AL) {
 189     if (!NewTID.isPredicable())
 190       // Can't transfer predicate, fail.
 191       return false;
 192   } else {
 193     SkipPred = !NewTID.isPredicable();
 194   }
 195
 196   bool HasCC = false;
 197   bool CCDead = false;
 198   if (TID.hasOptionalDef()) {
 199     unsigned NumOps = TID.getNumOperands();
 200     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
 201     if (HasCC && MI->getOperand(NumOps-1).isDead())
 202       CCDead = true;
 203   }
 204   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
 205     return false;
 206
 207   // Add the 16-bit instruction.
 208   DebugLoc dl = MI->getDebugLoc();
 209   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc2));
 210   MIB.addOperand(MI->getOperand(0));
 211   if (HasCC)
 212     AddDefaultT1CC(MIB, CCDead);
 213
 214   // Transfer the rest of operands.
 215   unsigned NumOps = TID.getNumOperands();
 216   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 217     if (i < NumOps && TID.OpInfo[i].isOptionalDef())
 218       continue;
 219     if (SkipPred && TID.OpInfo[i].isPredicate())
 220       continue;
 221     MIB.addOperand(MI->getOperand(i));
 222   }
 223
 224   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
 225
 226   MBB.erase(MI);
 227   ++Num2Addrs;
 228   return true;
 229 }
 230
 231 bool
 232 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 233                                  const ReduceEntry &Entry,
 234                                  bool LiveCPSR) {
 235   unsigned Limit = ~0U;
 236   if (Entry.Imm1Limit)
 237     Limit = (1 << Entry.Imm1Limit) - 1;
 238
 239   const TargetInstrDesc &TID = MI->getDesc();
 240   for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
 241     if (TID.OpInfo[i].isPredicate())
 242       continue;
 243     const MachineOperand &MO = MI->getOperand(i);
 244     if (MO.isReg()) {
 245       unsigned Reg = MO.getReg();
 246       if (!Reg || Reg == ARM::CPSR)
 247         continue;
 248       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
 249         return false;
 250     } else if (MO.isImm()) {
 251       if (MO.getImm() > Limit)
 252         return false;
 253     }
 254   }
 255
 256   // Check if it's possible / necessary to transfer the predicate.
 257   const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
 258   unsigned PredReg = 0;
 259   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
 260   bool SkipPred = false;
 261   if (Pred != ARMCC::AL) {
 262     if (!NewTID.isPredicable())
 263       // Can't transfer predicate, fail.
 264       return false;
 265   } else {
 266     SkipPred = !NewTID.isPredicable();
 267   }
 268
 269   bool HasCC = false;
 270   bool CCDead = false;
 271   if (TID.hasOptionalDef()) {
 272     unsigned NumOps = TID.getNumOperands();
 273     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
 274     if (HasCC && MI->getOperand(NumOps-1).isDead())
 275       CCDead = true;
 276   }
 277   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
 278     return false;
 279
 280   // Add the 16-bit instruction.
 281   DebugLoc dl = MI->getDebugLoc();
 282   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Entry.NarrowOpc1));
 283   MIB.addOperand(MI->getOperand(0));
 284   if (HasCC)
 285     AddDefaultT1CC(MIB, CCDead);
 286
 287   // Transfer the rest of operands.
 288   unsigned NumOps = TID.getNumOperands();
 289   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
 290     if (i < NumOps && TID.OpInfo[i].isOptionalDef())
 291       continue;
 292     if (SkipPred && TID.OpInfo[i].isPredicate())
 293       continue;
 294     MIB.addOperand(MI->getOperand(i));
 295   }
 296
 297
 298   DOUT << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB;
 299
 300   MBB.erase(MI);
 301   ++NumNarrows;
 302   return true;
 303 }
 304
 305 static bool UpdateCPSRLiveness(MachineInstr &MI, bool LiveCPSR) {
 306   bool HasDef = false;
 307   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 308     const MachineOperand &MO = MI.getOperand(i);
 309     if (!MO.isReg() || MO.isUndef())
 310       continue;
 311     if (MO.getReg() != ARM::CPSR)
 312       continue;
 313     if (MO.isDef()) {
 314       if (!MO.isDead())
 315         HasDef = true;
 316       continue;
 317     }
 318
 319     assert(LiveCPSR && "CPSR liveness tracking is wrong!");
 320     if (MO.isKill()) {
 321       LiveCPSR = false;
 322       break;
 323     }
 324   }
 325
 326   return HasDef || LiveCPSR;
 327 }
 328
 329 bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 330   bool Modified = false;
 331
 332   bool LiveCPSR = false;
 333   // Yes, CPSR could be livein.
 334   for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
 335          E = MBB.livein_end(); I != E; ++I) {
 336     if (*I == ARM::CPSR) {
 337       LiveCPSR = true;
 338       break;
 339     }
 340   }
 341
 342   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
 343   MachineBasicBlock::iterator NextMII = next(MII);
 344   for (; MII != E; MII = NextMII) {
 345     NextMII = next(MII);
 346
 347     MachineInstr *MI = &*MII;
 348     unsigned Opcode = MI->getOpcode();
 349     DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
 350     if (OPI != ReduceOpcodeMap.end()) {
 351       const ReduceEntry &Entry = ReduceTable[OPI->second];
 352       // Ignore "special" cases for now.
 353       if (Entry.Special)
 354         goto ProcessNext;
 355
 356       // Try to transform to a 16-bit two-address instruction.
 357       if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
 358         Modified = true;
 359         MachineBasicBlock::iterator I = prior(NextMII);
 360         MI = &*I;
 361         goto ProcessNext;
 362       }
 363
 364       // Try to transform ro a 16-bit non-two-address instruction.
 365       if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR))
 366         Modified = true;
 367     }
 368
 369   ProcessNext:
 370     LiveCPSR = UpdateCPSRLiveness(*MI, LiveCPSR);
 371   }
 372
 373   return Modified;
 374 }
 375
 376 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 377   const TargetMachine &TM = MF.getTarget();
 378   TII = TM.getInstrInfo();
 379
 380   bool Modified = false;
 381   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
 382     Modified |= ReduceMBB(*I);
 383   return Modified;
 384 }
 385
 386 /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
 387 /// reduction pass.
 388 FunctionPass *llvm::createThumb2SizeReductionPass() {
 389   return new Thumb2SizeReduce();
 390 }