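On some ARM CPUs, flag-setting movs with a shifter operand (i.e. lsl, lsr, asr) should be avoided; teach the Thumb2 size reduction pass not to generate them unless optimizing or minimizing for size.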

author Evan Cheng <evan.cheng@apple.com>

Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)
author Evan Cheng <evan.cheng@apple.com>
Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td

index 5ea251a795f4b8de760621c9a1155678eafa32f9..45a65fd6f103d7b43bdf3d9f9642295c2df4ae37 100644 (file)
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                 "AvoidCPSRPartialUpdate", "true",
                                   "Avoid CPSR partial update for OOO execution">;
  
+def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
+                                            "AvoidMOVsShifterOperand", "true",
+                                "Avoid movs instructions with shifter operand">;
+
  // Some processors perform return stack prediction. CodeGen should avoid issue
  // "normal" call instructions to callees which do not return.
  def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
@@ -152,6 +156,7 @@ def ProcSwift   : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
                                     [FeatureNEONForFP, FeatureT2XtPk,
                                      FeatureVFP4, FeatureMP, FeatureHWDiv,
                                      FeatureHWDivARM, FeatureAvoidPartialCPSR,
+                                    FeatureAvoidMOVsShOp,
                                      FeatureHasSlowFPVMLx]>;
  
  // FIXME: It has not been determined if A15 has these features.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp

index 2766e7b1b5630dfd65b1b27ce2bf700cc44a9ccd..60427272c67b5ac0e9ecc73ca7c5e817f8183e77 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -74,6 +74,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
    , HasDataBarrier(false)
    , Pref32BitThumb(false)
    , AvoidCPSRPartialUpdate(false)
+  , AvoidMOVsShifterOperand(false)
    , HasRAS(false)
    , HasMPExtension(false)
    , FPOnlySP(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h

index c7e3adcb512202e923bf66db7cfdecd559af42fc..b96e738a6cee70e517a58816f73edadcc67c5142 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -131,6 +131,10 @@ protected:
    /// CPSR setting instruction.
    bool AvoidCPSRPartialUpdate;
  
+  /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag-setting
+  /// movs with a shifter operand (i.e. asr, lsl, lsr).
+  bool AvoidMOVsShifterOperand;
+
    /// HasRAS - Some processors perform return stack prediction. CodeGen should
    /// avoid issue "normal" call instructions to callees which do not return.
    bool HasRAS;
@@ -232,6 +236,7 @@ protected:
    bool isFPOnlySP() const { return FPOnlySP; }
    bool prefers32BitThumb() const { return Pref32BitThumb; }
    bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+  bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
    bool hasRAS() const { return HasRAS; }
    bool hasMPExtension() const { return HasMPExtension; }
    bool hasThumb2DSP() const { return Thumb2DSP; }
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp

index fb0df3edcbd8e74b0fc704bbf53a58ca983b30e2..a4f0847450927e7df10ddc087d3ff5e57a6cbd46 100644 (file)
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -53,81 +53,82 @@ namespace {
      unsigned PredCC2  : 2;
      unsigned PartFlag : 1; // 16-bit instruction does partial flag update
      unsigned Special  : 1; // Needs to be dealt with specially
+    unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
    };
  
    static const ReduceEntry ReduceTable[] = {
-    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
-    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
-    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1 },
-    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
-    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
-    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
-    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
-    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0 },
-    //FIXME: Disable CMN, as CCodes are backwards from compare expectations
-    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0 },
-    { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,    1,   0,  2,0, 0,0 },
-    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0 },
-    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1 },
-    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0 },
-    // FIXME: adr.n immediate offset must be multiple of 4.
-    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0 },
-    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0 },
-    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
-    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0 },
-    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
-    // likely to cause issue in the loop. As a size / performance workaround,
-    // they are not marked as such.
-    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
-    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
-    // FIXME: Do we need the 16-bit 'S' variant?
-    { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0 },
-    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
-    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0 },
-    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0 },
-    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0 },
-    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0 },
-    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1 },
-    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0 },
-    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0 },
-    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0 },
-    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0 },
-    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0 },
-    { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,1 },
-    { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,1 },
-    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0 },
-    { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,1 },
-    { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,1 },
-
-    // FIXME: Clean this up after splitting each Thumb load / store opcode
-    // into multiple ones.
-    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
-    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
-
-    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1 },
-    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1 },
-    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1 },
-    // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
-    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1 },
-    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1 },
+  // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
+  { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,   0,   1,  0,0, 0,0,0 },
+  { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  0,0, 0,1,0 },
+  { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,   1,   0,  0,1, 0,0,0 },
+  { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,   1,   1,  2,2, 0,1,0 },
+  { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,   1,   0,  2,0, 0,1,0 },
+  { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,   0,   1,  0,0, 1,0,0 },
+  { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
+  { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
+  { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,   0,   1,  0,0, 1,0,0 },
+  //FIXME: Disable CMN, as CCodes are backwards from compare expectations
+  //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
+  { ARM::t2CMNzrr, ARM::tCMNz,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
+  { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,   1,   0,  2,0, 0,0,0 },
+  { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,   0,   0,  2,0, 0,1,0 },
+  { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,   0,   1,  0,0, 1,0,0 },
+  // FIXME: adr.n immediate offset must be multiple of 4.
+  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,   1,   0,  1,0, 0,0,0 },
+  { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
+  { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,   0,   1,  0,0, 1,0,1 },
+  { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,   1,   0,  0,0, 1,0,1 },
+  { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,   0,   1,  0,0, 1,0,1 },
+  // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+  // likely to cause issue in the loop. As a size / performance workaround,
+  // they are not marked as such.
+  { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 0,0,0 },
+  { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,   1,   0,  0,0, 0,1,0 },
+  // FIXME: Do we need the 16-bit 'S' variant?
+  { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,   0,   0,  1,0, 0,0,0 },
+  { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,   0,   1,  0,0, 1,0,0 },
+  { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,   1,   0,  0,0, 0,0,0 },
+  { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,   0,   1,  0,0, 1,0,0 },
+  { ARM::t2REV,   ARM::tREV,    0,             0,   0,   1,   0,  1,0, 0,0,0 },
+  { ARM::t2REV16, ARM::tREV16,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
+  { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,   1,   0,  1,0, 0,0,0 },
+  { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,   0,   1,  0,0, 1,0,0 },
+  { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
+  { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,   0,   1,  0,0, 0,0,0 },
+  { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  0,0, 0,0,0 },
+  { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,   1,   0,  0,0, 0,0,0 },
+  { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,   1,   1,  2,2, 0,0,0 },
+  { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
+  { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
+  { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
+  { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
+  { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
+  { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
+
+  // FIXME: Clean this up after splitting each Thumb load / store opcode
+  // into multiple ones.
+  { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,   1,   0,  0,0, 0,1,0 },
+  { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,   1,   0,  0,0, 0,1,0 },
+
+  { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,   1,   1,  1,1, 0,1,0 },
+  { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,   1,   1,  1,1, 0,1,0 },
+  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,   1,   1,  1,1, 0,1,0 },
+  // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,   1,   1,  1,1, 0,1,0 },
+  { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,   1,   1,  1,1, 0,1,0 }
    };
  
    class Thumb2SizeReduce : public MachineFunctionPass {
@@ -184,13 +185,14 @@ namespace {
      /// ReduceMBB - Reduce width of instructions in the specified basic block.
      bool ReduceMBB(MachineBasicBlock &MBB);
  
+    bool OptimizeSize;
      bool MinimizeSize;
    };
    char Thumb2SizeReduce::ID = 0;
  }
  
  Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
-  MinimizeSize = false;
+  OptimizeSize = MinimizeSize = false;
    for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
      unsigned FromOpc = ReduceTable[i].WideOpc;
      if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -587,7 +589,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
      // are prioritized, but the table assumes a unique entry for each
      // source insn opcode. So for now, we hack a local entry record to use.
      static const ReduceEntry NarrowEntry =
-      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
      if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
@@ -605,6 +607,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
    if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
      return false;
  
+  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+      STI->avoidMOVsShifterOperand())
+    // Don't issue movs with shifter operand for some CPUs unless we
+    // are optimizing / minimizing for size.
+    return false;
+
    unsigned Reg0 = MI->getOperand(0).getReg();
    unsigned Reg1 = MI->getOperand(1).getReg();
    // t2MUL is "special". The tied source operand is second, not first.
@@ -717,6 +725,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
    if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
      return false;
  
+  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+      STI->avoidMOVsShifterOperand())
+    // Don't issue movs with shifter operand for some CPUs unless we
+    // are optimizing / minimizing for size.
+    return false;
+
    unsigned Limit = ~0U;
    if (Entry.Imm1Limit)
      Limit = (1 << Entry.Imm1Limit) - 1;
@@ -946,9 +960,10 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
    TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
    STI = &TM.getSubtarget<ARMSubtarget>();
  
-  // When -Oz is set, the function carries MinSize attribute.
-  MinimizeSize =
-    MF.getFunction()->getFnAttributes().hasAttribute(Attribute::MinSize);
+  // Optimizing / minimizing size?
+  Attribute FnAttrs = MF.getFunction()->getFnAttributes();
+  OptimizeSize = FnAttrs.hasAttribute(Attribute::OptimizeForSize);
+  MinimizeSize = FnAttrs.hasAttribute(Attribute::MinSize);
  
    bool Modified = false;
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/test/CodeGen/Thumb2/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll

index 98854a1205f830c51be9992610d344caf6b3688d..05dd90cfbfedfd355e7f1f420bd1ac0ee3274e9b 100644 (file)
--- a/test/CodeGen/Thumb2/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s --check-prefix=A8
+; RUN: llc < %s -march=thumb -mcpu=swift | FileCheck %s --check-prefix=SWIFT
+
+; rdar://12892707
  
  define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsl
-; CHECK: add.w  r0, r0, r1, lsl #16
+; A8: t2ADDrs_lsl
+; A8: add.w  r0, r0, r1, lsl #16
          %A = shl i32 %Y, 16
          %B = add i32 %X, %A
          ret i32 %B
  }
  
  define i32 @t2ADDrs_lsr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_lsr
-; CHECK: add.w  r0, r0, r1, lsr #16
+; A8: t2ADDrs_lsr
+; A8: add.w  r0, r0, r1, lsr #16
          %A = lshr i32 %Y, 16
          %B = add i32 %X, %A
          ret i32 %B
  }
  
  define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_asr
-; CHECK: add.w  r0, r0, r1, asr #16
+; A8: t2ADDrs_asr
+; A8: add.w  r0, r0, r1, asr #16
          %A = ashr i32 %Y, 16
          %B = add i32 %X, %A
          ret i32 %B
@@ -26,8 +29,8 @@ define i32 @t2ADDrs_asr(i32 %X, i32 %Y) {
  
  ; i32 ror(n) = (x >> n) | (x << (32 - n))
  define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
-; CHECK: t2ADDrs_ror
-; CHECK: add.w  r0, r0, r1, ror #16
+; A8: t2ADDrs_ror
+; A8: add.w  r0, r0, r1, ror #16
          %A = lshr i32 %Y, 16
          %B = shl  i32 %Y, 16
          %C = or   i32 %B, %A
@@ -36,13 +39,66 @@ define i32 @t2ADDrs_ror(i32 %X, i32 %Y) {
  }
  
  define i32 @t2ADDrs_noRegShift(i32 %X, i32 %Y, i8 %sh) {
-; CHECK: t2ADDrs_noRegShift
-; CHECK: uxtb r2, r2
-; CHECK: lsls r1, r2
-; CHECK: add  r0, r1
+; A8: t2ADDrs_noRegShift
+; A8: uxtb r2, r2
+; A8: lsls r1, r2
+; A8: add  r0, r1
+
+; SWIFT: t2ADDrs_noRegShift
+; SWIFT-NOT: lsls
+; SWIFT: lsl.w
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = shl i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
+        ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift2(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift2
+; A8: uxtb r2, r2
+; A8: lsrs r1, r2
+; A8: add  r0, r1
+
+; SWIFT: t2ADDrs_noRegShift2
+; SWIFT-NOT: lsrs
+; SWIFT: lsr.w
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = lshr i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
+        ret i32 %B
+}
+
+define i32 @t2ADDrs_noRegShift3(i32 %X, i32 %Y, i8 %sh) {
+; A8: t2ADDrs_noRegShift3
+; A8: uxtb r2, r2
+; A8: asrs r1, r2
+; A8: add  r0, r1
+
+; SWIFT: t2ADDrs_noRegShift3
+; SWIFT-NOT: asrs
+; SWIFT: asr.w
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = ashr i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
+        ret i32 %B
+}
+
+define i32 @t2ADDrs_optsize(i32 %X, i32 %Y, i8 %sh) optsize {
+; SWIFT: t2ADDrs_optsize
+; SWIFT-NOT: lsl.w
+; SWIFT: lsls
          %shift.upgrd.1 = zext i8 %sh to i32
          %A = shl i32 %Y, %shift.upgrd.1
          %B = add i32 %X, %A
          ret i32 %B
  }
  
+define i32 @t2ADDrs_minsize(i32 %X, i32 %Y, i8 %sh) minsize {
+; SWIFT: t2ADDrs_minsize
+; SWIFT-NOT: lsr.w
+; SWIFT: lsrs
+        %shift.upgrd.1 = zext i8 %sh to i32
+        %A = lshr i32 %Y, %shift.upgrd.1
+        %B = add i32 %X, %A
+        ret i32 %B
+}
author	Evan Cheng <evan.cheng@apple.com>
	Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Thu, 20 Dec 2012 19:59:30 +0000 (19:59 +0000)
lib/Target/ARM/ARM.td		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.cpp		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.h		patch \| blob \| history
lib/Target/ARM/Thumb2SizeReduction.cpp		patch \| blob \| history
test/CodeGen/Thumb2/thumb2-shifter.ll		patch \| blob \| history