X86: pack the memory-folding tables' flag word into 16 bits, add cmov-based select insertion, and teach the peephole compare optimization to reuse flags from a prior SUB
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e8b886cb544cab421b0d73edf4a207b74bab7dea..f493438c29c0377b32fb5279e93658d27da5cd59 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load",
 
 enum {
   // Select which memory operand is being unfolded.
-  // (stored in bits 0 - 7)
+  // (stored in bits 0 - 3)
   TB_INDEX_0    = 0,
   TB_INDEX_1    = 1,
   TB_INDEX_2    = 2,
   TB_INDEX_3    = 3,
-  TB_INDEX_MASK = 0xff,
-
-  // Minimum alignment required for load/store.
-  // Used for RegOp->MemOp conversion.
-  // (stored in bits 8 - 15)
-  TB_ALIGN_SHIFT = 8,
-  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
-  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
-  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
-  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT,
+  TB_INDEX_MASK = 0xf,
 
   // Do not insert the reverse map (MemOp -> RegOp) into the table.
   // This may be needed because there is a many -> one mapping.
-  TB_NO_REVERSE   = 1 << 16,
+  TB_NO_REVERSE   = 1 << 4,
 
   // Do not insert the forward map (RegOp -> MemOp) into the table.
   // This is needed for Native Client, which prohibits branch
   // instructions from using a memory operand.
-  TB_NO_FORWARD   = 1 << 17,
+  TB_NO_FORWARD   = 1 << 5,
+
+  TB_FOLDED_LOAD  = 1 << 6,
+  TB_FOLDED_STORE = 1 << 7,
 
-  TB_FOLDED_LOAD  = 1 << 18,
-  TB_FOLDED_STORE = 1 << 19
+  // Minimum alignment required for load/store.
+  // Used for RegOp->MemOp conversion.
+  // (stored in bits 8 - 15)
+  TB_ALIGN_SHIFT = 8,
+  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
+  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
+  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
+  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
 };
 
 struct X86OpTblEntry {
   uint16_t RegOp;
   uint16_t MemOp;
-  uint32_t Flags;
+  uint16_t Flags;
 };
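With the index field narrowed to four bits and the alignment field kept in bits 8-15, every flag now fits the 16-bit Flags member, shrinking each table entry from eight bytes to six. A minimal standalone sketch of composing and decoding such a flag word, reusing the constants from the new enum (the main() harness is illustrative only):

    #include <cassert>
    #include <cstdint>

    enum {
      TB_INDEX_MASK   = 0xf,
      TB_NO_REVERSE   = 1 << 4,
      TB_NO_FORWARD   = 1 << 5,
      TB_FOLDED_LOAD  = 1 << 6,
      TB_FOLDED_STORE = 1 << 7,
      TB_ALIGN_SHIFT  = 8,
      TB_ALIGN_16     = 16 << TB_ALIGN_SHIFT,
      TB_ALIGN_MASK   = 0xff << TB_ALIGN_SHIFT
    };

    int main() {
      // Compose a flag word the way the table-building loops do...
      uint16_t Flags = 2 /*TB_INDEX_2*/ | TB_FOLDED_LOAD | TB_ALIGN_16;
      // ...and decode it the way the folding code does.
      assert((Flags & TB_INDEX_MASK) == 2);                      // operand index
      assert(((Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT) == 16); // min alignment
      assert(Flags & TB_FOLDED_LOAD);
      assert(!(Flags & TB_NO_REVERSE));
    }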
 
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
@@ -410,14 +410,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::IMUL64rri8,      X86::IMUL64rmi8,          0 },
     { X86::Int_COMISDrr,    X86::Int_COMISDrm,        0 },
     { X86::Int_COMISSrr,    X86::Int_COMISSrm,        0 },
-    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm,      TB_ALIGN_16 },
-    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm,      TB_ALIGN_16 },
-    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm,      0 },
     { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm,        0 },
     { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          0 },
+    { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        0 },
+    { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          0 },
     { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm,      0 },
     { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm,    0 },
     { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm,      0 },
@@ -494,14 +490,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     // AVX 128-bit versions of foldable instructions
     { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
     { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
-    { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm,     TB_ALIGN_16 },
-    { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm,     0 },
     { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      0 },
     { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      0 },
+    { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
+    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+    { X86::VCVTTSD2SIrr,    X86::VCVTTSD2SIrm,        0 },
+    { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm,    0 },
+    { X86::VCVTTSS2SI64rr,  X86::VCVTTSS2SI64rm,      0 },
+    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+    { X86::VCVTTSS2SIrr,    X86::VCVTTSS2SIrm,        0 },
+    { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm,    0 },
+    { X86::VCVTSD2SI64rr,   X86::VCVTSD2SI64rm,       0 },
+    { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         0 },
+    { X86::VCVTSS2SI64rr,   X86::VCVTSS2SI64rm,       0 },
+    { X86::VCVTSS2SIrr,     X86::VCVTSS2SIrm,         0 },
     { X86::FsVMOVAPDrr,     X86::VMOVSDrm,            TB_NO_REVERSE },
     { X86::FsVMOVAPSrr,     X86::VMOVSSrm,            TB_NO_REVERSE },
     { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
@@ -810,17 +812,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_VCVTSI2SSrr,   X86::Int_VCVTSI2SSrm,    0 },
     { X86::VCVTSS2SDrr,       X86::VCVTSS2SDrm,        0 },
     { X86::Int_VCVTSS2SDrr,   X86::Int_VCVTSS2SDrm,    0 },
-    { X86::VCVTTSD2SI64rr,    X86::VCVTTSD2SI64rm,     0 },
-    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
-    { X86::VCVTTSD2SIrr,      X86::VCVTTSD2SIrm,       0 },
-    { X86::Int_VCVTTSD2SIrr,  X86::Int_VCVTTSD2SIrm,   0 },
-    { X86::VCVTTSS2SI64rr,    X86::VCVTTSS2SI64rm,     0 },
-    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
-    { X86::VCVTTSS2SIrr,      X86::VCVTTSS2SIrm,       0 },
-    { X86::Int_VCVTTSS2SIrr,  X86::Int_VCVTTSS2SIrm,   0 },
-    { X86::VCVTSD2SI64rr,     X86::VCVTSD2SI64rm,      0 },
-    { X86::VCVTSD2SIrr,       X86::VCVTSD2SIrm,        0 },
-    { X86::VCVTTPD2DQrr,      X86::VCVTTPD2DQrm,       TB_ALIGN_16 },
+    { X86::VCVTTPD2DQrr,      X86::VCVTTPD2DQXrm,      TB_ALIGN_16 },
     { X86::VCVTTPS2DQrr,      X86::VCVTTPS2DQrm,       TB_ALIGN_16 },
     { X86::VRSQRTSSr,         X86::VRSQRTSSm,          0 },
     { X86::VSQRTSDr,          X86::VSQRTSDm,           0 },
@@ -1127,111 +1119,143 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   static const X86OpTblEntry OpTbl3[] = {
     // FMA foldable instructions
-    { X86::VFMADDSSr231r,     X86::VFMADDSSr231m,      0 },
-    { X86::VFMADDSDr231r,     X86::VFMADDSDr231m,      0 },
-    { X86::VFMADDSSr132r,     X86::VFMADDSSr132m,      0 },
-    { X86::VFMADDSDr132r,     X86::VFMADDSDr132m,      0 },
-    { X86::VFMADDSSr213r,     X86::VFMADDSSr213m,      0 },
-    { X86::VFMADDSDr213r,     X86::VFMADDSDr213m,      0 },
-
-    { X86::VFMADDPSr231r,     X86::VFMADDPSr231m,      TB_ALIGN_16 },
-    { X86::VFMADDPDr231r,     X86::VFMADDPDr231m,      TB_ALIGN_16 },
-    { X86::VFMADDPSr132r,     X86::VFMADDPSr132m,      TB_ALIGN_16 },
-    { X86::VFMADDPDr132r,     X86::VFMADDPDr132m,      TB_ALIGN_16 },
-    { X86::VFMADDPSr213r,     X86::VFMADDPSr213m,      TB_ALIGN_16 },
-    { X86::VFMADDPDr213r,     X86::VFMADDPDr213m,      TB_ALIGN_16 },
-    { X86::VFMADDPSr231rY,    X86::VFMADDPSr231mY,     TB_ALIGN_32 },
-    { X86::VFMADDPDr231rY,    X86::VFMADDPDr231mY,     TB_ALIGN_32 },
-    { X86::VFMADDPSr132rY,    X86::VFMADDPSr132mY,     TB_ALIGN_32 },
-    { X86::VFMADDPDr132rY,    X86::VFMADDPDr132mY,     TB_ALIGN_32 },
-    { X86::VFMADDPSr213rY,    X86::VFMADDPSr213mY,     TB_ALIGN_32 },
-    { X86::VFMADDPDr213rY,    X86::VFMADDPDr213mY,     TB_ALIGN_32 },
-
-    { X86::VFNMADDSSr231r,    X86::VFNMADDSSr231m,     0 },
-    { X86::VFNMADDSDr231r,    X86::VFNMADDSDr231m,     0 },
-    { X86::VFNMADDSSr132r,    X86::VFNMADDSSr132m,     0 },
-    { X86::VFNMADDSDr132r,    X86::VFNMADDSDr132m,     0 },
-    { X86::VFNMADDSSr213r,    X86::VFNMADDSSr213m,     0 },
-    { X86::VFNMADDSDr213r,    X86::VFNMADDSDr213m,     0 },
-
-    { X86::VFNMADDPSr231r,    X86::VFNMADDPSr231m,     TB_ALIGN_16 },
-    { X86::VFNMADDPDr231r,    X86::VFNMADDPDr231m,     TB_ALIGN_16 },
-    { X86::VFNMADDPSr132r,    X86::VFNMADDPSr132m,     TB_ALIGN_16 },
-    { X86::VFNMADDPDr132r,    X86::VFNMADDPDr132m,     TB_ALIGN_16 },
-    { X86::VFNMADDPSr213r,    X86::VFNMADDPSr213m,     TB_ALIGN_16 },
-    { X86::VFNMADDPDr213r,    X86::VFNMADDPDr213m,     TB_ALIGN_16 },
-    { X86::VFNMADDPSr231rY,   X86::VFNMADDPSr231mY,    TB_ALIGN_32 },
-    { X86::VFNMADDPDr231rY,   X86::VFNMADDPDr231mY,    TB_ALIGN_32 },
-    { X86::VFNMADDPSr132rY,   X86::VFNMADDPSr132mY,    TB_ALIGN_32 },
-    { X86::VFNMADDPDr132rY,   X86::VFNMADDPDr132mY,    TB_ALIGN_32 },
-    { X86::VFNMADDPSr213rY,   X86::VFNMADDPSr213mY,    TB_ALIGN_32 },
-    { X86::VFNMADDPDr213rY,   X86::VFNMADDPDr213mY,    TB_ALIGN_32 },
-
-    { X86::VFMSUBSSr231r,     X86::VFMSUBSSr231m,      0 },
-    { X86::VFMSUBSDr231r,     X86::VFMSUBSDr231m,      0 },
-    { X86::VFMSUBSSr132r,     X86::VFMSUBSSr132m,      0 },
-    { X86::VFMSUBSDr132r,     X86::VFMSUBSDr132m,      0 },
-    { X86::VFMSUBSSr213r,     X86::VFMSUBSSr213m,      0 },
-    { X86::VFMSUBSDr213r,     X86::VFMSUBSDr213m,      0 },
-
-    { X86::VFMSUBPSr231r,     X86::VFMSUBPSr231m,      TB_ALIGN_16 },
-    { X86::VFMSUBPDr231r,     X86::VFMSUBPDr231m,      TB_ALIGN_16 },
-    { X86::VFMSUBPSr132r,     X86::VFMSUBPSr132m,      TB_ALIGN_16 },
-    { X86::VFMSUBPDr132r,     X86::VFMSUBPDr132m,      TB_ALIGN_16 },
-    { X86::VFMSUBPSr213r,     X86::VFMSUBPSr213m,      TB_ALIGN_16 },
-    { X86::VFMSUBPDr213r,     X86::VFMSUBPDr213m,      TB_ALIGN_16 },
-    { X86::VFMSUBPSr231rY,    X86::VFMSUBPSr231mY,     TB_ALIGN_32 },
-    { X86::VFMSUBPDr231rY,    X86::VFMSUBPDr231mY,     TB_ALIGN_32 },
-    { X86::VFMSUBPSr132rY,    X86::VFMSUBPSr132mY,     TB_ALIGN_32 },
-    { X86::VFMSUBPDr132rY,    X86::VFMSUBPDr132mY,     TB_ALIGN_32 },
-    { X86::VFMSUBPSr213rY,    X86::VFMSUBPSr213mY,     TB_ALIGN_32 },
-    { X86::VFMSUBPDr213rY,    X86::VFMSUBPDr213mY,     TB_ALIGN_32 },
-
-    { X86::VFNMSUBSSr231r,    X86::VFNMSUBSSr231m,     0 },
-    { X86::VFNMSUBSDr231r,    X86::VFNMSUBSDr231m,     0 },
-    { X86::VFNMSUBSSr132r,    X86::VFNMSUBSSr132m,     0 },
-    { X86::VFNMSUBSDr132r,    X86::VFNMSUBSDr132m,     0 },
-    { X86::VFNMSUBSSr213r,    X86::VFNMSUBSSr213m,     0 },
-    { X86::VFNMSUBSDr213r,    X86::VFNMSUBSDr213m,     0 },
-
-    { X86::VFNMSUBPSr231r,    X86::VFNMSUBPSr231m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPDr231r,    X86::VFNMSUBPDr231m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPSr132r,    X86::VFNMSUBPSr132m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPDr132r,    X86::VFNMSUBPDr132m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPSr213r,    X86::VFNMSUBPSr213m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPDr213r,    X86::VFNMSUBPDr213m,     TB_ALIGN_16 },
-    { X86::VFNMSUBPSr231rY,   X86::VFNMSUBPSr231mY,    TB_ALIGN_32 },
-    { X86::VFNMSUBPDr231rY,   X86::VFNMSUBPDr231mY,    TB_ALIGN_32 },
-    { X86::VFNMSUBPSr132rY,   X86::VFNMSUBPSr132mY,    TB_ALIGN_32 },
-    { X86::VFNMSUBPDr132rY,   X86::VFNMSUBPDr132mY,    TB_ALIGN_32 },
-    { X86::VFNMSUBPSr213rY,   X86::VFNMSUBPSr213mY,    TB_ALIGN_32 },
-    { X86::VFNMSUBPDr213rY,   X86::VFNMSUBPDr213mY,    TB_ALIGN_32 },
-
-    { X86::VFMADDSUBPSr231r,  X86::VFMADDSUBPSr231m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr231r,  X86::VFMADDSUBPDr231m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr132r,  X86::VFMADDSUBPSr132m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr132r,  X86::VFMADDSUBPDr132m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr213r,  X86::VFMADDSUBPSr213m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr213r,  X86::VFMADDSUBPDr213m,   TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY,  TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY,  TB_ALIGN_32 },
-    { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY,  TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY,  TB_ALIGN_32 },
-    { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY,  TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY,  TB_ALIGN_32 },
-
-    { X86::VFMSUBADDPSr231r,  X86::VFMSUBADDPSr231m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr231r,  X86::VFMSUBADDPDr231m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr132r,  X86::VFMSUBADDPSr132m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr132r,  X86::VFMSUBADDPDr132m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr213r,  X86::VFMSUBADDPSr213m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr213r,  X86::VFMSUBADDPDr213m,   TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY,  TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY,  TB_ALIGN_32 },
-    { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY,  TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY,  TB_ALIGN_32 },
-    { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY,  TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY,  TB_ALIGN_32 },
+    { X86::VFMADDSSr231r,         X86::VFMADDSSr231m,         0 },
+    { X86::VFMADDSDr231r,         X86::VFMADDSDr231m,         0 },
+    { X86::VFMADDSSr132r,         X86::VFMADDSSr132m,         0 },
+    { X86::VFMADDSDr132r,         X86::VFMADDSDr132m,         0 },
+    { X86::VFMADDSSr213r,         X86::VFMADDSSr213m,         0 },
+    { X86::VFMADDSDr213r,         X86::VFMADDSDr213m,         0 },
+    { X86::VFMADDSSr132r_Int,     X86::VFMADDSSr132m_Int,     0 },
+    { X86::VFMADDSDr132r_Int,     X86::VFMADDSDr132m_Int,     0 },
+
+    { X86::VFMADDPSr231r,         X86::VFMADDPSr231m,         TB_ALIGN_16 },
+    { X86::VFMADDPDr231r,         X86::VFMADDPDr231m,         TB_ALIGN_16 },
+    { X86::VFMADDPSr132r,         X86::VFMADDPSr132m,         TB_ALIGN_16 },
+    { X86::VFMADDPDr132r,         X86::VFMADDPDr132m,         TB_ALIGN_16 },
+    { X86::VFMADDPSr213r,         X86::VFMADDPSr213m,         TB_ALIGN_16 },
+    { X86::VFMADDPDr213r,         X86::VFMADDPDr213m,         TB_ALIGN_16 },
+    { X86::VFMADDPSr231rY,        X86::VFMADDPSr231mY,        TB_ALIGN_32 },
+    { X86::VFMADDPDr231rY,        X86::VFMADDPDr231mY,        TB_ALIGN_32 },
+    { X86::VFMADDPSr132rY,        X86::VFMADDPSr132mY,        TB_ALIGN_32 },
+    { X86::VFMADDPDr132rY,        X86::VFMADDPDr132mY,        TB_ALIGN_32 },
+    { X86::VFMADDPSr213rY,        X86::VFMADDPSr213mY,        TB_ALIGN_32 },
+    { X86::VFMADDPDr213rY,        X86::VFMADDPDr213mY,        TB_ALIGN_32 },
+    { X86::VFMADDPSr132r_Int,     X86::VFMADDPSr132m_Int,     TB_ALIGN_16 },
+    { X86::VFMADDPDr132r_Int,     X86::VFMADDPDr132m_Int,     TB_ALIGN_16 },
+    { X86::VFMADDPSr132rY_Int,    X86::VFMADDPSr132mY_Int,    TB_ALIGN_32 },
+    { X86::VFMADDPDr132rY_Int,    X86::VFMADDPDr132mY_Int,    TB_ALIGN_32 },
+
+    { X86::VFNMADDSSr231r,        X86::VFNMADDSSr231m,        0 },
+    { X86::VFNMADDSDr231r,        X86::VFNMADDSDr231m,        0 },
+    { X86::VFNMADDSSr132r,        X86::VFNMADDSSr132m,        0 },
+    { X86::VFNMADDSDr132r,        X86::VFNMADDSDr132m,        0 },
+    { X86::VFNMADDSSr213r,        X86::VFNMADDSSr213m,        0 },
+    { X86::VFNMADDSDr213r,        X86::VFNMADDSDr213m,        0 },
+    { X86::VFNMADDSSr132r_Int,    X86::VFNMADDSSr132m_Int,    0 },
+    { X86::VFNMADDSDr132r_Int,    X86::VFNMADDSDr132m_Int,    0 },
+
+    { X86::VFNMADDPSr231r,        X86::VFNMADDPSr231m,        TB_ALIGN_16 },
+    { X86::VFNMADDPDr231r,        X86::VFNMADDPDr231m,        TB_ALIGN_16 },
+    { X86::VFNMADDPSr132r,        X86::VFNMADDPSr132m,        TB_ALIGN_16 },
+    { X86::VFNMADDPDr132r,        X86::VFNMADDPDr132m,        TB_ALIGN_16 },
+    { X86::VFNMADDPSr213r,        X86::VFNMADDPSr213m,        TB_ALIGN_16 },
+    { X86::VFNMADDPDr213r,        X86::VFNMADDPDr213m,        TB_ALIGN_16 },
+    { X86::VFNMADDPSr231rY,       X86::VFNMADDPSr231mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPDr231rY,       X86::VFNMADDPDr231mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPSr132rY,       X86::VFNMADDPSr132mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPDr132rY,       X86::VFNMADDPDr132mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPSr213rY,       X86::VFNMADDPSr213mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPDr213rY,       X86::VFNMADDPDr213mY,       TB_ALIGN_32 },
+    { X86::VFNMADDPSr132r_Int,    X86::VFNMADDPSr132m_Int,    TB_ALIGN_16 },
+    { X86::VFNMADDPDr132r_Int,    X86::VFNMADDPDr132m_Int,    TB_ALIGN_16 },
+    { X86::VFNMADDPSr132rY_Int,   X86::VFNMADDPSr132mY_Int,   TB_ALIGN_32 },
+    { X86::VFNMADDPDr132rY_Int,   X86::VFNMADDPDr132mY_Int,   TB_ALIGN_32 },
+
+    { X86::VFMSUBSSr231r,         X86::VFMSUBSSr231m,         0 },
+    { X86::VFMSUBSDr231r,         X86::VFMSUBSDr231m,         0 },
+    { X86::VFMSUBSSr132r,         X86::VFMSUBSSr132m,         0 },
+    { X86::VFMSUBSDr132r,         X86::VFMSUBSDr132m,         0 },
+    { X86::VFMSUBSSr213r,         X86::VFMSUBSSr213m,         0 },
+    { X86::VFMSUBSDr213r,         X86::VFMSUBSDr213m,         0 },
+    { X86::VFMSUBSSr132r_Int,     X86::VFMSUBSSr132m_Int,     0 },
+    { X86::VFMSUBSDr132r_Int,     X86::VFMSUBSDr132m_Int,     0 },
+
+    { X86::VFMSUBPSr231r,         X86::VFMSUBPSr231m,         TB_ALIGN_16 },
+    { X86::VFMSUBPDr231r,         X86::VFMSUBPDr231m,         TB_ALIGN_16 },
+    { X86::VFMSUBPSr132r,         X86::VFMSUBPSr132m,         TB_ALIGN_16 },
+    { X86::VFMSUBPDr132r,         X86::VFMSUBPDr132m,         TB_ALIGN_16 },
+    { X86::VFMSUBPSr213r,         X86::VFMSUBPSr213m,         TB_ALIGN_16 },
+    { X86::VFMSUBPDr213r,         X86::VFMSUBPDr213m,         TB_ALIGN_16 },
+    { X86::VFMSUBPSr231rY,        X86::VFMSUBPSr231mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPDr231rY,        X86::VFMSUBPDr231mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPSr132rY,        X86::VFMSUBPSr132mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPDr132rY,        X86::VFMSUBPDr132mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPSr213rY,        X86::VFMSUBPSr213mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPDr213rY,        X86::VFMSUBPDr213mY,        TB_ALIGN_32 },
+    { X86::VFMSUBPSr132r_Int,     X86::VFMSUBPSr132m_Int,     TB_ALIGN_16 },
+    { X86::VFMSUBPDr132r_Int,     X86::VFMSUBPDr132m_Int,     TB_ALIGN_16 },
+    { X86::VFMSUBPSr132rY_Int,    X86::VFMSUBPSr132mY_Int,    TB_ALIGN_32 },
+    { X86::VFMSUBPDr132rY_Int,    X86::VFMSUBPDr132mY_Int,    TB_ALIGN_32 },
+
+    { X86::VFNMSUBSSr231r,        X86::VFNMSUBSSr231m,        0 },
+    { X86::VFNMSUBSDr231r,        X86::VFNMSUBSDr231m,        0 },
+    { X86::VFNMSUBSSr132r,        X86::VFNMSUBSSr132m,        0 },
+    { X86::VFNMSUBSDr132r,        X86::VFNMSUBSDr132m,        0 },
+    { X86::VFNMSUBSSr213r,        X86::VFNMSUBSSr213m,        0 },
+    { X86::VFNMSUBSDr213r,        X86::VFNMSUBSDr213m,        0 },
+    { X86::VFNMSUBSSr132r_Int,    X86::VFNMSUBSSr132m_Int,    0 },
+    { X86::VFNMSUBSDr132r_Int,    X86::VFNMSUBSDr132m_Int,    0 },
+
+    { X86::VFNMSUBPSr231r,        X86::VFNMSUBPSr231m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPDr231r,        X86::VFNMSUBPDr231m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPSr132r,        X86::VFNMSUBPSr132m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPDr132r,        X86::VFNMSUBPDr132m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPSr213r,        X86::VFNMSUBPSr213m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPDr213r,        X86::VFNMSUBPDr213m,        TB_ALIGN_16 },
+    { X86::VFNMSUBPSr231rY,       X86::VFNMSUBPSr231mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPDr231rY,       X86::VFNMSUBPDr231mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPSr132rY,       X86::VFNMSUBPSr132mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPDr132rY,       X86::VFNMSUBPDr132mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPSr213rY,       X86::VFNMSUBPSr213mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPDr213rY,       X86::VFNMSUBPDr213mY,       TB_ALIGN_32 },
+    { X86::VFNMSUBPSr132r_Int,    X86::VFNMSUBPSr132m_Int,    TB_ALIGN_16 },
+    { X86::VFNMSUBPDr132r_Int,    X86::VFNMSUBPDr132m_Int,    TB_ALIGN_16 },
+    { X86::VFNMSUBPSr132rY_Int,   X86::VFNMSUBPSr132mY_Int,   TB_ALIGN_32 },
+    { X86::VFNMSUBPDr132rY_Int,   X86::VFNMSUBPDr132mY_Int,   TB_ALIGN_32 },
+
+    { X86::VFMADDSUBPSr231r,      X86::VFMADDSUBPSr231m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPDr231r,      X86::VFMADDSUBPDr231m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPSr132r,      X86::VFMADDSUBPSr132m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPDr132r,      X86::VFMADDSUBPDr132m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPSr213r,      X86::VFMADDSUBPSr213m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPDr213r,      X86::VFMADDSUBPDr213m,      TB_ALIGN_16 },
+    { X86::VFMADDSUBPSr231rY,     X86::VFMADDSUBPSr231mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPDr231rY,     X86::VFMADDSUBPDr231mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPSr132rY,     X86::VFMADDSUBPSr132mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPDr132rY,     X86::VFMADDSUBPDr132mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPSr213rY,     X86::VFMADDSUBPSr213mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPDr213rY,     X86::VFMADDSUBPDr213mY,     TB_ALIGN_32 },
+    { X86::VFMADDSUBPSr132r_Int,  X86::VFMADDSUBPSr132m_Int,  TB_ALIGN_16 },
+    { X86::VFMADDSUBPDr132r_Int,  X86::VFMADDSUBPDr132m_Int,  TB_ALIGN_16 },
+    { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
+    { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
+
+    { X86::VFMSUBADDPSr231r,      X86::VFMSUBADDPSr231m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPDr231r,      X86::VFMSUBADDPDr231m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPSr132r,      X86::VFMSUBADDPSr132m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPDr132r,      X86::VFMSUBADDPDr132m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPSr213r,      X86::VFMSUBADDPSr213m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPDr213r,      X86::VFMSUBADDPDr213m,      TB_ALIGN_16 },
+    { X86::VFMSUBADDPSr231rY,     X86::VFMSUBADDPSr231mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPDr231rY,     X86::VFMSUBADDPDr231mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPSr132rY,     X86::VFMSUBADDPSr132mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPDr132rY,     X86::VFMSUBADDPDr132mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPSr213rY,     X86::VFMSUBADDPSr213mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPDr213rY,     X86::VFMSUBADDPDr213mY,     TB_ALIGN_32 },
+    { X86::VFMSUBADDPSr132r_Int,  X86::VFMSUBADDPSr132m_Int,  TB_ALIGN_16 },
+    { X86::VFMSUBADDPDr132r_Int,  X86::VFMSUBADDPDr132m_Int,  TB_ALIGN_16 },
+    { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
+    { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
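Each of these loops inserts its entries into a forward (register form to memory form) map and, unless TB_NO_REVERSE is set, a reverse map; TB_NO_FORWARD suppresses the forward direction. A simplified, self-contained sketch of that registration logic (the map type and helper name are stand-ins, not the actual X86InstrInfo members):

    #include <cstdint>
    #include <unordered_map>
    #include <utility>

    struct X86OpTblEntry { uint16_t RegOp, MemOp, Flags; };
    enum { TB_NO_REVERSE = 1 << 4, TB_NO_FORWARD = 1 << 5 };

    // Opcode -> (opcode, flags) fold table; a stand-in for the real maps.
    typedef std::unordered_map<unsigned, std::pair<unsigned, uint16_t> > OpMap;

    static void addTableEntry(OpMap &RegOp2MemOp, OpMap &MemOp2RegOp,
                              const X86OpTblEntry &E, uint16_t ExtraFlags) {
      uint16_t Flags = E.Flags | ExtraFlags;
      if ((Flags & TB_NO_FORWARD) == 0)
        RegOp2MemOp[E.RegOp] = std::make_pair(E.MemOp, Flags);
      // Many register forms may fold to one memory form, so the reverse
      // mapping is only recorded when it is unambiguous.
      if ((Flags & TB_NO_REVERSE) == 0)
        MemOp2RegOp[E.MemOp] = std::make_pair(E.RegOp, Flags);
    }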
@@ -2271,6 +2295,37 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
   }
 }
 
+/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and
+/// register size in bytes.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) {
+  static const unsigned Opc[16][3] = {
+    { X86::CMOVA16rr,  X86::CMOVA32rr,  X86::CMOVA64rr  },
+    { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+    { X86::CMOVB16rr,  X86::CMOVB32rr,  X86::CMOVB64rr  },
+    { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+    { X86::CMOVE16rr,  X86::CMOVE32rr,  X86::CMOVE64rr  },
+    { X86::CMOVG16rr,  X86::CMOVG32rr,  X86::CMOVG64rr  },
+    { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+    { X86::CMOVL16rr,  X86::CMOVL32rr,  X86::CMOVL64rr  },
+    { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+    { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+    { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+    { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+    { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+    { X86::CMOVO16rr,  X86::CMOVO32rr,  X86::CMOVO64rr  },
+    { X86::CMOVP16rr,  X86::CMOVP32rr,  X86::CMOVP64rr  },
+    { X86::CMOVS16rr,  X86::CMOVS32rr,  X86::CMOVS64rr  }
+  };
+
+  assert(CC < 16 && "Can only handle standard cond codes");
+  switch (RegBytes) {
+  default: llvm_unreachable("Illegal register size!");
+  case 2: return Opc[CC][0];
+  case 4: return Opc[CC][1];
+  case 8: return Opc[CC][2];
+  }
+}
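The row order of the Opc table mirrors the X86::CondCode enumeration (which the assert relies on), and the column is picked by operand size, so a call site reduces to a table lookup. A hedged example of what a caller obtains (a sketch, not a quote from the file):

    // COND_E selects the CMOVE row; a 4-byte GR32 value selects column 1,
    // so this yields X86::CMOVE32rr.
    unsigned Opc = getCMovFromCond(X86::COND_E, /*RegBytes=*/4);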
+
 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   if (!MI->isTerminator()) return false;
 
@@ -2495,6 +2550,55 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return Count;
 }
 
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+                const SmallVectorImpl<MachineOperand> &Cond,
+                unsigned TrueReg, unsigned FalseReg,
+                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+  // Not all subtargets have cmov instructions.
+  if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+    return false;
+  if (Cond.size() != 1)
+    return false;
+  // We cannot do the composite conditions, at least not in SSA form.
+  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+    return false;
+
+  // Check register classes.
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  if (!RC)
+    return false;
+
+  // We have cmov instructions for 16-, 32-, and 64-bit general-purpose registers.
+  if (X86::GR16RegClass.hasSubClassEq(RC) ||
+      X86::GR32RegClass.hasSubClassEq(RC) ||
+      X86::GR64RegClass.hasSubClassEq(RC)) {
+    // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+    // Bridge. Probably Ivy Bridge as well.
+    CondCycles = 2;
+    TrueCycles = 2;
+    FalseCycles = 2;
+    return true;
+  }
+
+  // Can't do vectors.
+  return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I, DebugLoc DL,
+                                unsigned DstReg,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                unsigned TrueReg, unsigned FalseReg) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  assert(Cond.size() == 1 && "Invalid Cond array");
+  unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+                                 MRI.getRegClass(DstReg)->getSize());
+  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
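These two hooks implement TargetInstrInfo's generic select interface: a target-independent pass first asks canInsertSelect for feasibility and cost, then calls insertSelect to materialize the cmov. A sketch of the calling pattern (the local variable names are illustrative):

    int CondCycles, TrueCycles, FalseCycles;
    if (TII->canInsertSelect(*MBB, Cond, TrueReg, FalseReg,
                             CondCycles, TrueCycles, FalseCycles))
      // For Cond[0] == X86::COND_NE and a GR32 destination this emits
      // CMOVNE32rr DstReg, FalseReg, TrueReg.
      TII->insertSelect(*MBB, InsertPt, DL, DstReg, Cond,
                        TrueReg, FalseReg);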
+
 /// isHReg - Test if the given register is a physical h register.
 static bool isHReg(unsigned Reg) {
   return X86::GR8_ABCD_HRegClass.contains(Reg);
@@ -2762,40 +2866,307 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
 }
 
 bool X86InstrInfo::
-OptimizeSubInstr(MachineInstr *SubInstr, const MachineRegisterInfo *MRI) const {
-  // If destination is a memory operand, do not perform this optimization.
-  if ((SubInstr->getOpcode() != X86::SUB64rr) &&
-      (SubInstr->getOpcode() != X86::SUB32rr) &&
-      (SubInstr->getOpcode() != X86::SUB16rr) &&
-      (SubInstr->getOpcode() != X86::SUB8rr) &&
-      (SubInstr->getOpcode() != X86::SUB64ri32) &&
-      (SubInstr->getOpcode() != X86::SUB64ri8) &&
-      (SubInstr->getOpcode() != X86::SUB32ri) &&
-      (SubInstr->getOpcode() != X86::SUB32ri8) &&
-      (SubInstr->getOpcode() != X86::SUB16ri) &&
-      (SubInstr->getOpcode() != X86::SUB16ri8) &&
-      (SubInstr->getOpcode() != X86::SUB8ri))
-    return false;
-  unsigned DestReg = SubInstr->getOperand(0).getReg();
-  if (MRI->use_begin(DestReg) != MRI->use_end())
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+               int &CmpMask, int &CmpValue) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case X86::CMP64ri32:
+  case X86::CMP64ri8:
+  case X86::CMP32ri:
+  case X86::CMP32ri8:
+  case X86::CMP16ri:
+  case X86::CMP16ri8:
+  case X86::CMP8ri:
+    SrcReg = MI->getOperand(0).getReg();
+    SrcReg2 = 0;
+    CmpMask = ~0;
+    CmpValue = MI->getOperand(1).getImm();
+    return true;
+  case X86::CMP64rr:
+  case X86::CMP32rr:
+  case X86::CMP16rr:
+  case X86::CMP8rr:
+    SrcReg = MI->getOperand(0).getReg();
+    SrcReg2 = MI->getOperand(1).getReg();
+    CmpMask = ~0;
+    CmpValue = 0;
+    return true;
+  }
+  return false;
+}
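Concretely, for CMP32ri with immediate 42 the hook reports SrcReg2 = 0, CmpMask = ~0, and CmpValue = 42, while for CMP64rr it reports both registers and a zero immediate. The peephole optimizer is the expected driver; a sketch of the pairing (caller-side names are illustrative):

    unsigned SrcReg, SrcReg2;
    int CmpMask, CmpValue;
    if (TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue))
      // Try to delete MI by reusing the flags of an earlier instruction.
      TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI);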
+
+/// getSwappedConditionForSET - assuming the flags were set by MI(a,b),
+/// return the SET opcode that tests the equivalent predicate once the
+/// instructions are changed so that the flags are set by MI(b,a).
+static unsigned getSwappedConditionForSET(unsigned SETOpc) {
+  switch (SETOpc) {
+  default: return 0;
+  case X86::SETEr:  return X86::SETEr;
+  case X86::SETEm:  return X86::SETEm;
+  case X86::SETNEr: return X86::SETNEr;
+  case X86::SETNEm: return X86::SETNEm;
+  case X86::SETLr:  return X86::SETGr;
+  case X86::SETLm:  return X86::SETGm;
+  case X86::SETLEr: return X86::SETGEr;
+  case X86::SETLEm: return X86::SETGEm;
+  case X86::SETGr:  return X86::SETLr;
+  case X86::SETGm:  return X86::SETLm;
+  case X86::SETGEr: return X86::SETLEr;
+  case X86::SETGEm: return X86::SETLEm;
+  case X86::SETBr:  return X86::SETAr;
+  case X86::SETBm:  return X86::SETAm;
+  case X86::SETBEr: return X86::SETAEr;
+  case X86::SETBEm: return X86::SETAEm;
+  case X86::SETAr:  return X86::SETBr;
+  case X86::SETAm:  return X86::SETBm;
+  case X86::SETAEr: return X86::SETBEr;
+  case X86::SETAEm: return X86::SETBEm;
+  }
+}
+
+/// getSwappedConditionForBranch - assuming the flags were set by MI(a,b),
+/// return the branch opcode that tests the equivalent predicate once the
+/// instructions are changed so that the flags are set by MI(b,a).
+static unsigned getSwappedConditionForBranch(unsigned BranchOpc) {
+  switch (BranchOpc) {
+  default: return 0;
+  case X86::JE_4:  return X86::JE_4;
+  case X86::JNE_4: return X86::JNE_4;
+  case X86::JL_4:  return X86::JG_4;
+  case X86::JLE_4: return X86::JGE_4;
+  case X86::JG_4:  return X86::JL_4;
+  case X86::JGE_4: return X86::JLE_4;
+  case X86::JB_4:  return X86::JA_4;
+  case X86::JBE_4: return X86::JAE_4;
+  case X86::JA_4:  return X86::JB_4;
+  case X86::JAE_4: return X86::JBE_4;
+  }
+}
+
+/// getSwappedConditionForCMov - assuming the flags were set by MI(a,b),
+/// return the CMOV opcode that tests the equivalent predicate once the
+/// instructions are changed so that the flags are set by MI(b,a).
+static unsigned getSwappedConditionForCMov(unsigned CMovOpc) {
+  switch (CMovOpc) {
+  default: return 0;
+  case X86::CMOVE16rm:  return X86::CMOVE16rm;
+  case X86::CMOVE16rr:  return X86::CMOVE16rr;
+  case X86::CMOVE32rm:  return X86::CMOVE32rm;
+  case X86::CMOVE32rr:  return X86::CMOVE32rr;
+  case X86::CMOVE64rm:  return X86::CMOVE64rm;
+  case X86::CMOVE64rr:  return X86::CMOVE64rr;
+  case X86::CMOVNE16rm: return X86::CMOVNE16rm;
+  case X86::CMOVNE16rr: return X86::CMOVNE16rr;
+  case X86::CMOVNE32rm: return X86::CMOVNE32rm;
+  case X86::CMOVNE32rr: return X86::CMOVNE32rr;
+  case X86::CMOVNE64rm: return X86::CMOVNE64rm;
+  case X86::CMOVNE64rr: return X86::CMOVNE64rr;
+
+  case X86::CMOVL16rm:  return X86::CMOVG16rm;
+  case X86::CMOVL16rr:  return X86::CMOVG16rr;
+  case X86::CMOVL32rm:  return X86::CMOVG32rm;
+  case X86::CMOVL32rr:  return X86::CMOVG32rr;
+  case X86::CMOVL64rm:  return X86::CMOVG64rm;
+  case X86::CMOVL64rr:  return X86::CMOVG64rr;
+  case X86::CMOVLE16rm: return X86::CMOVGE16rm;
+  case X86::CMOVLE16rr: return X86::CMOVGE16rr;
+  case X86::CMOVLE32rm: return X86::CMOVGE32rm;
+  case X86::CMOVLE32rr: return X86::CMOVGE32rr;
+  case X86::CMOVLE64rm: return X86::CMOVGE64rm;
+  case X86::CMOVLE64rr: return X86::CMOVGE64rr;
+
+  case X86::CMOVG16rm:  return X86::CMOVL16rm;
+  case X86::CMOVG16rr:  return X86::CMOVL16rr;
+  case X86::CMOVG32rm:  return X86::CMOVL32rm;
+  case X86::CMOVG32rr:  return X86::CMOVL32rr;
+  case X86::CMOVG64rm:  return X86::CMOVL64rm;
+  case X86::CMOVG64rr:  return X86::CMOVL64rr;
+  case X86::CMOVGE16rm: return X86::CMOVLE16rm;
+  case X86::CMOVGE16rr: return X86::CMOVLE16rr;
+  case X86::CMOVGE32rm: return X86::CMOVLE32rm;
+  case X86::CMOVGE32rr: return X86::CMOVLE32rr;
+  case X86::CMOVGE64rm: return X86::CMOVLE64rm;
+  case X86::CMOVGE64rr: return X86::CMOVLE64rr;
+
+  case X86::CMOVB16rm:  return X86::CMOVA16rm;
+  case X86::CMOVB16rr:  return X86::CMOVA16rr;
+  case X86::CMOVB32rm:  return X86::CMOVA32rm;
+  case X86::CMOVB32rr:  return X86::CMOVA32rr;
+  case X86::CMOVB64rm:  return X86::CMOVA64rm;
+  case X86::CMOVB64rr:  return X86::CMOVA64rr;
+  case X86::CMOVBE16rm: return X86::CMOVAE16rm;
+  case X86::CMOVBE16rr: return X86::CMOVAE16rr;
+  case X86::CMOVBE32rm: return X86::CMOVAE32rm;
+  case X86::CMOVBE32rr: return X86::CMOVAE32rr;
+  case X86::CMOVBE64rm: return X86::CMOVAE64rm;
+  case X86::CMOVBE64rr: return X86::CMOVAE64rr;
+
+  case X86::CMOVA16rm:  return X86::CMOVB16rm;
+  case X86::CMOVA16rr:  return X86::CMOVB16rr;
+  case X86::CMOVA32rm:  return X86::CMOVB32rm;
+  case X86::CMOVA32rr:  return X86::CMOVB32rr;
+  case X86::CMOVA64rm:  return X86::CMOVB64rm;
+  case X86::CMOVA64rr:  return X86::CMOVB64rr;
+  case X86::CMOVAE16rm: return X86::CMOVBE16rm;
+  case X86::CMOVAE16rr: return X86::CMOVBE16rr;
+  case X86::CMOVAE32rm: return X86::CMOVBE32rm;
+  case X86::CMOVAE32rr: return X86::CMOVBE32rr;
+  case X86::CMOVAE64rm: return X86::CMOVBE64rm;
+  case X86::CMOVAE64rr: return X86::CMOVBE64rr;
+  }
+}
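All three helpers encode the same mapping: exchanging the compared operands turns L/LE into G/GE and B/BE into A/AE (and vice versa), while E/NE are symmetric. A standalone model of that mapping, with a self-check that swapping twice is the identity (the enum here is local to the example, not X86::CondCode):

    #include <cassert>

    enum Cond { E, NE, L, LE, G, GE, B, BE, A, AE };

    static Cond swapCond(Cond C) {
      switch (C) {
      case E:  return E;   case NE: return NE;
      case L:  return G;   case LE: return GE;
      case G:  return L;   case GE: return LE;
      case B:  return A;   case BE: return AE;
      case A:  return B;   case AE: return BE;
      }
      return E; // not reached
    }

    int main() {
      const Cond All[] = { E, NE, L, LE, G, GE, B, BE, A, AE };
      for (unsigned i = 0; i != sizeof(All) / sizeof(All[0]); ++i)
        assert(swapCond(swapCond(All[i])) == All[i]); // involution
    }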
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// This function can be extended later on.
+/// SrcReg, SrcReg2: register operands for FlagI.
+/// ImmValue: immediate for FlagI if it takes an immediate.
+inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
+                                        unsigned SrcReg2, int ImmValue,
+                                        MachineInstr *OI) {
+  if (((FlagI->getOpcode() == X86::CMP64rr &&
+        OI->getOpcode() == X86::SUB64rr) ||
+       (FlagI->getOpcode() == X86::CMP32rr &&
+        OI->getOpcode() == X86::SUB32rr) ||
+       (FlagI->getOpcode() == X86::CMP16rr &&
+        OI->getOpcode() == X86::SUB16rr) ||
+       (FlagI->getOpcode() == X86::CMP8rr &&
+        OI->getOpcode() == X86::SUB8rr)) &&
+      ((OI->getOperand(1).getReg() == SrcReg &&
+        OI->getOperand(2).getReg() == SrcReg2) ||
+       (OI->getOperand(1).getReg() == SrcReg2 &&
+        OI->getOperand(2).getReg() == SrcReg)))
+    return true;
+
+  if (((FlagI->getOpcode() == X86::CMP64ri32 &&
+        OI->getOpcode() == X86::SUB64ri32) ||
+       (FlagI->getOpcode() == X86::CMP64ri8 &&
+        OI->getOpcode() == X86::SUB64ri8) ||
+       (FlagI->getOpcode() == X86::CMP32ri &&
+        OI->getOpcode() == X86::SUB32ri) ||
+       (FlagI->getOpcode() == X86::CMP32ri8 &&
+        OI->getOpcode() == X86::SUB32ri8) ||
+       (FlagI->getOpcode() == X86::CMP16ri &&
+        OI->getOpcode() == X86::SUB16ri) ||
+       (FlagI->getOpcode() == X86::CMP16ri8 &&
+        OI->getOpcode() == X86::SUB16ri8) ||
+       (FlagI->getOpcode() == X86::CMP8ri &&
+        OI->getOpcode() == X86::SUB8ri)) &&
+      OI->getOperand(1).getReg() == SrcReg &&
+      OI->getOperand(2).getImm() == ImmValue)
+    return true;
+  return false;
+}
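For instance, when the flags were already produced by a subtract over the same operands, the compare adds nothing (schematic machine code, not exact printer syntax):

    %d = SUB32rr %a, %b        ; sets EFLAGS as a side effect
    CMP32rr %a, %b             ; redundant: same operands, same order
    CMP32rr %b, %a             ; also removable, but only if every EFLAGS
                               ; consumer's condition code is swapped first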
+
+/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// operates on the same source operands and sets flags in the same way as
+/// Compare; remove Compare if possible.
+bool X86InstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+                     int CmpMask, int CmpValue,
+                     const MachineRegisterInfo *MRI) const {
+  // Get the unique definition of SrcReg.
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI) return false;
+
+  // I points at CmpInstr; Def points at the instruction defining SrcReg.
+  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+
+  // We are searching for an earlier instruction that can make CmpInstr
+  // redundant; if one is found, it is saved in Sub.
+  MachineInstr *Sub = NULL;
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  // We iterate backward, starting from the instruction before CmpInstr, and
+  // stop when we reach the definition of a source register or the top of
+  // the BB. RI points to the instruction before CmpInstr.
+  // If the definition is in this basic block, RE points to the definition;
+  // otherwise, RE is the rend of the basic block.
+  MachineBasicBlock::reverse_iterator
+      RI = MachineBasicBlock::reverse_iterator(I),
+      RE = CmpInstr->getParent() == MI->getParent() ?
+           MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
+           CmpInstr->getParent()->rend();
+  for (; RI != RE; ++RI) {
+    MachineInstr *Instr = &*RI;
+    // Check whether CmpInstr can be made redundant by the current instruction.
+    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+      Sub = Instr;
+      break;
+    }
+
+    if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
+        Instr->readsRegister(X86::EFLAGS, TRI))
+      // This instruction modifies or uses EFLAGS.
+      // We can't remove CmpInstr.
+      return false;
+  }
+
+  // Return false if no candidates exist.
+  if (!Sub)
     return false;
 
-  // There is no use of the destination register, we can replace SUB with CMP.
-  switch (SubInstr->getOpcode()) {
-    default: break;
-    case X86::SUB64rr:   SubInstr->setDesc(get(X86::CMP64rr));   break;
-    case X86::SUB32rr:   SubInstr->setDesc(get(X86::CMP32rr));   break;
-    case X86::SUB16rr:   SubInstr->setDesc(get(X86::CMP16rr));   break;
-    case X86::SUB8rr:    SubInstr->setDesc(get(X86::CMP8rr));    break;
-    case X86::SUB64ri32: SubInstr->setDesc(get(X86::CMP64ri32)); break;
-    case X86::SUB64ri8:  SubInstr->setDesc(get(X86::CMP64ri8));  break;
-    case X86::SUB32ri:   SubInstr->setDesc(get(X86::CMP32ri));   break;
-    case X86::SUB32ri8:  SubInstr->setDesc(get(X86::CMP32ri8));  break;
-    case X86::SUB16ri:   SubInstr->setDesc(get(X86::CMP16ri));   break;
-    case X86::SUB16ri8:  SubInstr->setDesc(get(X86::CMP16ri8));  break;
-    case X86::SUB8ri:    SubInstr->setDesc(get(X86::CMP8ri));    break;
+  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+                    Sub->getOperand(2).getReg() == SrcReg);
+
+  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
+  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
+  // If we are done with the basic block, we need to check whether EFLAGS is
+  // live-out.
+  bool IsSafe = false;
+  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+  MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
+  for (++I; I != E; ++I) {
+    const MachineInstr &Instr = *I;
+    if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+      // It is safe to remove CmpInstr if EFLAGS is updated again.
+      IsSafe = true;
+      break;
+    }
+    if (!Instr.readsRegister(X86::EFLAGS, TRI))
+      continue;
+
+    // EFLAGS is used by this instruction.
+    if (IsSwapped) {
+      // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+      // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+      unsigned NewOpc = getSwappedConditionForSET(Instr.getOpcode());
+      if (!NewOpc) NewOpc = getSwappedConditionForBranch(Instr.getOpcode());
+      if (!NewOpc) NewOpc = getSwappedConditionForCMov(Instr.getOpcode());
+      if (!NewOpc) return false;
+
+      // Push the MachineInstr to OpsToUpdate.
+      // If it is safe to remove CmpInstr, the condition code of these
+      // instructions will be modified.
+      OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+    }
+    if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+      IsSafe = true;
+      break;
+    }
   }
-  SubInstr->RemoveOperand(0);
+
+  // If EFLAGS is neither killed nor re-defined, we should check whether it
+  // is live-out; if it is live-out, do not optimize.
+  if (IsSwapped && !IsSafe) {
+    MachineBasicBlock *MBB = CmpInstr->getParent();
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+             SE = MBB->succ_end(); SI != SE; ++SI)
+      if ((*SI)->isLiveIn(X86::EFLAGS))
+        return false;
+  }
+
+  // Make sure Sub instruction defines EFLAGS.
+  assert(Sub->getNumOperands() >= 4 && Sub->getOperand(3).isReg() &&
+         Sub->getOperand(3).getReg() == X86::EFLAGS &&
+         "EFLAGS should be the 4th operand of SUBrr or SUBri.");
+  Sub->getOperand(3).setIsDef(true);
+  CmpInstr->eraseFromParent();
+
+  // Modify the condition code of instructions in OpsToUpdate.
+  for (unsigned i = 0, e = OpsToUpdate.size(); i != e; ++i)
+    OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
   return true;
 }
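Putting it together, a typical rewrite for the swapped-operand case looks like this (schematic before/after, not exact printer syntax):

    ; before                          ; after
    %d = SUB32rr %a, %b               %d = SUB32rr %a, %b   ; EFLAGS now a def
    CMP32rr %b, %a                    (deleted)
    JL_4 <bb>                         JG_4 <bb>             ; condition swapped

The backward scan finds the SUB via isRedundantFlagInstr, the forward scan proves EFLAGS is dead past the rewritten users (or falls back to checking successor live-ins), and OpsToUpdate carries the opcode substitutions applied once removal is known to be safe.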
 
@@ -4025,9 +4396,6 @@ namespace {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
-
-   private:
-    unsigned BaseReg;
   };
 }