Fixed/added namespace ending comments using clang-tidy. NFC

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.cpp
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 57a078ef95c085966f7a4194a3c4e21d6d7d0b1f..05bb019bfa498f868186142f861e8106114f8be1 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -91,7 +91,7 @@ enum {
    TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
  };
  
-struct X86OpTblEntry {
+struct X86MemoryFoldTableEntry {
    uint16_t RegOp;
    uint16_t MemOp;
    uint16_t Flags;
@@ -104,9 +104,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      : X86GenInstrInfo(
            (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
            (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
-      Subtarget(STI), RI(STI) {
+      Subtarget(STI), RI(STI.getTargetTriple()) {
  
-  static const X86OpTblEntry OpTbl2Addr[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
      { X86::ADC32ri,     X86::ADC32mi,    0 },
      { X86::ADC32ri8,    X86::ADC32mi8,   0 },
      { X86::ADC32rr,     X86::ADC32mr,    0 },
@@ -269,17 +269,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::XOR8rr,      X86::XOR8mr,     0 }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
-    unsigned RegOp = OpTbl2Addr[i].RegOp;
-    unsigned MemOp = OpTbl2Addr[i].MemOp;
-    unsigned Flags = OpTbl2Addr[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) {
+    unsigned RegOp = MemoryFoldTable2Addr[i].RegOp;
+    unsigned MemOp = MemoryFoldTable2Addr[i].MemOp;
+    unsigned Flags = MemoryFoldTable2Addr[i].Flags;
      AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
                    RegOp, MemOp,
                    // Index 0, folded load and store, no alignment requirement.
                    Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
    }
  
-  static const X86OpTblEntry OpTbl0[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
      { X86::BT16ri8,     X86::BT16mi8,       TB_FOLDED_LOAD },
      { X86::BT32ri8,     X86::BT32mi8,       TB_FOLDED_LOAD },
      { X86::BT64ri8,     X86::BT64mi8,       TB_FOLDED_LOAD },
@@ -424,15 +424,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VCVTPS2PHYrr,       X86::VCVTPS2PHYmr,     TB_FOLDED_STORE }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp      = OpTbl0[i].RegOp;
-    unsigned MemOp      = OpTbl0[i].MemOp;
-    unsigned Flags      = OpTbl0[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) {
+    unsigned RegOp      = MemoryFoldTable0[i].RegOp;
+    unsigned MemOp      = MemoryFoldTable0[i].MemOp;
+    unsigned Flags      = MemoryFoldTable0[i].Flags;
      AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
                    RegOp, MemOp, TB_INDEX_0 | Flags);
    }
  
-  static const X86OpTblEntry OpTbl1[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
+    { X86::BSF16rr,         X86::BSF16rm,             0 },
+    { X86::BSF32rr,         X86::BSF32rm,             0 },
+    { X86::BSF64rr,         X86::BSF64rm,             0 },
+    { X86::BSR16rr,         X86::BSR16rm,             0 },
+    { X86::BSR32rr,         X86::BSR32rm,             0 },
+    { X86::BSR64rr,         X86::BSR64rm,             0 },
      { X86::CMP16rr,         X86::CMP16rm,             0 },
      { X86::CMP32rr,         X86::CMP32rm,             0 },
      { X86::CMP64rr,         X86::CMP64rm,             0 },
@@ -526,11 +532,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::PSHUFLWri,       X86::PSHUFLWmi,           TB_ALIGN_16 },
      { X86::PTESTrr,         X86::PTESTrm,             TB_ALIGN_16 },
      { X86::RCPPSr,          X86::RCPPSm,              TB_ALIGN_16 },
-    { X86::RCPPSr_Int,      X86::RCPPSm_Int,          TB_ALIGN_16 },
+    { X86::RCPSSr,          X86::RCPSSm,              0 },
+    { X86::RCPSSr_Int,      X86::RCPSSm_Int,          0 },
      { X86::ROUNDPDr,        X86::ROUNDPDm,            TB_ALIGN_16 },
      { X86::ROUNDPSr,        X86::ROUNDPSm,            TB_ALIGN_16 },
      { X86::RSQRTPSr,        X86::RSQRTPSm,            TB_ALIGN_16 },
-    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int,        TB_ALIGN_16 },
      { X86::RSQRTSSr,        X86::RSQRTSSm,            0 },
      { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,        0 },
      { X86::SQRTPDr,         X86::SQRTPDm,             TB_ALIGN_16 },
@@ -547,6 +553,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
      { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
  
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPD2PIirr,   X86::MMX_CVTPD2PIirm,   0 },
+    { X86::MMX_CVTPI2PDirr,   X86::MMX_CVTPI2PDirm,   0 },
+    { X86::MMX_CVTPS2PIirr,   X86::MMX_CVTPS2PIirm,   0 },
+    { X86::MMX_CVTTPD2PIirr,  X86::MMX_CVTTPD2PIirm,  0 },
+    { X86::MMX_CVTTPS2PIirr,  X86::MMX_CVTTPS2PIirm,  0 },
+    { X86::MMX_MOVD64to64rr,  X86::MMX_MOVQ64rm,      0 },
+    { X86::MMX_PABSBrr64,     X86::MMX_PABSBrm64,     0 },
+    { X86::MMX_PABSDrr64,     X86::MMX_PABSDrm64,     0 },
+    { X86::MMX_PABSWrr64,     X86::MMX_PABSWrm64,     0 },
+    { X86::MMX_PSHUFWri,      X86::MMX_PSHUFWmi,      0 },
+
+    // 3DNow! version of foldable instructions
+    { X86::PF2IDrr,         X86::PF2IDrm,             0 },
+    { X86::PF2IWrr,         X86::PF2IWrm,             0 },
+    { X86::PFRCPrr,         X86::PFRCPrm,             0 },
+    { X86::PFRSQRTrr,       X86::PFRSQRTrm,           0 },
+    { X86::PI2FDrr,         X86::PI2FDrm,             0 },
+    { X86::PI2FWrr,         X86::PI2FWrm,             0 },
+    { X86::PSWAPDrr,        X86::PSWAPDrm,            0 },
+
      // AVX 128-bit versions of foldable instructions
      { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
      { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
@@ -613,11 +640,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VPSHUFLWri,      X86::VPSHUFLWmi,          0 },
      { X86::VPTESTrr,        X86::VPTESTrm,            0 },
      { X86::VRCPPSr,         X86::VRCPPSm,             0 },
-    { X86::VRCPPSr_Int,     X86::VRCPPSm_Int,         0 },
      { X86::VROUNDPDr,       X86::VROUNDPDm,           0 },
      { X86::VROUNDPSr,       X86::VROUNDPSm,           0 },
      { X86::VRSQRTPSr,       X86::VRSQRTPSm,           0 },
-    { X86::VRSQRTPSr_Int,   X86::VRSQRTPSm_Int,       0 },
      { X86::VSQRTPDr,        X86::VSQRTPDm,            0 },
      { X86::VSQRTPSr,        X86::VSQRTPSm,            0 },
      { X86::VTESTPDrr,       X86::VTESTPDrm,           0 },
@@ -646,17 +671,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VPERMILPSYri,    X86::VPERMILPSYmi,        0 },
      { X86::VPTESTYrr,       X86::VPTESTYrm,           0 },
      { X86::VRCPPSYr,        X86::VRCPPSYm,            0 },
-    { X86::VRCPPSYr_Int,    X86::VRCPPSYm_Int,        0 },
      { X86::VROUNDYPDr,      X86::VROUNDYPDm,          0 },
      { X86::VROUNDYPSr,      X86::VROUNDYPSm,          0 },
      { X86::VRSQRTPSYr,      X86::VRSQRTPSYm,          0 },
-    { X86::VRSQRTPSYr_Int,  X86::VRSQRTPSYm_Int,      0 },
      { X86::VSQRTPDYr,       X86::VSQRTPDYm,           0 },
      { X86::VSQRTPSYr,       X86::VSQRTPSYm,           0 },
      { X86::VTESTPDYrr,      X86::VTESTPDYrm,          0 },
      { X86::VTESTPSYrr,      X86::VTESTPSYrm,          0 },
  
      // AVX2 foldable instructions
+
+    // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the
+    // VBROADCASTS{SD}rm memory instructions were available from AVX1.
+    // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction
+    // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions
+    // so they don't need an equivalent limitation.
      { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },
      { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm,     TB_NO_REVERSE },
      { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm,     TB_NO_REVERSE },
@@ -833,17 +862,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
-    unsigned RegOp = OpTbl1[i].RegOp;
-    unsigned MemOp = OpTbl1[i].MemOp;
-    unsigned Flags = OpTbl1[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) {
+    unsigned RegOp = MemoryFoldTable1[i].RegOp;
+    unsigned MemOp = MemoryFoldTable1[i].MemOp;
+    unsigned Flags = MemoryFoldTable1[i].Flags;
      AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
                    RegOp, MemOp,
                    // Index 1, folded load
                    Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
    }
  
-  static const X86OpTblEntry OpTbl2[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
      { X86::ADC32rr,         X86::ADC32rm,       0 },
      { X86::ADC64rr,         X86::ADC64rm,       0 },
      { X86::ADD16rr,         X86::ADD16rm,       0 },
@@ -925,6 +954,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::CMPPSrri,        X86::CMPPSrmi,      TB_ALIGN_16 },
      { X86::CMPSDrr,         X86::CMPSDrm,       0 },
      { X86::CMPSSrr,         X86::CMPSSrm,       0 },
+    { X86::CRC32r32r32,     X86::CRC32r32m32,   0 },
+    { X86::CRC32r64r64,     X86::CRC32r64m64,   0 },
      { X86::DIVPDrr,         X86::DIVPDrm,       TB_ALIGN_16 },
      { X86::DIVPSrr,         X86::DIVPSrm,       TB_ALIGN_16 },
      { X86::DIVSDrr,         X86::DIVSDrm,       0 },
@@ -933,6 +964,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::DIVSSrr_Int,     X86::DIVSSrm_Int,   0 },
      { X86::DPPDrri,         X86::DPPDrmi,       TB_ALIGN_16 },
      { X86::DPPSrri,         X86::DPPSrmi,       TB_ALIGN_16 },
+
+    // FIXME: We should not be folding Fs* scalar loads into vector
+    // instructions because the vector instructions require vector-sized
+    // loads. Lowering should create vector-sized instructions (the Fv*
+    // variants below) to allow load folding.
      { X86::FsANDNPDrr,      X86::FsANDNPDrm,    TB_ALIGN_16 },
      { X86::FsANDNPSrr,      X86::FsANDNPSrm,    TB_ALIGN_16 },
      { X86::FsANDPDrr,       X86::FsANDPDrm,     TB_ALIGN_16 },
@@ -941,6 +977,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::FsORPSrr,        X86::FsORPSrm,      TB_ALIGN_16 },
      { X86::FsXORPDrr,       X86::FsXORPDrm,     TB_ALIGN_16 },
      { X86::FsXORPSrr,       X86::FsXORPSrm,     TB_ALIGN_16 },
+
+    { X86::FvANDNPDrr,      X86::FvANDNPDrm,    TB_ALIGN_16 },
+    { X86::FvANDNPSrr,      X86::FvANDNPSrm,    TB_ALIGN_16 },
+    { X86::FvANDPDrr,       X86::FvANDPDrm,     TB_ALIGN_16 },
+    { X86::FvANDPSrr,       X86::FvANDPSrm,     TB_ALIGN_16 },
+    { X86::FvORPDrr,        X86::FvORPDrm,      TB_ALIGN_16 },
+    { X86::FvORPSrr,        X86::FvORPSrm,      TB_ALIGN_16 },
+    { X86::FvXORPDrr,       X86::FvXORPDrm,     TB_ALIGN_16 },
+    { X86::FvXORPSrr,       X86::FvXORPSrm,     TB_ALIGN_16 },
      { X86::HADDPDrr,        X86::HADDPDrm,      TB_ALIGN_16 },
      { X86::HADDPSrr,        X86::HADDPSrm,      TB_ALIGN_16 },
      { X86::HSUBPDrr,        X86::HSUBPDrm,      TB_ALIGN_16 },
@@ -1097,6 +1142,97 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::XORPDrr,         X86::XORPDrm,       TB_ALIGN_16 },
      { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 },
  
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPI2PSirr,   X86::MMX_CVTPI2PSirm,   0 },
+    { X86::MMX_PACKSSDWirr,   X86::MMX_PACKSSDWirm,   0 },
+    { X86::MMX_PACKSSWBirr,   X86::MMX_PACKSSWBirm,   0 },
+    { X86::MMX_PACKUSWBirr,   X86::MMX_PACKUSWBirm,   0 },
+    { X86::MMX_PADDBirr,      X86::MMX_PADDBirm,      0 },
+    { X86::MMX_PADDDirr,      X86::MMX_PADDDirm,      0 },
+    { X86::MMX_PADDQirr,      X86::MMX_PADDQirm,      0 },
+    { X86::MMX_PADDSBirr,     X86::MMX_PADDSBirm,     0 },
+    { X86::MMX_PADDSWirr,     X86::MMX_PADDSWirm,     0 },
+    { X86::MMX_PADDUSBirr,    X86::MMX_PADDUSBirm,    0 },
+    { X86::MMX_PADDUSWirr,    X86::MMX_PADDUSWirm,    0 },
+    { X86::MMX_PADDWirr,      X86::MMX_PADDWirm,      0 },
+    { X86::MMX_PALIGNR64irr,  X86::MMX_PALIGNR64irm,  0 },
+    { X86::MMX_PANDNirr,      X86::MMX_PANDNirm,      0 },
+    { X86::MMX_PANDirr,       X86::MMX_PANDirm,       0 },
+    { X86::MMX_PAVGBirr,      X86::MMX_PAVGBirm,      0 },
+    { X86::MMX_PAVGWirr,      X86::MMX_PAVGWirm,      0 },
+    { X86::MMX_PCMPEQBirr,    X86::MMX_PCMPEQBirm,    0 },
+    { X86::MMX_PCMPEQDirr,    X86::MMX_PCMPEQDirm,    0 },
+    { X86::MMX_PCMPEQWirr,    X86::MMX_PCMPEQWirm,    0 },
+    { X86::MMX_PCMPGTBirr,    X86::MMX_PCMPGTBirm,    0 },
+    { X86::MMX_PCMPGTDirr,    X86::MMX_PCMPGTDirm,    0 },
+    { X86::MMX_PCMPGTWirr,    X86::MMX_PCMPGTWirm,    0 },
+    { X86::MMX_PHADDSWrr64,   X86::MMX_PHADDSWrm64,   0 },
+    { X86::MMX_PHADDWrr64,    X86::MMX_PHADDWrm64,    0 },
+    { X86::MMX_PHADDrr64,     X86::MMX_PHADDrm64,     0 },
+    { X86::MMX_PHSUBDrr64,    X86::MMX_PHSUBDrm64,    0 },
+    { X86::MMX_PHSUBSWrr64,   X86::MMX_PHSUBSWrm64,   0 },
+    { X86::MMX_PHSUBWrr64,    X86::MMX_PHSUBWrm64,    0 },
+    { X86::MMX_PINSRWirri,    X86::MMX_PINSRWirmi,    0 },
+    { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+    { X86::MMX_PMADDWDirr,    X86::MMX_PMADDWDirm,    0 },
+    { X86::MMX_PMAXSWirr,     X86::MMX_PMAXSWirm,     0 },
+    { X86::MMX_PMAXUBirr,     X86::MMX_PMAXUBirm,     0 },
+    { X86::MMX_PMINSWirr,     X86::MMX_PMINSWirm,     0 },
+    { X86::MMX_PMINUBirr,     X86::MMX_PMINUBirm,     0 },
+    { X86::MMX_PMULHRSWrr64,  X86::MMX_PMULHRSWrm64,  0 },
+    { X86::MMX_PMULHUWirr,    X86::MMX_PMULHUWirm,    0 },
+    { X86::MMX_PMULHWirr,     X86::MMX_PMULHWirm,     0 },
+    { X86::MMX_PMULLWirr,     X86::MMX_PMULLWirm,     0 },
+    { X86::MMX_PMULUDQirr,    X86::MMX_PMULUDQirm,    0 },
+    { X86::MMX_PORirr,        X86::MMX_PORirm,        0 },
+    { X86::MMX_PSADBWirr,     X86::MMX_PSADBWirm,     0 },
+    { X86::MMX_PSHUFBrr64,    X86::MMX_PSHUFBrm64,    0 },
+    { X86::MMX_PSIGNBrr64,    X86::MMX_PSIGNBrm64,    0 },
+    { X86::MMX_PSIGNDrr64,    X86::MMX_PSIGNDrm64,    0 },
+    { X86::MMX_PSIGNWrr64,    X86::MMX_PSIGNWrm64,    0 },
+    { X86::MMX_PSLLDrr,       X86::MMX_PSLLDrm,       0 },
+    { X86::MMX_PSLLQrr,       X86::MMX_PSLLQrm,       0 },
+    { X86::MMX_PSLLWrr,       X86::MMX_PSLLWrm,       0 },
+    { X86::MMX_PSRADrr,       X86::MMX_PSRADrm,       0 },
+    { X86::MMX_PSRAWrr,       X86::MMX_PSRAWrm,       0 },
+    { X86::MMX_PSRLDrr,       X86::MMX_PSRLDrm,       0 },
+    { X86::MMX_PSRLQrr,       X86::MMX_PSRLQrm,       0 },
+    { X86::MMX_PSRLWrr,       X86::MMX_PSRLWrm,       0 },
+    { X86::MMX_PSUBBirr,      X86::MMX_PSUBBirm,      0 },
+    { X86::MMX_PSUBDirr,      X86::MMX_PSUBDirm,      0 },
+    { X86::MMX_PSUBQirr,      X86::MMX_PSUBQirm,      0 },
+    { X86::MMX_PSUBSBirr,     X86::MMX_PSUBSBirm,     0 },
+    { X86::MMX_PSUBSWirr,     X86::MMX_PSUBSWirm,     0 },
+    { X86::MMX_PSUBUSBirr,    X86::MMX_PSUBUSBirm,    0 },
+    { X86::MMX_PSUBUSWirr,    X86::MMX_PSUBUSWirm,    0 },
+    { X86::MMX_PSUBWirr,      X86::MMX_PSUBWirm,      0 },
+    { X86::MMX_PUNPCKHBWirr,  X86::MMX_PUNPCKHBWirm,  0 },
+    { X86::MMX_PUNPCKHDQirr,  X86::MMX_PUNPCKHDQirm,  0 },
+    { X86::MMX_PUNPCKHWDirr,  X86::MMX_PUNPCKHWDirm,  0 },
+    { X86::MMX_PUNPCKLBWirr,  X86::MMX_PUNPCKLBWirm,  0 },
+    { X86::MMX_PUNPCKLDQirr,  X86::MMX_PUNPCKLDQirm,  0 },
+    { X86::MMX_PUNPCKLWDirr,  X86::MMX_PUNPCKLWDirm,  0 },
+    { X86::MMX_PXORirr,       X86::MMX_PXORirm,       0 },
+
+    // 3DNow! version of foldable instructions
+    { X86::PAVGUSBrr,         X86::PAVGUSBrm,         0 },
+    { X86::PFACCrr,           X86::PFACCrm,           0 },
+    { X86::PFADDrr,           X86::PFADDrm,           0 },
+    { X86::PFCMPEQrr,         X86::PFCMPEQrm,         0 },
+    { X86::PFCMPGErr,         X86::PFCMPGErm,         0 },
+    { X86::PFCMPGTrr,         X86::PFCMPGTrm,         0 },
+    { X86::PFMAXrr,           X86::PFMAXrm,           0 },
+    { X86::PFMINrr,           X86::PFMINrm,           0 },
+    { X86::PFMULrr,           X86::PFMULrm,           0 },
+    { X86::PFNACCrr,          X86::PFNACCrm,          0 },
+    { X86::PFPNACCrr,         X86::PFPNACCrm,         0 },
+    { X86::PFRCPIT1rr,        X86::PFRCPIT1rm,        0 },
+    { X86::PFRCPIT2rr,        X86::PFRCPIT2rm,        0 },
+    { X86::PFRSQIT1rr,        X86::PFRSQIT1rm,        0 },
+    { X86::PFSUBrr,           X86::PFSUBrm,           0 },
+    { X86::PFSUBRrr,          X86::PFSUBRrm,          0 },
+    { X86::PMULHRWrr,         X86::PMULHRWrm,         0 },
+
      // AVX 128-bit versions of foldable instructions
      { X86::VCVTSD2SSrr,       X86::VCVTSD2SSrm,        0 },
      { X86::Int_VCVTSD2SSrr,   X86::Int_VCVTSD2SSrm,    0 },
@@ -1111,9 +1247,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VCVTSS2SDrr,       X86::VCVTSS2SDrm,        0 },
      { X86::Int_VCVTSS2SDrr,   X86::Int_VCVTSS2SDrm,    0 },
      { X86::VRCPSSr,           X86::VRCPSSm,            0 },
+    { X86::VRCPSSr_Int,       X86::VRCPSSm_Int,        0 },
      { X86::VRSQRTSSr,         X86::VRSQRTSSm,          0 },
+    { X86::VRSQRTSSr_Int,     X86::VRSQRTSSm_Int,      0 },
      { X86::VSQRTSDr,          X86::VSQRTSDm,           0 },
+    { X86::VSQRTSDr_Int,      X86::VSQRTSDm_Int,       0 },
      { X86::VSQRTSSr,          X86::VSQRTSSm,           0 },
+    { X86::VSQRTSSr_Int,      X86::VSQRTSSm_Int,       0 },
      { X86::VADDPDrr,          X86::VADDPDrm,           0 },
      { X86::VADDPSrr,          X86::VADDPSrm,           0 },
      { X86::VADDSDrr,          X86::VADDSDrm,           0 },
@@ -1142,14 +1282,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VDIVSSrr_Int,      X86::VDIVSSrm_Int,       0 },
      { X86::VDPPDrri,          X86::VDPPDrmi,           0 },
      { X86::VDPPSrri,          X86::VDPPSrmi,           0 },
-    { X86::VFsANDNPDrr,       X86::VFsANDNPDrm,        0 },
-    { X86::VFsANDNPSrr,       X86::VFsANDNPSrm,        0 },
-    { X86::VFsANDPDrr,        X86::VFsANDPDrm,         0 },
-    { X86::VFsANDPSrr,        X86::VFsANDPSrm,         0 },
-    { X86::VFsORPDrr,         X86::VFsORPDrm,          0 },
-    { X86::VFsORPSrr,         X86::VFsORPSrm,          0 },
-    { X86::VFsXORPDrr,        X86::VFsXORPDrm,         0 },
-    { X86::VFsXORPSrr,        X86::VFsXORPSrm,         0 },
+    // Do not fold VFs* loads because there are no scalar load variants for
+    // these instructions. When folded, the load is required to be 128 bits, so
+    // the load size would not match.
+    { X86::VFvANDNPDrr,       X86::VFvANDNPDrm,        0 },
+    { X86::VFvANDNPSrr,       X86::VFvANDNPSrm,        0 },
+    { X86::VFvANDPDrr,        X86::VFvANDPDrm,         0 },
+    { X86::VFvANDPSrr,        X86::VFvANDPSrm,         0 },
+    { X86::VFvORPDrr,         X86::VFvORPDrm,          0 },
+    { X86::VFvORPSrr,         X86::VFvORPSrm,          0 },
+    { X86::VFvXORPDrr,        X86::VFvXORPDrm,         0 },
+    { X86::VFvXORPSrr,        X86::VFvXORPSrm,         0 },
      { X86::VHADDPDrr,         X86::VHADDPDrm,          0 },
      { X86::VHADDPSrr,         X86::VHADDPSrm,          0 },
      { X86::VHSUBPDrr,         X86::VHSUBPDrm,          0 },
@@ -1553,8 +1696,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VPSUBQZrr,         X86::VPSUBQZrm,           0 },
      { X86::VSHUFPDZrri,       X86::VSHUFPDZrmi,         0 },
      { X86::VSHUFPSZrri,       X86::VSHUFPSZrmi,         0 },
-    { X86::VALIGNQrri,        X86::VALIGNQrmi,          0 },
-    { X86::VALIGNDrri,        X86::VALIGNDrmi,          0 },
+    { X86::VALIGNQZrri,       X86::VALIGNQZrmi,         0 },
+    { X86::VALIGNDZrri,       X86::VALIGNDZrmi,         0 },
      { X86::VPMULUDQZrr,       X86::VPMULUDQZrm,         0 },
      { X86::VBROADCASTSSZrkz,  X86::VBROADCASTSSZmkz,    TB_NO_REVERSE },
      { X86::VBROADCASTSDZrkz,  X86::VBROADCASTSDZmkz,    TB_NO_REVERSE },
@@ -1590,17 +1733,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::SHA256RNDS2rr,     X86::SHA256RNDS2rm,       TB_ALIGN_16 }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
-    unsigned RegOp = OpTbl2[i].RegOp;
-    unsigned MemOp = OpTbl2[i].MemOp;
-    unsigned Flags = OpTbl2[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) {
+    unsigned RegOp = MemoryFoldTable2[i].RegOp;
+    unsigned MemOp = MemoryFoldTable2[i].MemOp;
+    unsigned Flags = MemoryFoldTable2[i].Flags;
      AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
                    RegOp, MemOp,
                    // Index 2, folded load
                    Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
    }
  
-  static const X86OpTblEntry OpTbl3[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
      // FMA foldable instructions
      { X86::VFMADDSSr231r,         X86::VFMADDSSr231m,         TB_ALIGN_NONE },
      { X86::VFMADDSDr231r,         X86::VFMADDSDr231m,         TB_ALIGN_NONE },
@@ -1806,17 +1949,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VMAXPDZ128rrkz,        X86::VMAXPDZ128rmkz,        0 }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
-    unsigned RegOp = OpTbl3[i].RegOp;
-    unsigned MemOp = OpTbl3[i].MemOp;
-    unsigned Flags = OpTbl3[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) {
+    unsigned RegOp = MemoryFoldTable3[i].RegOp;
+    unsigned MemOp = MemoryFoldTable3[i].MemOp;
+    unsigned Flags = MemoryFoldTable3[i].Flags;
      AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
                    RegOp, MemOp,
                    // Index 3, folded load
                    Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
    }
  
-  static const X86OpTblEntry OpTbl4[] = {
+  static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
       // AVX-512 foldable instructions
      { X86::VADDPSZrrk,         X86::VADDPSZrmk,           0 },
      { X86::VADDPDZrrk,         X86::VADDPDZrmk,           0 },
@@ -1858,10 +2001,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
      { X86::VMAXPDZ128rrk,      X86::VMAXPDZ128rmk,        0 }
    };
  
-  for (unsigned i = 0, e = array_lengthof(OpTbl4); i != e; ++i) {
-    unsigned RegOp = OpTbl4[i].RegOp;
-    unsigned MemOp = OpTbl4[i].MemOp;
-    unsigned Flags = OpTbl4[i].Flags;
+  for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) {
+    unsigned RegOp = MemoryFoldTable4[i].RegOp;
+    unsigned MemOp = MemoryFoldTable4[i].MemOp;
+    unsigned Flags = MemoryFoldTable4[i].Flags;
      AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
                    RegOp, MemOp,
                    // Index 4, folded load
@@ -1984,7 +2127,7 @@ int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
    }
  }
  
-/// isFrameOperand - Return true and the FrameIndex if the specified
+/// Return true and the FrameIndex if the specified
  /// operand and follow operands form a reference to the stack frame.
  bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
                                    int &FrameIndex) const {
@@ -2111,8 +2254,7 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
    return 0;
  }
  
-/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
-/// X86::MOVPC32r.
+/// Return true if register is PIC base; i.e., defined by X86::MOVPC32r.
  static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
    // Don't waste compile time scanning use-def chains of physregs.
    if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
@@ -2308,8 +2450,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
    NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
  }
  
-/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
-/// is not marked dead.
+/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
  static bool hasLiveCondCodeDef(MachineInstr *MI) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
@@ -2321,8 +2462,7 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
    return false;
  }
  
-/// getTruncatedShiftCount - check whether the shift count for a machine operand
-/// is non-zero.
+/// Return the shift count for a machine operand, masked to its valid width.
  inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
                                                unsigned ShiftAmtOperandIdx) {
    // The shift count is six bits with the REX.W prefix and five bits without.
@@ -2331,7 +2471,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
    return Imm & ShiftCountMask;
  }
  
-/// isTruncatedShiftCountForLEA - check whether the given shift count is appropriate
+/// Check whether the given shift count is appropriate, i.e., whether it
  /// can be represented by a LEA instruction.
  inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
    // Left shift instructions can be transformed into load-effective-address
@@ -2413,10 +2553,9 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
    return true;
  }
  
-/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
-/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
-/// to a 32-bit superregister and then truncating back down to a 16-bit
-/// subregister.
+/// Helper for convertToThreeAddress when 16-bit LEA is disabled, use 32-bit
+/// LEA to form 3-address code by promoting to a 32-bit superregister and then
+/// truncating back down to a 16-bit subregister.
  MachineInstr *
  X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                             MachineFunction::iterator &MFI,
@@ -2523,7 +2662,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
    return ExtMI;
  }
  
-/// convertToThreeAddress - This method must be implemented by targets that
+/// This method must be implemented by targets that
  /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
  /// may be able to convert a two-address instruction into a true
  /// three-address instruction on demand.  This allows the X86 target (for
@@ -2798,8 +2937,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
    return NewMI;
  }
  
-/// commuteInstruction - We have a few instructions that must be hacked on to
-/// commute them.
+/// We have a few instructions that must be hacked on to commute them.
  ///
  MachineInstr *
  X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
@@ -3090,7 +3228,7 @@ static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
    }
  }
  
-/// getCondFromSETOpc - return condition code of a SET opcode.
+/// Return condition code of a SET opcode.
  static X86::CondCode getCondFromSETOpc(unsigned Opc) {
    switch (Opc) {
    default: return X86::COND_INVALID;
@@ -3113,7 +3251,7 @@ static X86::CondCode getCondFromSETOpc(unsigned Opc) {
    }
  }
  
-/// getCondFromCmovOpc - return condition code of a CMov opcode.
+/// Return condition code of a CMov opcode.
  X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
    switch (Opc) {
    default: return X86::COND_INVALID;
@@ -3190,7 +3328,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
    }
  }
  
-/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// Return the inverse of the specified condition,
  /// e.g. turning COND_E to COND_NE.
  X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
    switch (CC) {
@@ -3214,9 +3352,8 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
    }
  }
  
-/// getSwappedCondition - assume the flags are set by MI(a,b), return
-/// the condition code if we modify the instructions such that flags are
-/// set by MI(b,a).
+/// Assuming the flags are set by MI(a,b), return the condition code if we
+/// modify the instructions such that flags are set by MI(b,a).
  static X86::CondCode getSwappedCondition(X86::CondCode CC) {
    switch (CC) {
    default: return X86::COND_INVALID;
@@ -3233,7 +3370,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
    }
  }
  
-/// getSETFromCond - Return a set opcode for the given condition and
+/// Return a set opcode for the given condition and
  /// whether it has memory operand.
  unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
    static const uint16_t Opc[16][2] = {
@@ -3259,7 +3396,7 @@ unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
    return Opc[CC][HasMemoryOperand ? 1 : 0];
  }
  
-/// getCMovFromCond - Return a cmov opcode for the given condition,
+/// Return a cmov opcode for the given condition,
  /// register size in bytes, and operand type.
  unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
                                bool HasMemoryOperand) {
@@ -3319,11 +3456,11 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
    return !isPredicated(MI);
  }
  
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                 MachineBasicBlock *&TBB,
-                                 MachineBasicBlock *&FBB,
-                                 SmallVectorImpl<MachineOperand> &Cond,
-                                 bool AllowModify) const {
+bool X86InstrInfo::AnalyzeBranchImpl(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond,
+    SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
+
    // Start from the bottom of the block and work up, examining the
    // terminator instructions.
    MachineBasicBlock::iterator I = MBB.end();
@@ -3421,6 +3558,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
        FBB = TBB;
        TBB = I->getOperand(0).getMBB();
        Cond.push_back(MachineOperand::CreateImm(BranchCode));
+      CondBranches.push_back(I);
        continue;
      }
  
@@ -3458,11 +3596,90 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
  
      // Update the MachineOperand.
      Cond[0].setImm(BranchCode);
+    CondBranches.push_back(I);
    }
  
    return false;
  }
  
+/// Public branch-analysis entry point. Forwards every argument to
+/// AnalyzeBranchImpl and throws away the list of conditional branch
+/// instructions that the implementation collects, because this interface has
+/// no way to hand it back to the caller.
+///
+/// \returns whatever AnalyzeBranchImpl returns (false when the terminators of
+/// \p MBB were analyzed successfully).
+bool X86InstrInfo::AnalyzeBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
+  // Scratch storage only; the collected branches are never inspected here.
+  SmallVector<MachineInstr *, 4> UnusedCondBranches;
+  const bool Failed =
+      AnalyzeBranchImpl(MBB, TBB, FBB, Cond, UnusedCondBranches, AllowModify);
+  return Failed;
+}
+
+/// Try to describe the conditional branch that terminates \p MBB as a simple
+/// predicate and store the description in \p MBP.
+///
+/// On success (return value false):
+///  - MBP.TrueDest / MBP.FalseDest are the two branch destinations (the false
+///    destination defaults to the block following \p MBB),
+///  - MBP.ConditionDef is the closest preceding instruction that modifies
+///    EFLAGS,
+///  - MBP.SingleUseCondition is true if no other reader of that EFLAGS value
+///    was found in the block and no successor has EFLAGS live-in,
+///  - MBP.LHS / MBP.RHS / MBP.Predicate encode "reg == 0" or "reg != 0".
+///
+/// Returns true when the block does not match the recognized pattern. Note
+/// that some fields of \p MBP may already have been written in that case.
+///
+/// \param AllowModify forwarded unchanged to AnalyzeBranchImpl.
+bool X86InstrInfo::AnalyzeBranchPredicate(MachineBasicBlock &MBB,
+                                          MachineBranchPredicate &MBP,
+                                          bool AllowModify) const {
+  SmallVector<MachineOperand, 4> Cond;
+  // Required by AnalyzeBranchImpl's signature; not consulted afterwards.
+  SmallVector<MachineInstr *, 4> CondBranches;
+  if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
+                        AllowModify))
+    return true;
+
+  // Only a branch described by exactly one condition operand is handled.
+  if (Cond.size() != 1)
+    return true;
+
+  assert(MBP.TrueDest && "expected!");
+
+  // No explicit false destination: use the block that follows MBB.
+  if (!MBP.FalseDest)
+    MBP.FalseDest = MBB.getNextNode();
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+  MachineInstr *ConditionDef = nullptr;
+  bool SingleUseCondition = true;
+
+  // Walk backwards from the instruction just before the last one, looking for
+  // the nearest writer of EFLAGS and noting any reader seen on the way.
+  // NOTE(review): this skips only the final instruction, so it presumably
+  // expects the conditional branch to be last in the block; if another
+  // terminator follows it, the branch itself is visited here -- confirm that
+  // the resulting SingleUseCondition value is acceptable to callers.
+  for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
+    if (I->modifiesRegister(X86::EFLAGS, TRI)) {
+      ConditionDef = &*I;
+      break;
+    }
+
+    if (I->readsRegister(X86::EFLAGS, TRI))
+      SingleUseCondition = false;
+  }
+
+  if (!ConditionDef)
+    return true;
+
+  // EFLAGS being live into any successor also counts as an additional use.
+  if (SingleUseCondition) {
+    for (auto *Succ : MBB.successors()) {
+      if (Succ->isLiveIn(X86::EFLAGS)) {
+        SingleUseCondition = false;
+        break;
+      }
+    }
+  }
+
+  MBP.ConditionDef = ConditionDef;
+  MBP.SingleUseCondition = SingleUseCondition;
+
+  // Currently we only recognize the simple pattern:
+  //
+  //   test %reg, %reg
+  //   je %label
+  //
+  // Only the TEST width selected here is matched: TEST64rr on 64-bit
+  // subtargets, TEST32rr otherwise.
+  const unsigned TestOpcode =
+      Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
+
+  if (ConditionDef->getOpcode() == TestOpcode &&
+      ConditionDef->getNumOperands() == 3 &&
+      ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
+      (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
+    MBP.LHS = ConditionDef->getOperand(0);
+    MBP.RHS = MachineOperand::CreateImm(0);
+    MBP.Predicate = Cond[0].getImm() == X86::COND_NE
+                        ? MachineBranchPredicate::PRED_NE
+                        : MachineBranchPredicate::PRED_EQ;
+    return false;
+  }
+
+  return true;
+}
+
  unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
    MachineBasicBlock::iterator I = MBB.end();
    unsigned Count = 0;
@@ -3485,8 +3702,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  
  unsigned
  X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                           MachineBasicBlock *FBB,
-                           const SmallVectorImpl<MachineOperand> &Cond,
+                           MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                             DebugLoc DL) const {
    // Shouldn't be a fall through.
    assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -3534,7 +3750,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
  
  bool X86InstrInfo::
  canInsertSelect(const MachineBasicBlock &MBB,
-                const SmallVectorImpl<MachineOperand> &Cond,
+                ArrayRef<MachineOperand> Cond,
                  unsigned TrueReg, unsigned FalseReg,
                  int &CondCycles, int &TrueCycles, int &FalseCycles) const {
    // Not all subtargets have cmov instructions.
@@ -3571,8 +3787,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
  
  void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I, DebugLoc DL,
-                                unsigned DstReg,
-                                const SmallVectorImpl<MachineOperand> &Cond,
+                                unsigned DstReg, ArrayRef<MachineOperand> Cond,
                                  unsigned TrueReg, unsigned FalseReg) const {
     MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
     assert(Cond.size() == 1 && "Invalid Cond array");
@@ -3582,7 +3797,7 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
  }
  
-/// isHReg - Test if the given register is a physical h register.
+/// Test if the given register is a physical h register.
  static bool isHReg(unsigned Reg) {
    return X86::GR8_ABCD_HRegClass.contains(Reg);
  }
@@ -3830,6 +4045,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
    }
  }
  
+/// Decompose the memory reference of \p MemOp into a base register plus a
+/// constant displacement.
+///
+/// Succeeds only when the instruction has a memory reference whose base
+/// operand is a register, whose scale is 1, which has no index register and
+/// whose displacement is an immediate.
+///
+/// \param [out] BaseReg receives the base register. It is written as soon as
+///        the base operand is known to be a register, i.e. possibly also on a
+///        later failure.
+/// \param [out] Offset receives the displacement; written only on success.
+/// \param TRI not used by this implementation.
+/// \returns true if the reference has the form described above.
+bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr *MemOp, unsigned &BaseReg,
+                                         unsigned &Offset,
+                                         const TargetRegisterInfo *TRI) const {
+  const MCInstrDesc &Desc = MemOp->getDesc();
+  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags, MemOp->getOpcode());
+  if (MemRefBegin < 0)
+    return false;
+
+  MemRefBegin += X86II::getOperandBias(Desc);
+
+  // The base slot is not necessarily a register: address operands built from
+  // a bare frame index put the frame index there. Asking such an operand for
+  // its register is invalid, so reject it up front.
+  const MachineOperand &BaseMO =
+      MemOp->getOperand(MemRefBegin + X86::AddrBaseReg);
+  if (!BaseMO.isReg())
+    return false;
+
+  BaseReg = BaseMO.getReg();
+  if (MemOp->getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
+    return false;
+
+  if (MemOp->getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
+      X86::NoRegister)
+    return false;
+
+  const MachineOperand &DispMO = MemOp->getOperand(MemRefBegin + X86::AddrDisp);
+
+  // Displacement can be symbolic
+  if (!DispMO.isImm())
+    return false;
+
+  // NOTE(review): the immediate is presumably a signed 64-bit value, so a
+  // negative displacement wraps when stored into the unsigned Offset --
+  // confirm that callers expect this.
+  Offset = DispMO.getImm();
+
+  // The index register was already verified to be absent above.
+  return true;
+}
+
  static unsigned getStoreRegOpcode(unsigned SrcReg,
                                    const TargetRegisterClass *RC,
                                    bool isStackAligned,
@@ -3989,7 +4234,7 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
    return false;
  }
  
-/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// Check whether the first instruction, whose only
  /// purpose is to update flags, can be made redundant.
  /// CMPrr can be made redundant by SUBrr if the operands are the same.
  /// This function can be extended later on.
@@ -4032,7 +4277,7 @@ inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
    return false;
  }
  
-/// isDefConvertible - check whether the definition can be converted
+/// Check whether the definition can be converted
  /// to remove a comparison against zero.
  inline static bool isDefConvertible(MachineInstr *MI) {
    switch (MI->getOpcode()) {
@@ -4118,8 +4363,7 @@ inline static bool isDefConvertible(MachineInstr *MI) {
    }
  }
  
-/// isUseDefConvertible - check whether the use can be converted
-/// to remove a comparison against zero.
+/// Check whether the use can be converted to remove a comparison against zero.
  static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
    switch (MI->getOpcode()) {
    default: return X86::COND_INVALID;
@@ -4138,7 +4382,7 @@ static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
    }
  }
  
-/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// Check if there exists an earlier instruction that
  /// operates on the same source operands and sets flags in the same way as
  /// Compare; remove Compare if possible.
  bool X86InstrInfo::
@@ -4429,7 +4673,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
    return true;
  }
  
-/// optimizeLoadInstr - Try to remove the load by folding it to a register
+/// Try to remove the load by folding it to a register
  /// operand at the use. We fold the load instructions if load defines a virtual
  /// register, the virtual register is used once in the same BB, and the
  /// instructions in-between do not load or store, and have no side effects.
@@ -4449,7 +4693,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
    DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
    assert(DefMI);
    bool SawStore = false;
-  if (!DefMI->isSafeToMove(this, nullptr, SawStore))
+  if (!DefMI->isSafeToMove(nullptr, SawStore))
      return nullptr;
  
    // Collect information about virtual register operands of MI.
@@ -4473,9 +4717,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
      return nullptr;
  
    // Check whether we can fold the def into SrcOperandId.
-  SmallVector<unsigned, 8> Ops;
-  Ops.push_back(SrcOperandId);
-  MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+  MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
    if (FoldMI) {
      FoldAsLoadDefReg = 0;
      return FoldMI;
@@ -4484,9 +4726,9 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
    return nullptr;
  }
  
-/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
-/// instruction with two undef reads of the register being defined.  This is
-/// used for mapping:
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two undef reads of the register being defined.
+/// This is used for mapping:
  ///   %xmm4 = V_SET0
  /// to:
  ///   %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
@@ -4569,8 +4811,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
    return false;
  }
  
+/// Append the memory-address operands in \p MOs to the instruction under
+/// construction. A list shorter than four operands is treated as a lone frame
+/// index and is completed with addOffset(MIB, 0).
+static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
+  for (const MachineOperand &AddrOp : MOs)
+    MIB.addOperand(AddrOp);
+
+  // Fewer than four operands: FrameIndex only.
+  const bool IsFrameIndexOnly = MOs.size() < 4;
+  if (IsFrameIndexOnly)
+    addOffset(MIB, 0);
+}
+
  static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
-                                     const SmallVectorImpl<MachineOperand> &MOs,
+                                     ArrayRef<MachineOperand> MOs,
+                                     MachineBasicBlock::iterator InsertPt,
                                       MachineInstr *MI,
                                       const TargetInstrInfo &TII) {
    // Create the base instruction with the memory operand as the first part.
@@ -4578,11 +4829,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
    MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                                MI->getDebugLoc(), true);
    MachineInstrBuilder MIB(MF, NewMI);
-  unsigned NumAddrOps = MOs.size();
-  for (unsigned i = 0; i != NumAddrOps; ++i)
-    MIB.addOperand(MOs[i]);
-  if (NumAddrOps < 4)  // FrameIndex only
-    addOffset(MIB, 0);
+  addOperands(MIB, MOs);
  
    // Loop over the rest of the ri operands, converting them over.
    unsigned NumOps = MI->getDesc().getNumOperands()-2;
@@ -4594,12 +4841,16 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
      MachineOperand &MO = MI->getOperand(i);
      MIB.addOperand(MO);
    }
+
+  MachineBasicBlock *MBB = InsertPt->getParent();
+  MBB->insert(InsertPt, NewMI);
+
    return MIB;
  }
  
-static MachineInstr *FuseInst(MachineFunction &MF,
-                              unsigned Opcode, unsigned OpNo,
-                              const SmallVectorImpl<MachineOperand> &MOs,
+static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
+                              unsigned OpNo, ArrayRef<MachineOperand> MOs,
+                              MachineBasicBlock::iterator InsertPt,
                                MachineInstr *MI, const TargetInstrInfo &TII) {
    // Omit the implicit operands, something BuildMI can't do.
    MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4610,38 +4861,32 @@ static MachineInstr *FuseInst(MachineFunction &MF,
      MachineOperand &MO = MI->getOperand(i);
      if (i == OpNo) {
        assert(MO.isReg() && "Expected to fold into reg operand!");
-      unsigned NumAddrOps = MOs.size();
-      for (unsigned i = 0; i != NumAddrOps; ++i)
-        MIB.addOperand(MOs[i]);
-      if (NumAddrOps < 4)  // FrameIndex only
-        addOffset(MIB, 0);
+      addOperands(MIB, MOs);
      } else {
        MIB.addOperand(MO);
      }
    }
+
+  MachineBasicBlock *MBB = InsertPt->getParent();
+  MBB->insert(InsertPt, NewMI);
+
    return MIB;
  }
  
  static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
-                                const SmallVectorImpl<MachineOperand> &MOs,
+                                ArrayRef<MachineOperand> MOs,
+                                MachineBasicBlock::iterator InsertPt,
                                  MachineInstr *MI) {
-  MachineFunction &MF = *MI->getParent()->getParent();
-  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
-
-  unsigned NumAddrOps = MOs.size();
-  for (unsigned i = 0; i != NumAddrOps; ++i)
-    MIB.addOperand(MOs[i]);
-  if (NumAddrOps < 4)  // FrameIndex only
-    addOffset(MIB, 0);
+  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+                                    MI->getDebugLoc(), TII.get(Opcode));
+  addOperands(MIB, MOs);
    return MIB.addImm(0);
  }
  
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-                                    MachineInstr *MI, unsigned OpNum,
-                                    const SmallVectorImpl<MachineOperand> &MOs,
-                                    unsigned Size, unsigned Align,
-                                    bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
+    ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
+    unsigned Size, unsigned Align, bool AllowCommute) const {
    const DenseMap<unsigned,
                   std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
    bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4675,7 +4920,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      isTwoAddrFold = true;
    } else if (OpNum == 0) {
      if (MI->getOpcode() == X86::MOV32r0) {
-      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
        if (NewMI)
          return NewMI;
      }
@@ -4720,9 +4965,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        }
  
        if (isTwoAddrFold)
-        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
+        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
        else
-        NewMI = FuseInst(MF, Opcode, OpNum, MOs, MI, *this);
+        NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
  
        if (NarrowToMOV32rm) {
          // If this is the special case where we use a MOV32rm to load a 32-bit
@@ -4774,8 +5019,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
          // Attempt to fold with the commuted version of the instruction.
          unsigned CommuteOp =
              (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
-        NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
-                                      /*AllowCommute=*/false);
+        NewMI =
+            foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
+                                  /*AllowCommute=*/false);
          if (NewMI)
            return NewMI;
  
@@ -4803,7 +5049,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    return nullptr;
  }
  
-/// hasPartialRegUpdate - Return true for all instructions that only update
+/// Return true for all instructions that only update
  /// the first 32 or 64-bits of the destination register and leave the rest
  /// unmodified. This can be used to avoid folding loads if the instructions
  /// only update part of the destination register, and the non-updated part is
@@ -4865,7 +5111,7 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
    return false;
  }
  
-/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// Inform the ExeDepsFix pass how many idle
  /// instructions we would like before a partial register update.
  unsigned X86InstrInfo::
  getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
@@ -5004,10 +5250,9 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
    MI->addRegisterKilled(Reg, TRI, true);
  }
  
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops,
-                                    int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
    // Check switch flag
    if (NoFusing) return nullptr;
  
@@ -5045,9 +5290,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
    } else if (Ops.size() != 1)
      return nullptr;
  
-  SmallVector<MachineOperand,4> MOs;
-  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+  return foldMemoryOperandImpl(MF, MI, Ops[0],
+                               MachineOperand::CreateFI(FrameIndex), InsertPt,
                                 Size, Alignment, /*AllowCommute=*/true);
  }
  
@@ -5070,17 +5314,16 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
    return false;
  }
  
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-                                                  MachineInstr *MI,
-                                           const SmallVectorImpl<unsigned> &Ops,
-                                                  MachineInstr *LoadMI) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, MachineInstr *LoadMI) const {
    // If loading from a FrameIndex, fold directly from the FrameIndex.
    unsigned NumOps = LoadMI->getDesc().getNumOperands();
    int FrameIndex;
    if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
      if (isPartialRegisterLoad(*LoadMI, MF))
        return nullptr;
-    return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+    return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
    }
  
    // Check switch flag
@@ -5195,18 +5438,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        return nullptr;
  
      // Folding a normal load. Just copy the load's address operands.
-    for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
-      MOs.push_back(LoadMI->getOperand(i));
+    MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands,
+               LoadMI->operands_begin() + NumOps);
      break;
    }
    }
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
                                 /*Size=*/0, Alignment, /*AllowCommute=*/true);
  }
  
-
  bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-                                  const SmallVectorImpl<unsigned> &Ops) const {
+                                        ArrayRef<unsigned> Ops) const {
    // Check switch flag
    if (NoFusing) return 0;
  
@@ -5459,7 +5701,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
    }
    if (Load)
      BeforeOps.push_back(SDValue(Load, 0));
-  std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+  BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
    SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
    NewNodes.push_back(NewNode);
  
@@ -5841,7 +6083,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
             RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
  }
  
-/// getGlobalBaseReg - Return a virtual register initialized with the
+/// Return a virtual register initialized with
  /// the global base register value. Output instructions required to
  /// initialize the register in the function entry block, if necessary.
  ///
@@ -5874,6 +6116,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
    { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
    { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
    { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
+  { X86::MOVLPSmr,   X86::MOVLPDmr,  X86::MOVPQI2QImr  },
    { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
    { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
    { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
@@ -5889,6 +6132,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
    { X86::VMOVAPSrr,  X86::VMOVAPDrr,  X86::VMOVDQArr  },
    { X86::VMOVUPSmr,  X86::VMOVUPDmr,  X86::VMOVDQUmr  },
    { X86::VMOVUPSrm,  X86::VMOVUPDrm,  X86::VMOVDQUrm  },
+  { X86::VMOVLPSmr,  X86::VMOVLPDmr,  X86::VMOVPQI2QImr  },
    { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
    { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
    { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
@@ -5974,7 +6218,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
    MI->setDesc(get(table[Domain-1]));
  }
  
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+/// Return the noop instruction to use for a noop.
  void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
    NopInst.setOpcode(X86::NOOP);
  }
@@ -5986,7 +6230,7 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  void X86InstrInfo::getUnconditionalBranch(
      MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
    Branch.setOpcode(X86::JMP_1);
-  Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+  Branch.addOperand(MCOperand::createExpr(BranchTarget));
  }
  
  // This code must remain in sync with getJumpInstrTableEntryBound in this class!
@@ -6083,15 +6327,219 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
  }
  
  bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
+hasHighOperandLatency(const TargetSchedModel &SchedModel,
                        const MachineRegisterInfo *MRI,
                        const MachineInstr *DefMI, unsigned DefIdx,
                        const MachineInstr *UseMI, unsigned UseIdx) const {
    return isHighLatencyDef(DefMI->getOpcode());
  }
  
+/// If the input instruction is part of a chain of dependent ops that are
+/// suitable for reassociation, return the earlier instruction in the sequence
+/// that feeds it, otherwise return a nullptr.
+///
+/// The returned instruction defines operand 1 of \p Inst, or operand 2 when
+/// the operands have to be commuted; in the latter case \p Commuted is set to
+/// true. \p Commuted is false on every other path, including failure.
+///
+/// \param AssocOpcode     the opcode both \p Inst and the returned
+///                        instruction must have.
+/// \param checkPrevOneUse when true, additionally require that the result of
+///                        the returned instruction has a single non-debug use.
+static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
+                                        unsigned AssocOpcode,
+                                        bool checkPrevOneUse,
+                                        bool &Commuted) {
+  // Give the out-parameter a defined value on every path, not just on the
+  // ones that get past the early exits below.
+  Commuted = false;
+
+  if (Inst.getOpcode() != AssocOpcode)
+    return nullptr;
+
+  // The operands are only inspected, so bind them by reference instead of
+  // copying them.
+  const MachineOperand &Op1 = Inst.getOperand(1);
+  const MachineOperand &Op2 = Inst.getOperand(2);
+
+  const MachineBasicBlock *MBB = Inst.getParent();
+  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  // We need virtual register definitions.
+  MachineInstr *MI1 = nullptr;
+  MachineInstr *MI2 = nullptr;
+  if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+    MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+  if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+    MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+  // And they need to be in the trace (otherwise, they won't have a depth).
+  if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
+    return nullptr;
+
+  // If only the second operand is produced by the associative opcode, treat
+  // it as the chain link and remember that the operands were swapped.
+  if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+    std::swap(MI1, MI2);
+    Commuted = true;
+  }
+
+  // Avoid reassociating operands when it won't provide any benefit. If both
+  // operands are produced by instructions of this type, we may already
+  // have the optimal sequence.
+  if (MI2->getOpcode() == AssocOpcode)
+    return nullptr;
+
+  // The instruction must only be used by the other instruction that we
+  // reassociate with.
+  if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+    return nullptr;
+
+  // We must match a simple chain of dependent ops.
+  // TODO: This check is not necessary for the earliest instruction in the
+  // sequence. Instead of a sequence of 3 dependent instructions with the same
+  // opcode, we only need to find a sequence of 2 dependent instructions with
+  // the same opcode plus 1 other instruction that adds to the height of the
+  // trace.
+  if (MI1->getOpcode() != AssocOpcode)
+    return nullptr;
+
+  return MI1;
+}
+
+/// Map the two "operands were commuted" flags onto the matching reassociation
+/// pattern.
+static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
+                                                     bool CommuteRoot) {
+  // In the pattern names the first letter pair (AX / XA) follows CommutePrev
+  // and the second pair (BY / YB) follows CommuteRoot.
+  if (CommutePrev)
+    return CommuteRoot ? MachineCombinerPattern::MC_REASSOC_XA_YB
+                       : MachineCombinerPattern::MC_REASSOC_XA_BY;
+
+  return CommuteRoot ? MachineCombinerPattern::MC_REASSOC_AX_YB
+                     : MachineCombinerPattern::MC_REASSOC_AX_BY;
+}
+
+/// Check whether \p Root ends a chain of dependent operations that can be
+/// reassociated and, if so, append the matching pattern to \p Pattern.
+///
+/// Matching is attempted only when the target options enable UnsafeFPMath.
+/// \returns true if a pattern was appended, false otherwise.
+bool X86InstrInfo::hasPattern(MachineInstr &Root,
+        SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Pattern) const {
+  const bool UnsafeMath =
+      Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+  if (!UnsafeMath)
+    return false;
+
+  // TODO: There are many more associative instruction types to match:
+  //       1. Other forms of scalar FP add (non-AVX)
+  //       2. Other data types (double, integer, vectors)
+  //       3. Other math / logic operations (mul, and, or)
+  const unsigned AssocOpcode = X86::VADDSSrr;
+
+  // TODO: There is nothing x86-specific here except the instruction type.
+  // This logic could be hoisted into the machine combiner pass itself.
+
+  // Root must be fed by an instruction of the same kind whose result has no
+  // other non-debug use...
+  bool CommuteRoot;
+  MachineInstr *Prev =
+      isReassocCandidate(Root, AssocOpcode, true, CommuteRoot);
+  if (!Prev)
+    return false;
+
+  // ...and that instruction must itself be a candidate (its own feeder is not
+  // subject to the single-use requirement).
+  bool CommutePrev;
+  if (!isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev))
+    return false;
+
+  // We found a sequence of instructions that may be suitable for a
+  // reassociation of operands to increase ILP.
+  Pattern.push_back(getPattern(CommutePrev, CommuteRoot));
+  return true;
+}
+
+/// Attempt the following reassociation to reduce critical path length:
+///   B = A op X (Prev)
+///   C = B op Y (Root)
+///   ===>
+///   B = X op Y
+///   C = A op B
+///
+/// Nothing is inserted or erased here: the two replacement instructions are
+/// appended to \p InsInstrs, \p Prev and \p Root are appended to \p DelInstrs,
+/// and the freshly created virtual register is recorded in
+/// \p InstrIdxForVirtReg with index 0.
+static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
+                           MachineCombinerPattern::MC_PATTERN Pattern,
+                           SmallVectorImpl<MachineInstr *> &InsInstrs,
+                           SmallVectorImpl<MachineInstr *> &DelInstrs,
+                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+  MachineFunction &MF = *Root.getParent()->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+  // Operand positions of A, B, X and Y (columns) for each pattern (rows); the
+  // positions differ because the operands may be commuted. A and X are looked
+  // up in Prev, B and Y in Root.
+  // NOTE(review): the table is indexed directly by the pattern value, so the
+  // rows presumably follow the enumerator order AX_BY, AX_YB, XA_BY, XA_YB
+  // with values 0..3 -- confirm against the MC_PATTERN declaration.
+  static const unsigned OpIdx[4][4] = {
+    { 1, 1, 2, 2 },
+    { 1, 2, 2, 1 },
+    { 2, 1, 1, 2 },
+    { 2, 2, 1, 1 }
+  };
+  const unsigned *Idx = OpIdx[Pattern];
+
+  MachineOperand &OpA = Prev.getOperand(Idx[0]);
+  MachineOperand &OpB = Root.getOperand(Idx[1]);
+  MachineOperand &OpX = Prev.getOperand(Idx[2]);
+  MachineOperand &OpY = Root.getOperand(Idx[3]);
+  MachineOperand &OpC = Root.getOperand(0);
+
+  const unsigned RegA = OpA.getReg();
+  const unsigned RegB = OpB.getReg();
+  const unsigned RegX = OpX.getReg();
+  const unsigned RegY = OpY.getReg();
+  const unsigned RegC = OpC.getReg();
+
+  // Constrain every virtual register involved to the class required for the
+  // result of Root.
+  const unsigned RegsToConstrain[] = { RegA, RegB, RegX, RegY, RegC };
+  for (unsigned Reg : RegsToConstrain)
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      MRI.constrainRegClass(Reg, RC);
+
+  // Create a new virtual register for the result of (X op Y) instead of
+  // recycling RegB because the MachineCombiner's computation of the critical
+  // path requires a new register definition rather than an existing one.
+  const unsigned NewVR = MRI.createVirtualRegister(RC);
+  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+  const MCInstrDesc &OpDesc = TII->get(Root.getOpcode());
+  const unsigned KillStateA = getKillRegState(OpA.isKill());
+  const unsigned KillStateX = getKillRegState(OpX.isKill());
+  const unsigned KillStateY = getKillRegState(OpY.isKill());
+
+  // NewVR = X op Y, taking over Prev's debug location.
+  MachineInstr *NewPrev = BuildMI(MF, Prev.getDebugLoc(), OpDesc, NewVR)
+                              .addReg(RegX, KillStateX)
+                              .addReg(RegY, KillStateY);
+  InsInstrs.push_back(NewPrev);
+
+  // C = A op NewVR, taking over Root's debug location. NewVR has no other
+  // reader, so this use kills it.
+  MachineInstr *NewRoot = BuildMI(MF, Root.getDebugLoc(), OpDesc, RegC)
+                              .addReg(RegA, KillStateA)
+                              .addReg(NewVR, getKillRegState(true));
+  InsInstrs.push_back(NewRoot);
+
+  // Record old instructions for deletion.
+  DelInstrs.push_back(&Prev);
+  DelInstrs.push_back(&Root);
+}
+
+/// Produce the replacement instruction sequence for \p Root according to
+/// \p Pattern.
+///
+/// The pattern decides which operand of \p Root is defined by the previous
+/// instruction of the chain (operand 1 for the *_BY patterns, operand 2 for
+/// the *_YB patterns); the actual rewrite is delegated to reassociateOps,
+/// which fills \p InsInstrs, \p DelInstrs and \p InstIdxForVirtReg.
+void X86InstrInfo::genAlternativeCodeSequence(
+    MachineInstr &Root,
+    MachineCombinerPattern::MC_PATTERN Pattern,
+    SmallVectorImpl<MachineInstr *> &InsInstrs,
+    SmallVectorImpl<MachineInstr *> &DelInstrs,
+    DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+  // Select the previous instruction in the sequence based on the input pattern.
+  unsigned PrevDefOpIdx;
+  if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
+      Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
+    PrevDefOpIdx = 1;
+  else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
+           Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
+    PrevDefOpIdx = 2;
+  else
+    llvm_unreachable("Unknown pattern for machine combiner");
+
+  MachineInstr *Prev =
+      MRI.getUniqueVRegDef(Root.getOperand(PrevDefOpIdx).getReg());
+  // Prev is dereferenced below, so make the expectation explicit.
+  assert(Prev && "Reassociation pattern without a unique defining instruction");
+
+  reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+}
+
  namespace {
-  /// CGBR - Create Global Base Reg pass. This initializes the PIC
+  /// Create Global Base Reg pass. This initializes the PIC
    /// global base register for x86-32.
    struct CGBR : public MachineFunctionPass {
      static char ID;
@@ -6156,7 +6604,7 @@ namespace {
        MachineFunctionPass::getAnalysisUsage(AU);
      }
    };
-}
+} // namespace
  
  char CGBR::ID = 0;
  FunctionPass*
@@ -6268,7 +6716,7 @@ namespace {
        MachineFunctionPass::getAnalysisUsage(AU);
      }
    };
-}
+} // namespace
  
  char LDTLSCleanup::ID = 0;
  FunctionPass*