enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 7)
+ // (stored in bits 0 - 3)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
- TB_INDEX_MASK = 0xff,
-
- // Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15)
- TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+ TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
// This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 16,
+ TB_NO_REVERSE = 1 << 4,
// Do not insert the forward map (RegOp -> MemOp) into the table.
// This is needed for Native Client, which prohibits branch
// instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 17,
+ TB_NO_FORWARD = 1 << 5,
+
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
- TB_FOLDED_LOAD = 1 << 18,
- TB_FOLDED_STORE = 1 << 19
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
struct X86OpTblEntry {
uint16_t RegOp;
uint16_t MemOp;
- uint32_t Flags;
+ uint16_t Flags;
};
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
{ X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
{ X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
{ X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
static const X86OpTblEntry OpTbl3[] = {
// FMA foldable instructions
- { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
- { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
- { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
- { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
-
- { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
- { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
- { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
- { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
- { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
- { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
- { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
- { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
- { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
- { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
- { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
- { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
-
- { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
- { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
- { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
- { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
-
- { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
- { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
- { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
- { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
- { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
- { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
- { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
-
- { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
- { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
- { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
- { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
-
- { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
- { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
- { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
- { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
- { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
- { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
- { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
-
- { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
- { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
- { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
- { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
-
- { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
+ { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
+ { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
+ { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
+ { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 },
+
+ { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
+ { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
+ { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
+ { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 },
+
+ { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
+ { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
+ { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
+ { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
+ { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 },
+
+ { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
+ { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
+ { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
+ { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 },
+
+ { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
+ { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 },
+
+ { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+ { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 },
+ { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 },
+ { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
}
}
+/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and
+/// register size in bytes.
+static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) {
+ static const unsigned Opc[16][3] = {
+ { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
+ { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
+ { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
+ { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
+ { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
+ { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
+ { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
+ { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
+ { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
+ { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
+ { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
+ { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
+ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
+ { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
+ { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
+ { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }
+ };
+
+ assert(CC < 16 && "Can only handle standard cond codes");
+ switch(RegBytes) {
+ default: llvm_unreachable("Illegal register size!");
+ case 2: return Opc[CC][0];
+ case 4: return Opc[CC][1];
+ case 8: return Opc[CC][2];
+ }
+}
+
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
if (!MI->isTerminator()) return false;
return Count;
}
+bool X86InstrInfo::
+canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ // Not all subtargets have cmov instructions.
+ if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ return false;
+ if (Cond.size() != 1)
+ return false;
+ // We cannot do the composite conditions, at least not in SSA form.
+ if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
+ if (X86::GR16RegClass.hasSubClassEq(RC) ||
+ X86::GR32RegClass.hasSubClassEq(RC) ||
+ X86::GR64RegClass.hasSubClassEq(RC)) {
+ // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
+ // Bridge. Probably Ivy Bridge as well.
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
+ return false;
+}
+
+void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(Cond.size() == 1 && "Invalid Cond array");
+ unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
+ MRI.getRegClass(DstReg)->getSize());
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+}
+
/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
bool X86InstrInfo::
-OptimizeSubInstr(MachineInstr *SubInstr, const MachineRegisterInfo *MRI) const {
- // If destination is a memory operand, do not perform this optimization.
- if ((SubInstr->getOpcode() != X86::SUB64rr) &&
- (SubInstr->getOpcode() != X86::SUB32rr) &&
- (SubInstr->getOpcode() != X86::SUB16rr) &&
- (SubInstr->getOpcode() != X86::SUB8rr) &&
- (SubInstr->getOpcode() != X86::SUB64ri32) &&
- (SubInstr->getOpcode() != X86::SUB64ri8) &&
- (SubInstr->getOpcode() != X86::SUB32ri) &&
- (SubInstr->getOpcode() != X86::SUB32ri8) &&
- (SubInstr->getOpcode() != X86::SUB16ri) &&
- (SubInstr->getOpcode() != X86::SUB16ri8) &&
- (SubInstr->getOpcode() != X86::SUB8ri))
- return false;
- unsigned DestReg = SubInstr->getOperand(0).getReg();
- if (MRI->use_begin(DestReg) != MRI->use_end())
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ }
+ return false;
+}
+
+/// getSwappedConditionForSET - assume the flags are set by MI(a,b), return
+/// the opcode if we modify the instructions such that flags are
+/// set by MI(b,a).
+static unsigned getSwappedConditionForSET(unsigned SETOpc) {
+ switch (SETOpc) {
+ default: return 0;
+ case X86::SETEr: return X86::SETEr;
+ case X86::SETEm: return X86::SETEm;
+ case X86::SETNEr: return X86::SETNEr;
+ case X86::SETNEm: return X86::SETNEm;
+ case X86::SETLr: return X86::SETGr;
+ case X86::SETLm: return X86::SETGm;
+ case X86::SETLEr: return X86::SETGEr;
+ case X86::SETLEm: return X86::SETGEm;
+ case X86::SETGr: return X86::SETLr;
+ case X86::SETGm: return X86::SETLm;
+ case X86::SETGEr: return X86::SETLEr;
+ case X86::SETGEm: return X86::SETLEm;
+ case X86::SETBr: return X86::SETAr;
+ case X86::SETBm: return X86::SETAm;
+ case X86::SETBEr: return X86::SETAEr;
+ case X86::SETBEm: return X86::SETAEm;
+ case X86::SETAr: return X86::SETBr;
+ case X86::SETAm: return X86::SETBm;
+ case X86::SETAEr: return X86::SETBEr;
+ case X86::SETAEm: return X86::SETBEm;
+ }
+}
+
+/// getSwappedConditionForBranch - assume the flags are set by MI(a,b), return
+/// the opcode if we modify the instructions such that flags are
+/// set by MI(b,a).
+static unsigned getSwappedConditionForBranch(unsigned BranchOpc) {
+ switch (BranchOpc) {
+ default: return 0;
+ case X86::JE_4: return X86::JE_4;
+ case X86::JNE_4: return X86::JNE_4;
+ case X86::JL_4: return X86::JG_4;
+ case X86::JLE_4: return X86::JGE_4;
+ case X86::JG_4: return X86::JL_4;
+ case X86::JGE_4: return X86::JLE_4;
+ case X86::JB_4: return X86::JA_4;
+ case X86::JBE_4: return X86::JAE_4;
+ case X86::JA_4: return X86::JB_4;
+ case X86::JAE_4: return X86::JBE_4;
+ }
+}
+
+/// getSwappedConditionForCMov - assume the flags are set by MI(a,b), return
+/// the opcode if we modify the instructions such that flags are
+/// set by MI(b,a).
+static unsigned getSwappedConditionForCMov(unsigned CMovOpc) {
+ switch (CMovOpc) {
+ default: return 0;
+ case X86::CMOVE16rm: return X86::CMOVE16rm;
+ case X86::CMOVE16rr: return X86::CMOVE16rr;
+ case X86::CMOVE32rm: return X86::CMOVE32rm;
+ case X86::CMOVE32rr: return X86::CMOVE32rr;
+ case X86::CMOVE64rm: return X86::CMOVE64rm;
+ case X86::CMOVE64rr: return X86::CMOVE64rr;
+ case X86::CMOVNE16rm: return X86::CMOVNE16rm;
+ case X86::CMOVNE16rr: return X86::CMOVNE16rr;
+ case X86::CMOVNE32rm: return X86::CMOVNE32rm;
+ case X86::CMOVNE32rr: return X86::CMOVNE32rr;
+ case X86::CMOVNE64rm: return X86::CMOVNE64rm;
+ case X86::CMOVNE64rr: return X86::CMOVNE64rr;
+
+ case X86::CMOVL16rm: return X86::CMOVG16rm;
+ case X86::CMOVL16rr: return X86::CMOVG16rr;
+ case X86::CMOVL32rm: return X86::CMOVG32rm;
+ case X86::CMOVL32rr: return X86::CMOVG32rr;
+ case X86::CMOVL64rm: return X86::CMOVG64rm;
+ case X86::CMOVL64rr: return X86::CMOVG64rr;
+ case X86::CMOVLE16rm: return X86::CMOVGE16rm;
+ case X86::CMOVLE16rr: return X86::CMOVGE16rr;
+ case X86::CMOVLE32rm: return X86::CMOVGE32rm;
+ case X86::CMOVLE32rr: return X86::CMOVGE32rr;
+ case X86::CMOVLE64rm: return X86::CMOVGE64rm;
+ case X86::CMOVLE64rr: return X86::CMOVGE64rr;
+
+ case X86::CMOVG16rm: return X86::CMOVL16rm;
+ case X86::CMOVG16rr: return X86::CMOVL16rr;
+ case X86::CMOVG32rm: return X86::CMOVL32rm;
+ case X86::CMOVG32rr: return X86::CMOVL32rr;
+ case X86::CMOVG64rm: return X86::CMOVL64rm;
+ case X86::CMOVG64rr: return X86::CMOVL64rr;
+ case X86::CMOVGE16rm: return X86::CMOVLE16rm;
+ case X86::CMOVGE16rr: return X86::CMOVLE16rr;
+ case X86::CMOVGE32rm: return X86::CMOVLE32rm;
+ case X86::CMOVGE32rr: return X86::CMOVLE32rr;
+ case X86::CMOVGE64rm: return X86::CMOVLE64rm;
+ case X86::CMOVGE64rr: return X86::CMOVLE64rr;
+
+ case X86::CMOVB16rm: return X86::CMOVA16rm;
+ case X86::CMOVB16rr: return X86::CMOVA16rr;
+ case X86::CMOVB32rm: return X86::CMOVA32rm;
+ case X86::CMOVB32rr: return X86::CMOVA32rr;
+ case X86::CMOVB64rm: return X86::CMOVA64rm;
+ case X86::CMOVB64rr: return X86::CMOVA64rr;
+ case X86::CMOVBE16rm: return X86::CMOVAE16rm;
+ case X86::CMOVBE16rr: return X86::CMOVAE16rr;
+ case X86::CMOVBE32rm: return X86::CMOVAE32rm;
+ case X86::CMOVBE32rr: return X86::CMOVAE32rr;
+ case X86::CMOVBE64rm: return X86::CMOVAE64rm;
+ case X86::CMOVBE64rr: return X86::CMOVAE64rr;
+
+ case X86::CMOVA16rm: return X86::CMOVB16rm;
+ case X86::CMOVA16rr: return X86::CMOVB16rr;
+ case X86::CMOVA32rm: return X86::CMOVB32rm;
+ case X86::CMOVA32rr: return X86::CMOVB32rr;
+ case X86::CMOVA64rm: return X86::CMOVB64rm;
+ case X86::CMOVA64rr: return X86::CMOVB64rr;
+ case X86::CMOVAE16rm: return X86::CMOVBE16rm;
+ case X86::CMOVAE16rr: return X86::CMOVBE16rr;
+ case X86::CMOVAE32rm: return X86::CMOVBE32rm;
+ case X86::CMOVAE32rr: return X86::CMOVBE32rr;
+ case X86::CMOVAE64rm: return X86::CMOVBE64rm;
+ case X86::CMOVAE64rr: return X86::CMOVBE64rr;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// This function can be extended later on.
+/// SrcReg, SrcRegs: register operands for FlagI.
+/// ImmValue: immediate for FlagI if it takes an immediate.
+inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if (((FlagI->getOpcode() == X86::CMP64rr &&
+ OI->getOpcode() == X86::SUB64rr) ||
+ (FlagI->getOpcode() == X86::CMP32rr &&
+ OI->getOpcode() == X86::SUB32rr)||
+ (FlagI->getOpcode() == X86::CMP16rr &&
+ OI->getOpcode() == X86::SUB16rr)||
+ (FlagI->getOpcode() == X86::CMP8rr &&
+ OI->getOpcode() == X86::SUB8rr)) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
+
+ if (((FlagI->getOpcode() == X86::CMP64ri32 &&
+ OI->getOpcode() == X86::SUB64ri32) ||
+ (FlagI->getOpcode() == X86::CMP64ri8 &&
+ OI->getOpcode() == X86::SUB64ri8) ||
+ (FlagI->getOpcode() == X86::CMP32ri &&
+ OI->getOpcode() == X86::SUB32ri) ||
+ (FlagI->getOpcode() == X86::CMP32ri8 &&
+ OI->getOpcode() == X86::SUB32ri8) ||
+ (FlagI->getOpcode() == X86::CMP16ri &&
+ OI->getOpcode() == X86::SUB16ri) ||
+ (FlagI->getOpcode() == X86::CMP16ri8 &&
+ OI->getOpcode() == X86::SUB16ri8) ||
+ (FlagI->getOpcode() == X86::CMP8ri &&
+ OI->getOpcode() == X86::SUB8ri)) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
+
+/// optimizeCompareInstr - Check if there exists an earlier instruction that
+/// operates on the same source operands and sets flags in the same way as
+/// Compare; remove Compare if possible.
+bool X86InstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+
+ // CmpInstr is the first instruction of the BB.
+ MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+
+ // We are searching for an earlier instruction that can make CmpInstr
+ // redundant and that instruction will be saved in Sub.
+ MachineInstr *Sub = NULL;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ // We iterate backward, starting from the instruction before CmpInstr and
+ // stop when reaching the definition of a source register or done with the BB.
+ // RI points to the instruction before CmpInstr.
+ // If the definition is in this basic block, RE points to the definition;
+ // otherwise, RE is the rend of the basic block.
+ MachineBasicBlock::reverse_iterator
+ RI = MachineBasicBlock::reverse_iterator(I),
+ RE = CmpInstr->getParent() == MI->getParent() ?
+ MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
+ CmpInstr->getParent()->rend();
+ for (; RI != RE; ++RI) {
+ MachineInstr *Instr = &*RI;
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ Sub = Instr;
+ break;
+ }
+
+ if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
+ Instr->readsRegister(X86::EFLAGS, TRI))
+ // This instruction modifies or uses EFLAGS.
+ // We can't remove CmpInstr.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!Sub)
return false;
- // There is no use of the destination register, we can replace SUB with CMP.
- switch (SubInstr->getOpcode()) {
- default: break;
- case X86::SUB64rr: SubInstr->setDesc(get(X86::CMP64rr)); break;
- case X86::SUB32rr: SubInstr->setDesc(get(X86::CMP32rr)); break;
- case X86::SUB16rr: SubInstr->setDesc(get(X86::CMP16rr)); break;
- case X86::SUB8rr: SubInstr->setDesc(get(X86::CMP8rr)); break;
- case X86::SUB64ri32: SubInstr->setDesc(get(X86::CMP64ri32)); break;
- case X86::SUB64ri8: SubInstr->setDesc(get(X86::CMP64ri8)); break;
- case X86::SUB32ri: SubInstr->setDesc(get(X86::CMP32ri)); break;
- case X86::SUB32ri8: SubInstr->setDesc(get(X86::CMP32ri8)); break;
- case X86::SUB16ri: SubInstr->setDesc(get(X86::CMP16ri)); break;
- case X86::SUB16ri8: SubInstr->setDesc(get(X86::CMP16ri8)); break;
- case X86::SUB8ri: SubInstr->setDesc(get(X86::CMP8ri)); break;
+ bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg);
+
+ // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
+ // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
+ // If we are done with the basic block, we need to check whether EFLAGS is
+ // live-out.
+ bool IsSafe = false;
+ SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
+ for (++I; I != E; ++I) {
+ const MachineInstr &Instr = *I;
+ if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+ // It is safe to remove CmpInstr if EFLAGS is updated again.
+ IsSafe = true;
+ break;
+ }
+ if (!Instr.readsRegister(X86::EFLAGS, TRI))
+ continue;
+
+ // EFLAGS is used by this instruction.
+ if (IsSwapped) {
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
+ // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ unsigned NewOpc = getSwappedConditionForSET(Instr.getOpcode());
+ if (!NewOpc) NewOpc = getSwappedConditionForBranch(Instr.getOpcode());
+ if (!NewOpc) NewOpc = getSwappedConditionForCMov(Instr.getOpcode());
+ if (!NewOpc) return false;
+
+ // Push the MachineInstr to OpsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // instructions will be modified.
+ OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ }
+ if (Instr.killsRegister(X86::EFLAGS, TRI)) {
+ IsSafe = true;
+ break;
+ }
}
- SubInstr->RemoveOperand(0);
+
+ // If EFLAGS is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (IsSwapped && !IsSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+
+ // Make sure Sub instruction defines EFLAGS.
+ assert(Sub->getNumOperands() >= 4 && Sub->getOperand(3).isReg() &&
+ Sub->getOperand(3).getReg() == X86::EFLAGS &&
+ "EFLAGS should be the 4th operand of SUBrr or SUBri.");
+ Sub->getOperand(3).setIsDef(true);
+ CmpInstr->eraseFromParent();
+
+ // Modify the condition code of instructions in OpsToUpdate.
+ for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
+ OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
return true;
}
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
- private:
- unsigned BaseReg;
};
}