#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <limits>
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-instr-info"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
NoFusing("disable-spill-fusing",
cl::desc("Disable fusing of spill code into instructions"));
{ X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
{ X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
{ X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+ { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
+ { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
// AVX-512 foldable instructions
- { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
- { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
{ X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
{ X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
{ X86::VMINPDZrr, X86::VMINPDZrm, 0 },
{ X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+ { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+ { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
+ { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
+ { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
+ { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
+ { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
+ { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
+ { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
+ { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
+ { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
+ { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
{ X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
{ X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+ { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
static const X86OpTblEntry OpTbl3[] = {
// FMA foldable instructions
- { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
- { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
- { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
- { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
- { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
- { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
-
- { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
- { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
- { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
- { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
- { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
- { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
- { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
- { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
- { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
- { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
- { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
- { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
-
- { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
- { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
- { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
- { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
- { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
- { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
-
- { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
- { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
- { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
- { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
- { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
- { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
- { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
- { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
- { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
-
- { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
- { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
- { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
- { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
- { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
- { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
-
- { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
- { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
- { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
- { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
- { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
- { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
- { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
- { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
- { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
-
- { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
- { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
- { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
- { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
- { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
- { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
-
- { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
- { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
- { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
- { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
- { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
-
- { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
- { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
- { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
- { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
-
- { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
- { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
- { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
- { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
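+    // FMA3 instructions are VEX encoded and place no alignment requirement on
+    // their memory operands, so the folded loads use TB_ALIGN_NONE.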
+ { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
+
+ { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
+ { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_NONE },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_NONE },
+ { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_NONE },
+ { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_NONE },
+ { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_NONE },
+ { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_NONE },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_NONE },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_NONE },
+ { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_NONE },
+ { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
+
+ { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
+
+ { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_NONE },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_NONE },
+ { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_NONE },
+ { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
+
+ { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
+
+ { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_NONE },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_NONE },
+ { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_NONE },
+ { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
+
+ { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
+
+ { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_NONE },
+
+ { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_NONE },
+
+ { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },
// FMA4 foldable patterns
{ X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
/// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const {
- if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
- MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
- MI->getOperand(Op+1).getImm() == 1 &&
- MI->getOperand(Op+2).getReg() == 0 &&
- MI->getOperand(Op+3).getImm() == 0) {
- FrameIndex = MI->getOperand(Op).getIndex();
+ if (MI->getOperand(Op+X86::AddrBaseReg).isFI() &&
+ MI->getOperand(Op+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(Op+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(Op+X86::AddrDisp).isImm() &&
+ MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 &&
+ MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 &&
+ MI->getOperand(Op+X86::AddrDisp).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex();
return true;
}
return false;
case X86::FsMOVAPSrm:
case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
+ MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(1+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
MI->isInvariantLoad(AA)) {
- unsigned BaseReg = MI->getOperand(1).getReg();
+ unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal())
return false;
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
case X86::LEA32r:
case X86::LEA64r: {
- if (MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- !MI->getOperand(4).isReg()) {
+ if (MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(1+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
+ !MI->getOperand(1+X86::AddrDisp).isReg()) {
// lea fi#, lea GV, etc. are all rematerializable.
- if (!MI->getOperand(1).isReg())
+ if (!MI->getOperand(1+X86::AddrBaseReg).isReg())
return true;
- unsigned BaseReg = MI->getOperand(1).getReg();
+ unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
if (BaseReg == 0)
return true;
// Allow re-materialization of lea PICBase + x.
return true;
}
-/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
-/// would clobber the EFLAGS condition register. Note the result may be
-/// conservative. If it cannot definitely determine the safety after visiting
-/// a few instructions in each direction it assumes it's not safe.
-static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
+bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
MachineBasicBlock::iterator E = MBB.end();
// For compile time consideration, if we are not able to determine the
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
unsigned leaInReg2 = 0;
- MachineInstr *InsMI2 = 0;
+ MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD16rr %reg1028<kill>, %reg1028
// just a single insert_subreg.
// convert them to equivalent lea if the condition code register def's
// are dead!
if (hasLiveCondCodeDef(MI))
- return 0;
+ return nullptr;
MachineFunction &MF = *MI->getParent()->getParent();
// All instructions input are two-addr instructions. Get the known operands.
const MachineOperand &Dest = MI->getOperand(0);
const MachineOperand &Src = MI->getOperand(1);
- MachineInstr *NewMI = NULL;
+ MachineInstr *NewMI = nullptr;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
// 16-bit LEA is also slow on Core2.
switch (MIOpc) {
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
- if (B != C) return 0;
+ if (B != C) return nullptr;
unsigned M = MI->getOperand(3).getImm();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
.addOperand(Dest).addOperand(Src).addImm(M);
}
case X86::SHUFPDrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
- if (B != C) return 0;
+ if (B != C) return nullptr;
unsigned M = MI->getOperand(3).getImm();
// Convert to PSHUFD mask.
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
!MF.getRegInfo().constrainRegClass(Src.getReg(),
&X86::GR64_NOSPRegClass))
- return 0;
+ return nullptr;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addOperand(Dest)
case X86::SHL32ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
case X86::SHL16ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+                     : nullptr;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest)
.addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
default: {
switch (MIOpc) {
- default: return 0;
+ default: return nullptr;
case X86::INC64r:
case X86::INC32r:
case X86::INC64_32r: {
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
case X86::INC16r:
case X86::INC64_16r:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src), 1);
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
case X86::DEC16r:
case X86::DEC64_16r:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src), -1);
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
const MachineOperand &Src2 = MI->getOperand(2);
bool isKill2, isUndef2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, isUndef2, ImplicitOp2))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest);
case X86::ADD16rr:
case X86::ADD16rr_DB: {
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src),
}
}
- if (!NewMI) return 0;
+ if (!NewMI) return nullptr;
if (LV) { // Update live variables
if (Src.isKill())
}
}
+bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ switch (MI->getOpcode()) {
+ case X86::VFMADDPDr231r:
+ case X86::VFMADDPSr231r:
+ case X86::VFMADDSDr231r:
+ case X86::VFMADDSSr231r:
+ case X86::VFMSUBPDr231r:
+ case X86::VFMSUBPSr231r:
+ case X86::VFMSUBSDr231r:
+ case X86::VFMSUBSSr231r:
+ case X86::VFNMADDPDr231r:
+ case X86::VFNMADDPSr231r:
+ case X86::VFNMADDSDr231r:
+ case X86::VFNMADDSSr231r:
+ case X86::VFNMSUBPDr231r:
+ case X86::VFNMSUBPSr231r:
+ case X86::VFNMSUBSDr231r:
+ case X86::VFNMSUBSSr231r:
+ case X86::VFMADDPDr231rY:
+ case X86::VFMADDPSr231rY:
+ case X86::VFMSUBPDr231rY:
+ case X86::VFMSUBPSr231rY:
+ case X86::VFNMADDPDr231rY:
+ case X86::VFNMADDPSr231rY:
+ case X86::VFNMSUBPDr231rY:
+ case X86::VFNMSUBPSr231rY:
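+    // The *231 forms multiply operands 2 and 3 and add the tied accumulator
+    // in operand 1; since the multiplication commutes, those two operands can
+    // safely be swapped.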
+ SrcOpIdx1 = 2;
+ SrcOpIdx2 = 3;
+ return true;
+ default:
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ }
+}
+
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
switch (BrOpc) {
default: return X86::COND_INVALID;
std::next(I)->eraseFromParent();
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
UnCondBrIter = MBB.end();
}
}
+/// isUseDefConvertible - Check whether the use can be converted
+/// to remove a comparison against zero.
+static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return X86::COND_INVALID;
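+  // LZCNT sets CF when its source operand is zero.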
+ case X86::LZCNT16rr: case X86::LZCNT16rm:
+ case X86::LZCNT32rr: case X86::LZCNT32rm:
+ case X86::LZCNT64rr: case X86::LZCNT64rm:
+ return X86::COND_B;
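+  // POPCNT sets ZF when its result (and therefore its source) is zero.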
+ case X86::POPCNT16rr:case X86::POPCNT16rm:
+ case X86::POPCNT32rr:case X86::POPCNT32rm:
+ case X86::POPCNT64rr:case X86::POPCNT64rm:
+ return X86::COND_E;
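+  // TZCNT sets CF when its source operand is zero.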
+ case X86::TZCNT16rr: case X86::TZCNT16rm:
+ case X86::TZCNT32rr: case X86::TZCNT32rm:
+ case X86::TZCNT64rr: case X86::TZCNT64rm:
+ return X86::COND_B;
+ }
+}
+
/// optimizeCompareInstr - Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
// If we are comparing against zero, check whether we can use MI to update
// EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
- if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
- !isDefConvertible(MI)))
+ if (IsCmpZero && MI->getParent() != CmpInstr->getParent())
return false;
+  // If we have a use of the source register between the def and our compare
+  // instruction, we can eliminate the compare iff the use sets EFLAGS in the
+  // right way.
+ bool ShouldUpdateCC = false;
+ X86::CondCode NewCC = X86::COND_INVALID;
+ if (IsCmpZero && !isDefConvertible(MI)) {
+    // Scan forward from the def until we hit the use we're looking for or the
+    // compare instruction.
+ for (MachineBasicBlock::iterator J = MI;; ++J) {
+ // Do we have a convertible instruction?
+ NewCC = isUseDefConvertible(J);
+ if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
+ J->getOperand(1).getReg() == SrcReg) {
+ assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
+ ShouldUpdateCC = true; // Update CC later on.
+ // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
+ // with the new def.
+ MI = Def = J;
+ break;
+ }
+
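+      // We reached the compare instruction without finding a convertible use
+      // of SrcReg; give up.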
+ if (J == I)
+ return false;
+ }
+ }
+
// We are searching for an earlier instruction that can make CmpInstr
// redundant and that instruction will be saved in Sub.
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
const TargetRegisterInfo *TRI = &getRegisterInfo();
// We iterate backward, starting from the instruction before CmpInstr and
RE = CmpInstr->getParent() == MI->getParent() ?
MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
CmpInstr->getParent()->rend();
- MachineInstr *Movr0Inst = 0;
+ MachineInstr *Movr0Inst = nullptr;
for (; RI != RE; ++RI) {
MachineInstr *Instr = &*RI;
// Check whether CmpInstr can be made redundant by the current instruction.
continue;
// EFLAGS is used by this instruction.
- X86::CondCode OldCC;
+ X86::CondCode OldCC = X86::COND_INVALID;
bool OpcIsSET = false;
if (IsCmpZero || IsSwapped) {
// We decode the condition code from opcode.
// CF and OF are used, we can't perform this optimization.
return false;
}
+
+      // If we're updating the condition code, check whether it has to be
+      // reversed: NewCC holds the condition that is true when the convertible
+      // instruction's source operand is zero, so a COND_NE user of the
+      // compare needs the opposite condition.
+ if (ShouldUpdateCC)
+ switch (OldCC) {
+ default:
+ return false;
+ case X86::COND_E:
+ break;
+ case X86::COND_NE:
+ NewCC = GetOppositeBranchCondition(NewCC);
+ break;
+ }
} else if (IsSwapped) {
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
// to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
// We swap the condition code and synthesize the new opcode.
- X86::CondCode NewCC = getSwappedCondition(OldCC);
+ NewCC = getSwappedCondition(OldCC);
if (NewCC == X86::COND_INVALID) return false;
+ }
+ if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
// Synthesize the new opcode.
bool HasMemoryOperand = Instr.hasOneMemOperand();
unsigned NewOpc;
unsigned &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
- return 0;
+ return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
if (MI->mayLoad()) {
FoldAsLoadDefReg = 0;
- return 0;
+ return nullptr;
}
// Check whether we can move DefMI here.
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
assert(DefMI);
bool SawStore = false;
- if (!DefMI->isSafeToMove(this, 0, SawStore))
- return 0;
+ if (!DefMI->isSafeToMove(this, nullptr, SawStore))
+ return nullptr;
// We try to commute MI if possible.
unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
continue;
// Do not fold if we have a subreg use or a def or multiple uses.
if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return 0;
+ return nullptr;
SrcOperandId = i;
FoundSrcOperand = true;
}
- if (!FoundSrcOperand) return 0;
+ if (!FoundSrcOperand) return nullptr;
// Check whether we can fold the def into SrcOperandId.
SmallVector<unsigned, 8> Ops;
if (Idx == 1) {
// MI was changed but it didn't help, commute it back!
commuteInstruction(MI, false);
- return 0;
+ return nullptr;
}
// Check whether we can commute MI and enable folding.
if (MI->isCommutable()) {
MachineInstr *NewMI = commuteInstruction(MI, false);
// Unable to commute.
- if (!NewMI) return 0;
+ if (!NewMI) return nullptr;
if (NewMI != MI) {
// New instruction. It doesn't need to be kept.
NewMI->eraseFromParent();
- return 0;
+ return nullptr;
}
}
}
- return 0;
+ return nullptr;
}
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
- const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ const DenseMap<unsigned,
+ std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
bool isTwoAddrFold = false;
// when X86Subtarget is Atom.
if (isCallRegIndirect &&
(MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
- return NULL;
+ return nullptr;
}
unsigned NumOps = MI->getDesc().getNumOperands();
// X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
if (MI->getOpcode() == X86::ADD32ri &&
MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
- return NULL;
+ return nullptr;
- MachineInstr *NewMI = NULL;
+ MachineInstr *NewMI = nullptr;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
unsigned Opcode = I->second.first;
unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
if (Align < MinAlign)
- return NULL;
+ return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
- return NULL;
+ return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
// a 32-bit load which is implicitly zero-extended. This likely is due
// to liveintervalanalysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
- return NULL;
+ return nullptr;
Opcode = X86::MOV32rm;
NarrowToMOV32rm = true;
}
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
- return NULL;
+ return nullptr;
}
/// hasPartialRegUpdate - Return true for all instructions that only update
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// Check switch flag
- if (NoFusing) return NULL;
+ if (NoFusing) return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
if (!MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
- return 0;
+ return nullptr;
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned NewOpc = 0;
unsigned RCSize = 0;
switch (MI->getOpcode()) {
- default: return NULL;
+ default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
if (Size < RCSize)
- return NULL;
+ return nullptr;
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
- return NULL;
+ return nullptr;
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
// Check switch flag
- if (NoFusing) return NULL;
+ if (NoFusing) return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
if (!MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
- return 0;
+ return nullptr;
// Determine the alignment of the load.
unsigned Alignment = 0;
Alignment = 4;
break;
default:
- return 0;
+ return nullptr;
}
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
- default: return NULL;
+ default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
- return NULL;
+ return nullptr;
// Make sure the subregisters match.
// Otherwise we risk changing the size of the load.
if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
- return NULL;
+ return nullptr;
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
// Medium and large mode can't fold loads this way.
if (TM.getCodeModel() != CodeModel::Small &&
TM.getCodeModel() != CodeModel::Kernel)
- return NULL;
+ return nullptr;
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
// This doesn't work for several reasons.
// 1. GlobalBaseReg may have been spilled.
// 2. It may not be live at MI.
- return NULL;
+ return nullptr;
}
// Create a constant-pool entry.
> 4)
// These instructions only load 32 bits, we can't fold them if the
// destination register is wider than 32 bits (4 bytes).
- return NULL;
+ return nullptr;
if ((LoadMI->getOpcode() == X86::MOVSDrm ||
LoadMI->getOpcode() == X86::VMOVSDrm) &&
MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
> 8)
// These instructions only load 64 bits, we can't fold them if the
// destination register is wider than 64 bits (8 bytes).
- return NULL;
+ return nullptr;
// Folding a normal load. Just copy the load's address operands.
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ const DenseMap<unsigned,
+ std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
AddrOps.push_back(Chain);
// Emit the load instruction.
- SDNode *Load = 0;
+ SDNode *Load = nullptr;
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
std::pair<MachineInstr::mmo_iterator,
// Emit the data processing instruction.
std::vector<EVT> VTs;
- const TargetRegisterClass *DstRC = 0;
+ const TargetRegisterClass *DstRC = nullptr;
if (MCID.getNumDefs() > 0) {
DstRC = getRegClass(MCID, 0, &RI, MF);
VTs.push_back(*DstRC->vt_begin());
{ X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
{ X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
{ X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
- { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
+ { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
+ { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
+ { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
+ { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
- return 0;
+ return nullptr;
}
static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
return ReplaceableInstrsAVX2[i];
- return 0;
+ return nullptr;
}
std::pair<uint16_t, uint16_t>
NopInst.setOpcode(X86::NOOP);
}
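+// Build an unconditional JMP to the given branch target.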
+void X86InstrInfo::getUnconditionalBranch(
+ MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+ Branch.setOpcode(X86::JMP_4);
+ Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+}
+
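+// Build a trap instruction (ud2).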
+void X86InstrInfo::getTrap(MCInst &MI) const {
+ MI.setOpcode(X86::TRAP);
+}
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
- assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
- "X86-64 PIC uses RIP relative addressing");
+      // Don't do anything if this is 64-bit, as 64-bit PIC
+      // uses RIP-relative addressing.
+ if (TM->getSubtarget<X86Subtarget>().is64Bit())
+ return false;
// Only emit a global base reg in PIC mode.
if (TM->getRelocationModel() != Reloc::PIC_)
char CGBR::ID = 0;
FunctionPass*
-llvm::createGlobalBaseRegPass() { return new CGBR(); }
+llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
namespace {
struct LDTLSCleanup : public MachineFunctionPass {