-//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//\r
-//\r
-// The LLVM Compiler Infrastructure\r
-//\r
-// This file is distributed under the University of Illinois Open Source\r
-// License. See LICENSE.TXT for details.\r
-//\r
-//===----------------------------------------------------------------------===//\r
-//\r
-// This defines functionality used to emit comments about X86 instructions to\r
-// an output stream for -fverbose-asm.\r
-//\r
-//===----------------------------------------------------------------------===//\r
-\r
-#include "X86InstComments.h"\r
-#include "MCTargetDesc/X86MCTargetDesc.h"\r
-#include "Utils/X86ShuffleDecode.h"\r
-#include "llvm/MC/MCInst.h"\r
-#include "llvm/CodeGen/MachineValueType.h"\r
-#include "llvm/Support/raw_ostream.h"\r
-\r
-using namespace llvm;\r
-\r
-/// \brief Extracts the src/dst types for a given zero extension instruction.\r
-/// \note While the number of elements in DstVT type correct, the\r
-/// number in the SrcVT type is expanded to fill the src xmm register and the\r
-/// upper elements may not be included in the dst xmm/ymm register.\r
-static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {\r
- switch (MI->getOpcode()) {\r
- default:\r
- llvm_unreachable("Unknown zero extension instruction");\r
- // i8 zero extension\r
- case X86::PMOVZXBWrm:\r
- case X86::PMOVZXBWrr:\r
- case X86::VPMOVZXBWrm:\r
- case X86::VPMOVZXBWrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v8i16;\r
- break;\r
- case X86::VPMOVZXBWYrm:\r
- case X86::VPMOVZXBWYrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v16i16;\r
- break;\r
- case X86::PMOVZXBDrm:\r
- case X86::PMOVZXBDrr:\r
- case X86::VPMOVZXBDrm:\r
- case X86::VPMOVZXBDrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v4i32;\r
- break;\r
- case X86::VPMOVZXBDYrm:\r
- case X86::VPMOVZXBDYrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v8i32;\r
- break;\r
- case X86::PMOVZXBQrm:\r
- case X86::PMOVZXBQrr:\r
- case X86::VPMOVZXBQrm:\r
- case X86::VPMOVZXBQrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v2i64;\r
- break;\r
- case X86::VPMOVZXBQYrm:\r
- case X86::VPMOVZXBQYrr:\r
- SrcVT = MVT::v16i8;\r
- DstVT = MVT::v4i64;\r
- break;\r
- // i16 zero extension\r
- case X86::PMOVZXWDrm:\r
- case X86::PMOVZXWDrr:\r
- case X86::VPMOVZXWDrm:\r
- case X86::VPMOVZXWDrr:\r
- SrcVT = MVT::v8i16;\r
- DstVT = MVT::v4i32;\r
- break;\r
- case X86::VPMOVZXWDYrm:\r
- case X86::VPMOVZXWDYrr:\r
- SrcVT = MVT::v8i16;\r
- DstVT = MVT::v8i32;\r
- break;\r
- case X86::PMOVZXWQrm:\r
- case X86::PMOVZXWQrr:\r
- case X86::VPMOVZXWQrm:\r
- case X86::VPMOVZXWQrr:\r
- SrcVT = MVT::v8i16;\r
- DstVT = MVT::v2i64;\r
- break;\r
- case X86::VPMOVZXWQYrm:\r
- case X86::VPMOVZXWQYrr:\r
- SrcVT = MVT::v8i16;\r
- DstVT = MVT::v4i64;\r
- break;\r
- // i32 zero extension\r
- case X86::PMOVZXDQrm:\r
- case X86::PMOVZXDQrr:\r
- case X86::VPMOVZXDQrm:\r
- case X86::VPMOVZXDQrr:\r
- SrcVT = MVT::v4i32;\r
- DstVT = MVT::v2i64;\r
- break;\r
- case X86::VPMOVZXDQYrm:\r
- case X86::VPMOVZXDQYrr:\r
- SrcVT = MVT::v4i32;\r
- DstVT = MVT::v4i64;\r
- break;\r
- }\r
-}\r
-\r
-//===----------------------------------------------------------------------===//\r
-// Top Level Entrypoint\r
-//===----------------------------------------------------------------------===//\r
-\r
-/// EmitAnyX86InstComments - This function decodes x86 instructions and prints\r
-/// newline terminated strings to the specified string if desired. This\r
-/// information is shown in disassembly dumps when verbose assembly is enabled.\r
-bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,\r
- const char *(*getRegName)(unsigned)) {\r
- // If this is a shuffle operation, the switch should fill in this state.\r
- SmallVector<int, 8> ShuffleMask;\r
- const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;\r
-\r
- switch (MI->getOpcode()) {\r
- default:\r
- // Not an instruction for which we can decode comments.\r
- return false;\r
-\r
- case X86::BLENDPDrri:\r
- case X86::VBLENDPDrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::BLENDPDrmi:\r
- case X86::VBLENDPDrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v2f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VBLENDPDYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VBLENDPDYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v4f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::BLENDPSrri:\r
- case X86::VBLENDPSrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::BLENDPSrmi:\r
- case X86::VBLENDPSrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v4f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VBLENDPSYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VBLENDPSYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v8f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::PBLENDWrri:\r
- case X86::VPBLENDWrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PBLENDWrmi:\r
- case X86::VPBLENDWrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v8i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPBLENDWYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPBLENDWYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v16i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::VPBLENDDrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPBLENDDrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v4i32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::VPBLENDDYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPBLENDDYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeBLENDMask(MVT::v8i32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::INSERTPSrr:\r
- case X86::VINSERTPSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::INSERTPSrm:\r
- case X86::VINSERTPSrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeINSERTPSMask(MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::MOVLHPSrr:\r
- case X86::VMOVLHPSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVLHPSMask(2, ShuffleMask);\r
- break;\r
-\r
- case X86::MOVHLPSrr:\r
- case X86::VMOVHLPSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVHLPSMask(2, ShuffleMask);\r
- break;\r
-\r
- case X86::MOVSLDUPrr:\r
- case X86::VMOVSLDUPrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVSLDUPrm:\r
- case X86::VMOVSLDUPrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);\r
- break;\r
-\r
- case X86::VMOVSHDUPYrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VMOVSHDUPYrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);\r
- break;\r
-\r
- case X86::VMOVSLDUPYrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VMOVSLDUPYrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);\r
- break;\r
-\r
- case X86::MOVSHDUPrr:\r
- case X86::VMOVSHDUPrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVSHDUPrm:\r
- case X86::VMOVSHDUPrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);\r
- break;\r
-\r
- case X86::VMOVDDUPYrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VMOVDDUPYrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);\r
- break;\r
-\r
- case X86::MOVDDUPrr:\r
- case X86::VMOVDDUPrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVDDUPrm:\r
- case X86::VMOVDDUPrm:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);\r
- break;\r
-\r
- case X86::PSLLDQri:\r
- case X86::VPSLLDQri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSLLDQMask(MVT::v16i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::VPSLLDQYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSLLDQMask(MVT::v32i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::PSRLDQri:\r
- case X86::VPSRLDQri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSRLDQMask(MVT::v16i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::VPSRLDQYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSRLDQMask(MVT::v32i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::PALIGNR128rr:\r
- case X86::VPALIGNR128rr:\r
- Src1Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PALIGNR128rm:\r
- case X86::VPALIGNR128rm:\r
- Src2Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePALIGNRMask(MVT::v16i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
- case X86::VPALIGNR256rr:\r
- Src1Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPALIGNR256rm:\r
- Src2Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePALIGNRMask(MVT::v32i8,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::PSHUFDri:\r
- case X86::VPSHUFDri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::PSHUFDmi:\r
- case X86::VPSHUFDmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v4i32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
- case X86::VPSHUFDYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPSHUFDYmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v8i32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
-\r
- case X86::PSHUFHWri:\r
- case X86::VPSHUFHWri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::PSHUFHWmi:\r
- case X86::VPSHUFHWmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFHWMask(MVT::v8i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
- case X86::VPSHUFHWYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPSHUFHWYmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFHWMask(MVT::v16i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
- case X86::PSHUFLWri:\r
- case X86::VPSHUFLWri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::PSHUFLWmi:\r
- case X86::VPSHUFLWmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFLWMask(MVT::v8i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
- case X86::VPSHUFLWYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPSHUFLWYmi:\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFLWMask(MVT::v16i16,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- break;\r
-\r
- case X86::PUNPCKHBWrr:\r
- case X86::VPUNPCKHBWrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKHBWrm:\r
- case X86::VPUNPCKHBWrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHBWYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHBWYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);\r
- break;\r
- case X86::PUNPCKHWDrr:\r
- case X86::VPUNPCKHWDrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKHWDrm:\r
- case X86::VPUNPCKHWDrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHWDYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHWDYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);\r
- break;\r
- case X86::PUNPCKHDQrr:\r
- case X86::VPUNPCKHDQrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKHDQrm:\r
- case X86::VPUNPCKHDQrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHDQYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHDQYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHDQZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHDQZrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v16i32, ShuffleMask);\r
- break;\r
- case X86::PUNPCKHQDQrr:\r
- case X86::VPUNPCKHQDQrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKHQDQrm:\r
- case X86::VPUNPCKHQDQrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHQDQYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHQDQYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKHQDQZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKHQDQZrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKHMask(MVT::v8i64, ShuffleMask);\r
- break;\r
-\r
- case X86::PUNPCKLBWrr:\r
- case X86::VPUNPCKLBWrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKLBWrm:\r
- case X86::VPUNPCKLBWrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLBWYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLBWYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);\r
- break;\r
- case X86::PUNPCKLWDrr:\r
- case X86::VPUNPCKLWDrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKLWDrm:\r
- case X86::VPUNPCKLWDrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLWDYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLWDYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);\r
- break;\r
- case X86::PUNPCKLDQrr:\r
- case X86::VPUNPCKLDQrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKLDQrm:\r
- case X86::VPUNPCKLDQrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLDQYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLDQYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLDQZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLDQZrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v16i32, ShuffleMask);\r
- break;\r
- case X86::PUNPCKLQDQrr:\r
- case X86::VPUNPCKLQDQrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::PUNPCKLQDQrm:\r
- case X86::VPUNPCKLQDQrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLQDQYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLQDQYrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);\r
- break;\r
- case X86::VPUNPCKLQDQZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPUNPCKLQDQZrm:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- DecodeUNPCKLMask(MVT::v8i64, ShuffleMask);\r
- break;\r
-\r
- case X86::SHUFPDrri:\r
- case X86::VSHUFPDrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::SHUFPDrmi:\r
- case X86::VSHUFPDrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeSHUFPMask(MVT::v2f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VSHUFPDYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VSHUFPDYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeSHUFPMask(MVT::v4f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::SHUFPSrri:\r
- case X86::VSHUFPSrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::SHUFPSrmi:\r
- case X86::VSHUFPSrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeSHUFPMask(MVT::v4f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VSHUFPSYrri:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VSHUFPSYrmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeSHUFPMask(MVT::v8f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::UNPCKLPDrr:\r
- case X86::VUNPCKLPDrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::UNPCKLPDrm:\r
- case X86::VUNPCKLPDrm:\r
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKLPDYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKLPDYrm:\r
- DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKLPDZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKLPDZrm:\r
- DecodeUNPCKLMask(MVT::v8f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::UNPCKLPSrr:\r
- case X86::VUNPCKLPSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::UNPCKLPSrm:\r
- case X86::VUNPCKLPSrm:\r
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKLPSYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKLPSYrm:\r
- DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKLPSZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKLPSZrm:\r
- DecodeUNPCKLMask(MVT::v16f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::UNPCKHPDrr:\r
- case X86::VUNPCKHPDrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::UNPCKHPDrm:\r
- case X86::VUNPCKHPDrm:\r
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKHPDYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKHPDYrm:\r
- DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKHPDZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKHPDZrm:\r
- DecodeUNPCKHMask(MVT::v8f64, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::UNPCKHPSrr:\r
- case X86::VUNPCKHPSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::UNPCKHPSrm:\r
- case X86::VUNPCKHPSrm:\r
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKHPSYrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKHPSYrm:\r
- DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VUNPCKHPSZrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VUNPCKHPSZrm:\r
- DecodeUNPCKHMask(MVT::v16f32, ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERMILPSri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERMILPSmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v4f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERMILPSYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERMILPSYmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v8f32,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERMILPDri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERMILPDmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v2f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERMILPDYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERMILPDYmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodePSHUFMask(MVT::v4f64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERM2F128rr:\r
- case X86::VPERM2I128rr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERM2F128rm:\r
- case X86::VPERM2I128rm:\r
- // For instruction comments purpose, assume the 256-bit vector is v4i64.\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeVPERM2X128Mask(MVT::v4i64,\r
- MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::VPERMQYri:\r
- case X86::VPERMPDYri:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::VPERMQYmi:\r
- case X86::VPERMPDYmi:\r
- if(MI->getOperand(MI->getNumOperands()-1).isImm())\r
- DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(),\r
- ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::MOVSDrr:\r
- case X86::VMOVSDrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVSDrm:\r
- case X86::VMOVSDrm:\r
- DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::MOVSSrr:\r
- case X86::VMOVSSrr:\r
- Src2Name = getRegName(MI->getOperand(2).getReg());\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVSSrm:\r
- case X86::VMOVSSrm:\r
- DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::MOVPQI2QIrr:\r
- case X86::MOVZPQILo2PQIrr:\r
- case X86::VMOVPQI2QIrr:\r
- case X86::VMOVZPQILo2PQIrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::MOVQI2PQIrm:\r
- case X86::MOVZQI2PQIrm:\r
- case X86::MOVZPQILo2PQIrm:\r
- case X86::VMOVQI2PQIrm:\r
- case X86::VMOVZQI2PQIrm:\r
- case X86::VMOVZPQILo2PQIrm:\r
- DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
- case X86::MOVDI2PDIrm:\r
- case X86::VMOVDI2PDIrm:\r
- DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- break;\r
-\r
- case X86::PMOVZXBWrr:\r
- case X86::PMOVZXBDrr:\r
- case X86::PMOVZXBQrr:\r
- case X86::PMOVZXWDrr:\r
- case X86::PMOVZXWQrr:\r
- case X86::PMOVZXDQrr:\r
- case X86::VPMOVZXBWrr:\r
- case X86::VPMOVZXBDrr:\r
- case X86::VPMOVZXBQrr:\r
- case X86::VPMOVZXWDrr:\r
- case X86::VPMOVZXWQrr:\r
- case X86::VPMOVZXDQrr:\r
- case X86::VPMOVZXBWYrr:\r
- case X86::VPMOVZXBDYrr:\r
- case X86::VPMOVZXBQYrr:\r
- case X86::VPMOVZXWDYrr:\r
- case X86::VPMOVZXWQYrr:\r
- case X86::VPMOVZXDQYrr:\r
- Src1Name = getRegName(MI->getOperand(1).getReg());\r
- // FALL THROUGH.\r
- case X86::PMOVZXBWrm:\r
- case X86::PMOVZXBDrm:\r
- case X86::PMOVZXBQrm:\r
- case X86::PMOVZXWDrm:\r
- case X86::PMOVZXWQrm:\r
- case X86::PMOVZXDQrm:\r
- case X86::VPMOVZXBWrm:\r
- case X86::VPMOVZXBDrm:\r
- case X86::VPMOVZXBQrm:\r
- case X86::VPMOVZXWDrm:\r
- case X86::VPMOVZXWQrm:\r
- case X86::VPMOVZXDQrm:\r
- case X86::VPMOVZXBWYrm:\r
- case X86::VPMOVZXBDYrm:\r
- case X86::VPMOVZXBQYrm:\r
- case X86::VPMOVZXWDYrm:\r
- case X86::VPMOVZXWQYrm:\r
- case X86::VPMOVZXDQYrm: {\r
- MVT SrcVT, DstVT;\r
- getZeroExtensionTypes(MI, SrcVT, DstVT);\r
- DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);\r
- DestName = getRegName(MI->getOperand(0).getReg());\r
- } break;\r
- }\r
-\r
- // The only comments we decode are shuffles, so give up if we were unable to\r
- // decode a shuffle mask.\r
- if (ShuffleMask.empty())\r
- return false;\r
-\r
- if (!DestName) DestName = Src1Name;\r
- OS << (DestName ? DestName : "mem") << " = ";\r
-\r
- // If the two sources are the same, canonicalize the input elements to be\r
- // from the first src so that we get larger element spans.\r
- if (Src1Name == Src2Name) {\r
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {\r
- if ((int)ShuffleMask[i] >= 0 && // Not sentinel.\r
- ShuffleMask[i] >= (int)e) // From second mask.\r
- ShuffleMask[i] -= e;\r
- }\r
- }\r
-\r
- // The shuffle mask specifies which elements of the src1/src2 fill in the\r
- // destination, with a few sentinel values. Loop through and print them\r
- // out.\r
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {\r
- if (i != 0)\r
- OS << ',';\r
- if (ShuffleMask[i] == SM_SentinelZero) {\r
- OS << "zero";\r
- continue;\r
- }\r
-\r
- // Otherwise, it must come from src1 or src2. Print the span of elements\r
- // that comes from this src.\r
- bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();\r
- const char *SrcName = isSrc1 ? Src1Name : Src2Name;\r
- OS << (SrcName ? SrcName : "mem") << '[';\r
- bool IsFirst = true;\r
- while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&\r
- (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {\r
- if (!IsFirst)\r
- OS << ',';\r
- else\r
- IsFirst = false;\r
- if (ShuffleMask[i] == SM_SentinelUndef)\r
- OS << "u";\r
- else\r
- OS << ShuffleMask[i] % ShuffleMask.size();\r
- ++i;\r
- }\r
- OS << ']';\r
- --i; // For loop increments element #.\r
- }\r
- //MI->print(OS, 0);\r
- OS << "\n";\r
-\r
- // We successfully added a comment to this instruction.\r
- return true;\r
-}\r
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "Utils/X86ShuffleDecode.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// \brief Extracts the src/dst types for a given zero extension instruction.
+/// \note While the number of elements in the DstVT type is correct, the
+/// number in the SrcVT type is expanded to fill the src xmm register and the
+/// upper elements may not be included in the dst xmm/ymm register.
+static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown zero extension instruction");
+ // i8 zero extension
+ case X86::PMOVZXBWrm:
+ case X86::PMOVZXBWrr:
+ case X86::VPMOVZXBWrm:
+ case X86::VPMOVZXBWrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v8i16;
+ break;
+ case X86::VPMOVZXBWYrm:
+ case X86::VPMOVZXBWYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v16i16;
+ break;
+ case X86::PMOVZXBDrm:
+ case X86::PMOVZXBDrr:
+ case X86::VPMOVZXBDrm:
+ case X86::VPMOVZXBDrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v4i32;
+ break;
+ case X86::VPMOVZXBDYrm:
+ case X86::VPMOVZXBDYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v8i32;
+ break;
+ case X86::PMOVZXBQrm:
+ case X86::PMOVZXBQrr:
+ case X86::VPMOVZXBQrm:
+ case X86::VPMOVZXBQrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXBQYrm:
+ case X86::VPMOVZXBQYrr:
+ SrcVT = MVT::v16i8;
+ DstVT = MVT::v4i64;
+ break;
+ // i16 zero extension
+ case X86::PMOVZXWDrm:
+ case X86::PMOVZXWDrr:
+ case X86::VPMOVZXWDrm:
+ case X86::VPMOVZXWDrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v4i32;
+ break;
+ case X86::VPMOVZXWDYrm:
+ case X86::VPMOVZXWDYrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v8i32;
+ break;
+ case X86::PMOVZXWQrm:
+ case X86::PMOVZXWQrr:
+ case X86::VPMOVZXWQrm:
+ case X86::VPMOVZXWQrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXWQYrm:
+ case X86::VPMOVZXWQYrr:
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v4i64;
+ break;
+ // i32 zero extension
+ case X86::PMOVZXDQrm:
+ case X86::PMOVZXDQrr:
+ case X86::VPMOVZXDQrm:
+ case X86::VPMOVZXDQrr:
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v2i64;
+ break;
+ case X86::VPMOVZXDQYrm:
+ case X86::VPMOVZXDQYrr:
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v4i64;
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified stream if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<int, 8> ShuffleMask;
+ const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
+
+ switch (MI->getOpcode()) {
+ default:
+ // Not an instruction for which we can decode comments.
+ return false;
+
+ case X86::BLENDPDrri:
+ case X86::VBLENDPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPDrmi:
+ case X86::VBLENDPDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v2f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::BLENDPSrri:
+ case X86::VBLENDPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::BLENDPSrmi:
+ case X86::VBLENDPSrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VBLENDPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VBLENDPSYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PBLENDWrri:
+ case X86::VPBLENDWrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PBLENDWrmi:
+ case X86::VPBLENDWrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPBLENDWYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDWYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v4i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::VPBLENDDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPBLENDDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeBLENDMask(MVT::v8i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::INSERTPSrr:
+ case X86::VINSERTPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::INSERTPSrm:
+ case X86::VINSERTPSrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeINSERTPSMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ case X86::VMOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ case X86::VMOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVSLDUPrr:
+ case X86::VMOVSLDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSLDUPrm:
+ case X86::VMOVSLDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSHDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSHDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::VMOVSLDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVSLDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ break;
+
+ case X86::MOVSHDUPrr:
+ case X86::VMOVSHDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSHDUPrm:
+ case X86::VMOVSHDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ break;
+
+ case X86::VMOVDDUPYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VMOVDDUPYrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
+ break;
+
+ case X86::MOVDDUPrr:
+ case X86::VMOVDDUPrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVDDUPrm:
+ case X86::VMOVDDUPrm:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
+ break;
+
+ case X86::PSLLDQri:
+ case X86::VPSLLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSLLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSRLDQri:
+ case X86::VPSRLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSRLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PALIGNR128rr:
+ case X86::VPALIGNR128rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PALIGNR128rm:
+ case X86::VPALIGNR128rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePALIGNRMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPALIGNR256rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPALIGNR256rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePALIGNRMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ case X86::VPSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ case X86::VPSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v4i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFDYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v8i32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+
+ case X86::PSHUFHWri:
+ case X86::VPSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ case X86::VPSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFHWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFHWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFHWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFHWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ case X86::VPSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ case X86::VPSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFLWMask(MVT::v8i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFLWYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFLWYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFLWMask(MVT::v16i16,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i32, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i64, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i32, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQZrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i64, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ case X86::VSHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPDrmi:
+ case X86::VSHUFPDrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeSHUFPMask(MVT::v2f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeSHUFPMask(MVT::v4f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ case X86::VSHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ case X86::VSHUFPSrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeSHUFPMask(MVT::v4f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeSHUFPMask(MVT::v8f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPDZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDZrm:
+ DecodeUNPCKLMask(MVT::v8f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKLPSZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSZrm:
+ DecodeUNPCKLMask(MVT::v16f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ case X86::VUNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ case X86::VUNPCKHPDrm:
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDYrm:
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDZrm:
+ DecodeUNPCKHMask(MVT::v8f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ case X86::VUNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ case X86::VUNPCKHPSrm:
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSYrm:
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSZrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSZrm:
+ DecodeUNPCKHMask(MVT::v16f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v4f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPSYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v8f32,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v2f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMILPDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSHUFMask(MVT::v4f64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERM2F128rr:
+ case X86::VPERM2I128rr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ // For instruction comments purpose, assume the 256-bit vector is v4i64.
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeVPERM2X128Mask(MVT::v4i64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VPERMQYri:
+ case X86::VPERMPDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMQYmi:
+ case X86::VPERMPDYmi:
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::MOVSDrr:
+ case X86::VMOVSDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSDrm:
+ case X86::VMOVSDrm:
+ DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::MOVSSrr:
+ case X86::VMOVSSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVSSrm:
+ case X86::VMOVSSrm:
+ DecodeScalarMoveMask(MVT::v4f32, nullptr == Src2Name, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::MOVPQI2QIrr:
+ case X86::MOVZPQILo2PQIrr:
+ case X86::VMOVPQI2QIrr:
+ case X86::VMOVZPQILo2PQIrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::MOVQI2PQIrm:
+ case X86::MOVZQI2PQIrm:
+ case X86::MOVZPQILo2PQIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVZQI2PQIrm:
+ case X86::VMOVZPQILo2PQIrm:
+ DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::MOVDI2PDIrm:
+ case X86::VMOVDI2PDIrm:
+ DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PMOVZXBWrr:
+ case X86::PMOVZXBDrr:
+ case X86::PMOVZXBQrr:
+ case X86::PMOVZXWDrr:
+ case X86::PMOVZXWQrr:
+ case X86::PMOVZXDQrr:
+ case X86::VPMOVZXBWrr:
+ case X86::VPMOVZXBDrr:
+ case X86::VPMOVZXBQrr:
+ case X86::VPMOVZXWDrr:
+ case X86::VPMOVZXWQrr:
+ case X86::VPMOVZXDQrr:
+ case X86::VPMOVZXBWYrr:
+ case X86::VPMOVZXBDYrr:
+ case X86::VPMOVZXBQYrr:
+ case X86::VPMOVZXWDYrr:
+ case X86::VPMOVZXWQYrr:
+ case X86::VPMOVZXDQYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PMOVZXBWrm:
+ case X86::PMOVZXBDrm:
+ case X86::PMOVZXBQrm:
+ case X86::PMOVZXWDrm:
+ case X86::PMOVZXWQrm:
+ case X86::PMOVZXDQrm:
+ case X86::VPMOVZXBWrm:
+ case X86::VPMOVZXBDrm:
+ case X86::VPMOVZXBQrm:
+ case X86::VPMOVZXWDrm:
+ case X86::VPMOVZXWQrm:
+ case X86::VPMOVZXDQrm:
+ case X86::VPMOVZXBWYrm:
+ case X86::VPMOVZXBDYrm:
+ case X86::VPMOVZXBQYrm:
+ case X86::VPMOVZXWDYrm:
+ case X86::VPMOVZXWQYrm:
+ case X86::VPMOVZXDQYrm: {
+ MVT SrcVT, DstVT;
+ getZeroExtensionTypes(MI, SrcVT, DstVT);
+ DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ } break;
+ }
+
+ // The only comments we decode are shuffles, so give up if we were unable to
+ // decode a shuffle mask.
+ if (ShuffleMask.empty())
+ return false;
+
+ if (!DestName) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= (int)e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e && (int)ShuffleMask[i] != SM_SentinelZero &&
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ if (ShuffleMask[i] == SM_SentinelUndef)
+ OS << "u";
+ else
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+
+ // We successfully added a comment to this instruction.
+ return true;
+}
-//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//\r
-//\r
-// The LLVM Compiler Infrastructure\r
-//\r
-// This file is distributed under the University of Illinois Open Source\r
-// License. See LICENSE.TXT for details.\r
-//\r
-//===----------------------------------------------------------------------===//\r
-//\r
-// Define several functions to decode x86 specific shuffle semantics into a\r
-// generic vector mask.\r
-//\r
-//===----------------------------------------------------------------------===//\r
-\r
-#include "X86ShuffleDecode.h"\r
-#include "llvm/IR/Constants.h"\r
-#include "llvm/CodeGen/MachineValueType.h"\r
-\r
-//===----------------------------------------------------------------------===//\r
-// Vector Mask Decoding\r
-//===----------------------------------------------------------------------===//\r
-\r
-namespace llvm {\r
-\r
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- // Defaults the copying the dest value.\r
- ShuffleMask.push_back(0);\r
- ShuffleMask.push_back(1);\r
- ShuffleMask.push_back(2);\r
- ShuffleMask.push_back(3);\r
-\r
- // Decode the immediate.\r
- unsigned ZMask = Imm & 15;\r
- unsigned CountD = (Imm >> 4) & 3;\r
- unsigned CountS = (Imm >> 6) & 3;\r
-\r
- // CountS selects which input element to use.\r
- unsigned InVal = 4+CountS;\r
- // CountD specifies which element of destination to update.\r
- ShuffleMask[CountD] = InVal;\r
- // ZMask zaps values, potentially overriding the CountD elt.\r
- if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;\r
- if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;\r
- if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;\r
- if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;\r
-}\r
-\r
-// <3,1> or <6,7,2,3>\r
-void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {\r
- for (unsigned i = NElts/2; i != NElts; ++i)\r
- ShuffleMask.push_back(NElts+i);\r
-\r
- for (unsigned i = NElts/2; i != NElts; ++i)\r
- ShuffleMask.push_back(i);\r
-}\r
-\r
-// <0,2> or <0,1,4,5>\r
-void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {\r
- for (unsigned i = 0; i != NElts/2; ++i)\r
- ShuffleMask.push_back(i);\r
-\r
- for (unsigned i = 0; i != NElts/2; ++i)\r
- ShuffleMask.push_back(NElts+i);\r
-}\r
-\r
-void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
- for (int i = 0, e = NumElts / 2; i < e; ++i) {\r
- ShuffleMask.push_back(2 * i);\r
- ShuffleMask.push_back(2 * i);\r
- }\r
-}\r
-\r
-void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
- for (int i = 0, e = NumElts / 2; i < e; ++i) {\r
- ShuffleMask.push_back(2 * i + 1);\r
- ShuffleMask.push_back(2 * i + 1);\r
- }\r
-}\r
-\r
-void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned VectorSizeInBits = VT.getSizeInBits();\r
- unsigned ScalarSizeInBits = VT.getScalarSizeInBits();\r
- unsigned NumElts = VT.getVectorNumElements();\r
- unsigned NumLanes = VectorSizeInBits / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
- unsigned NumLaneSubElts = 64 / ScalarSizeInBits;\r
-\r
- for (unsigned l = 0; l < NumElts; l += NumLaneElts)\r
- for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)\r
- for (unsigned s = 0; s != NumLaneSubElts; s++)\r
- ShuffleMask.push_back(l + s);\r
-}\r
-\r
-void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned VectorSizeInBits = VT.getSizeInBits();\r
- unsigned NumElts = VectorSizeInBits / 8;\r
- unsigned NumLanes = VectorSizeInBits / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- for (unsigned l = 0; l < NumElts; l += NumLaneElts)\r
- for (unsigned i = 0; i < NumLaneElts; ++i) {\r
- int M = SM_SentinelZero;\r
- if (i >= Imm) M = i - Imm + l;\r
- ShuffleMask.push_back(M);\r
- }\r
-}\r
-\r
-void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned VectorSizeInBits = VT.getSizeInBits();\r
- unsigned NumElts = VectorSizeInBits / 8;\r
- unsigned NumLanes = VectorSizeInBits / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- for (unsigned l = 0; l < NumElts; l += NumLaneElts)\r
- for (unsigned i = 0; i < NumLaneElts; ++i) {\r
- unsigned Base = i + Imm;\r
- int M = Base + l;\r
- if (Base >= NumLaneElts) M = SM_SentinelZero;\r
- ShuffleMask.push_back(M);\r
- }\r
-}\r
-\r
-void DecodePALIGNRMask(MVT VT, unsigned Imm,\r
- SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
- unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);\r
-\r
- unsigned NumLanes = VT.getSizeInBits() / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {\r
- for (unsigned i = 0; i != NumLaneElts; ++i) {\r
- unsigned Base = i + Offset;\r
- // if i+offset is out of this lane then we actually need the other source\r
- if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;\r
- ShuffleMask.push_back(Base + l);\r
- }\r
- }\r
-}\r
-\r
-/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.\r
-/// VT indicates the type of the vector allowing it to handle different\r
-/// datatypes and vector widths.\r
-void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- unsigned NumLanes = VT.getSizeInBits() / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- unsigned NewImm = Imm;\r
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {\r
- for (unsigned i = 0; i != NumLaneElts; ++i) {\r
- ShuffleMask.push_back(NewImm % NumLaneElts + l);\r
- NewImm /= NumLaneElts;\r
- }\r
- if (NumLaneElts == 4) NewImm = Imm; // reload imm\r
- }\r
-}\r
-\r
-void DecodePSHUFHWMask(MVT VT, unsigned Imm,\r
- SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- for (unsigned l = 0; l != NumElts; l += 8) {\r
- unsigned NewImm = Imm;\r
- for (unsigned i = 0, e = 4; i != e; ++i) {\r
- ShuffleMask.push_back(l + i);\r
- }\r
- for (unsigned i = 4, e = 8; i != e; ++i) {\r
- ShuffleMask.push_back(l + 4 + (NewImm & 3));\r
- NewImm >>= 2;\r
- }\r
- }\r
-}\r
-\r
-void DecodePSHUFLWMask(MVT VT, unsigned Imm,\r
- SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- for (unsigned l = 0; l != NumElts; l += 8) {\r
- unsigned NewImm = Imm;\r
- for (unsigned i = 0, e = 4; i != e; ++i) {\r
- ShuffleMask.push_back(l + (NewImm & 3));\r
- NewImm >>= 2;\r
- }\r
- for (unsigned i = 4, e = 8; i != e; ++i) {\r
- ShuffleMask.push_back(l + i);\r
- }\r
- }\r
-}\r
-\r
-/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates\r
-/// the type of the vector allowing it to handle different datatypes and vector\r
-/// widths.\r
-void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- unsigned NumLanes = VT.getSizeInBits() / 128;\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- unsigned NewImm = Imm;\r
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {\r
- // each half of a lane comes from different source\r
- for (unsigned s = 0; s != NumElts*2; s += NumElts) {\r
- for (unsigned i = 0; i != NumLaneElts/2; ++i) {\r
- ShuffleMask.push_back(NewImm % NumLaneElts + s + l);\r
- NewImm /= NumLaneElts;\r
- }\r
- }\r
- if (NumLaneElts == 4) NewImm = Imm; // reload imm\r
- }\r
-}\r
-\r
-/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd\r
-/// and punpckh*. VT indicates the type of the vector allowing it to handle\r
-/// different datatypes and vector widths.\r
-void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate\r
- // independently on 128-bit lanes.\r
- unsigned NumLanes = VT.getSizeInBits() / 128;\r
- if (NumLanes == 0 ) NumLanes = 1; // Handle MMX\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {\r
- for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {\r
- ShuffleMask.push_back(i); // Reads from dest/src1\r
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2\r
- }\r
- }\r
-}\r
-\r
-/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd\r
-/// and punpckl*. VT indicates the type of the vector allowing it to handle\r
-/// different datatypes and vector widths.\r
-void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
-\r
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate\r
- // independently on 128-bit lanes.\r
- unsigned NumLanes = VT.getSizeInBits() / 128;\r
- if (NumLanes == 0 ) NumLanes = 1; // Handle MMX\r
- unsigned NumLaneElts = NumElts / NumLanes;\r
-\r
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {\r
- for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {\r
- ShuffleMask.push_back(i); // Reads from dest/src1\r
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2\r
- }\r
- }\r
-}\r
-\r
-void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,\r
- SmallVectorImpl<int> &ShuffleMask) {\r
- if (Imm & 0x88)\r
- return; // Not a shuffle\r
-\r
- unsigned HalfSize = VT.getVectorNumElements()/2;\r
-\r
- for (unsigned l = 0; l != 2; ++l) {\r
- unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;\r
- for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)\r
- ShuffleMask.push_back(i);\r
- }\r
-}\r
-\r
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {\r
- Type *MaskTy = C->getType();\r
- // It is not an error for the PSHUFB mask to not be a vector of i8 because the\r
- // constant pool uniques constants by their bit representation.\r
- // e.g. the following take up the same space in the constant pool:\r
- // i128 -170141183420855150465331762880109871104\r
- //\r
- // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>\r
- //\r
- // <4 x i32> <i32 -2147483648, i32 -2147483648,\r
- // i32 -2147483648, i32 -2147483648>\r
-\r
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();\r
-\r
- if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.\r
- return;\r
-\r
- // This is a straightforward byte vector.\r
- if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {\r
- int NumElements = MaskTy->getVectorNumElements();\r
- ShuffleMask.reserve(NumElements);\r
-\r
- for (int i = 0; i < NumElements; ++i) {\r
- // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte\r
- // lane of the vector we're inside.\r
- int Base = i < 16 ? 0 : 16;\r
- Constant *COp = C->getAggregateElement(i);\r
- if (!COp) {\r
- ShuffleMask.clear();\r
- return;\r
- } else if (isa<UndefValue>(COp)) {\r
- ShuffleMask.push_back(SM_SentinelUndef);\r
- continue;\r
- }\r
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();\r
- // If the high bit (7) of the byte is set, the element is zeroed.\r
- if (Element & (1 << 7))\r
- ShuffleMask.push_back(SM_SentinelZero);\r
- else {\r
- // Only the least significant 4 bits of the byte are used.\r
- int Index = Base + (Element & 0xf);\r
- ShuffleMask.push_back(Index);\r
- }\r
- }\r
- }\r
- // TODO: Handle funny-looking vectors too.\r
-}\r
-\r
-void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,\r
- SmallVectorImpl<int> &ShuffleMask) {\r
- for (int i = 0, e = RawMask.size(); i < e; ++i) {\r
- uint64_t M = RawMask[i];\r
- if (M == (uint64_t)SM_SentinelUndef) {\r
- ShuffleMask.push_back(M);\r
- continue;\r
- }\r
- // For AVX vectors with 32 bytes the base of the shuffle is the half of\r
- // the vector we're inside.\r
- int Base = i < 16 ? 0 : 16;\r
- // If the high bit (7) of the byte is set, the element is zeroed.\r
- if (M & (1 << 7))\r
- ShuffleMask.push_back(SM_SentinelZero);\r
- else {\r
- // Only the least significant 4 bits of the byte are used.\r
- int Index = Base + (M & 0xf);\r
- ShuffleMask.push_back(Index);\r
- }\r
- }\r
-}\r
-\r
-void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- int ElementBits = VT.getScalarSizeInBits();\r
- int NumElements = VT.getVectorNumElements();\r
- for (int i = 0; i < NumElements; ++i) {\r
- // If there are more than 8 elements in the vector, then any immediate blend\r
- // mask applies to each 128-bit lane. There can never be more than\r
- // 8 elements in a 128-bit lane with an immediate blend.\r
- int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;\r
- assert(Bit < 8 &&\r
- "Immediate blends only operate over 8 elements at a time!");\r
- ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);\r
- }\r
-}\r
-\r
-/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.\r
-/// No VT provided since it only works on 256-bit, 4 element vectors.\r
-void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {\r
- for (unsigned i = 0; i != 4; ++i) {\r
- ShuffleMask.push_back((Imm >> (2*i)) & 3);\r
- }\r
-}\r
-\r
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {\r
- Type *MaskTy = C->getType();\r
- assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");\r
- assert(MaskTy->getVectorElementType()->isIntegerTy() &&\r
- "Expected integer constant mask elements!");\r
- int ElementBits = MaskTy->getScalarSizeInBits();\r
- int NumElements = MaskTy->getVectorNumElements();\r
- assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&\r
- "Unexpected number of vector elements.");\r
- ShuffleMask.reserve(NumElements);\r
- if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {\r
- assert((unsigned)NumElements == CDS->getNumElements() &&\r
- "Constant mask has a different number of elements!");\r
-\r
- for (int i = 0; i < NumElements; ++i) {\r
- int Base = (i * ElementBits / 128) * (128 / ElementBits);\r
- uint64_t Element = CDS->getElementAsInteger(i);\r
- // Only the least significant 2 bits of the integer are used.\r
- int Index = Base + (Element & 0x3);\r
- ShuffleMask.push_back(Index);\r
- }\r
- } else if (auto *CV = dyn_cast<ConstantVector>(C)) {\r
- assert((unsigned)NumElements == C->getNumOperands() &&\r
- "Constant mask has a different number of elements!");\r
-\r
- for (int i = 0; i < NumElements; ++i) {\r
- int Base = (i * ElementBits / 128) * (128 / ElementBits);\r
- Constant *COp = CV->getOperand(i);\r
- if (isa<UndefValue>(COp)) {\r
- ShuffleMask.push_back(SM_SentinelUndef);\r
- continue;\r
- }\r
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();\r
- // Only the least significant 2 bits of the integer are used.\r
- int Index = Base + (Element & 0x3);\r
- ShuffleMask.push_back(Index);\r
- }\r
- }\r
-}\r
-\r
-void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {\r
- unsigned NumDstElts = DstVT.getVectorNumElements();\r
- unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();\r
- unsigned DstScalarBits = DstVT.getScalarSizeInBits();\r
- unsigned Scale = DstScalarBits / SrcScalarBits;\r
- assert(SrcScalarBits < DstScalarBits &&\r
- "Expected zero extension mask to increase scalar size");\r
- assert(SrcVT.getVectorNumElements() >= NumDstElts &&\r
- "Too many zero extension lanes");\r
-\r
- for (unsigned i = 0; i != NumDstElts; i++) {\r
- Mask.push_back(i);\r
- for (unsigned j = 1; j != Scale; j++)\r
- Mask.push_back(SM_SentinelZero);\r
- }\r
-}\r
-\r
-void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {\r
- unsigned NumElts = VT.getVectorNumElements();\r
- ShuffleMask.push_back(0);\r
- for (unsigned i = 1; i < NumElts; i++)\r
- ShuffleMask.push_back(SM_SentinelZero);\r
-}\r
-\r
-void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {\r
- // First element comes from the first element of second source.\r
- // Remaining elements: Load zero extends / Move copies from first source.\r
- unsigned NumElts = VT.getVectorNumElements();\r
- Mask.push_back(NumElts);\r
- for (unsigned i = 1; i < NumElts; i++)\r
- Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);\r
-}\r
-} // llvm namespace\r
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ShuffleDecode.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/MachineValueType.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ // Defaults the copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i);
+ ShuffleMask.push_back(2 * i);
+ }
+}
+
+void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ for (int i = 0, e = NumElts / 2; i < e; ++i) {
+ ShuffleMask.push_back(2 * i + 1);
+ ShuffleMask.push_back(2 * i + 1);
+ }
+}
+
+void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ unsigned NumLaneSubElts = 64 / ScalarSizeInBits;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
+ for (unsigned s = 0; s != NumLaneSubElts; s++)
+ ShuffleMask.push_back(l + s);
+}
+
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ int M = SM_SentinelZero;
+ if (i >= Imm) M = i - Imm + l;
+ ShuffleMask.push_back(M);
+ }
+}
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned NumElts = VectorSizeInBits / 8;
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; ++i) {
+ unsigned Base = i + Imm;
+ int M = Base + l;
+ if (Base >= NumLaneElts) M = SM_SentinelZero;
+ ShuffleMask.push_back(M);
+ }
+}
+
+void DecodePALIGNRMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Offset;
+ // if i+offset is out of this lane then we actually need the other source
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ unsigned NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
+ }
+}
+
+void DecodePSHUFHWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + 4 + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ }
+}
+
+void DecodePSHUFLWMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ unsigned NewImm = Imm;
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ ShuffleMask.push_back(l + (NewImm & 3));
+ NewImm >>= 2;
+ }
+ for (unsigned i = 4, e = 8; i != e; ++i) {
+ ShuffleMask.push_back(l + i);
+ }
+ }
+}
+
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ unsigned NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ // each half of a lane comes from different source
+ for (unsigned s = 0; s != NumElts*2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
+ NewImm /= NumLaneElts;
+ }
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
+ }
+}
+
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ }
+ }
+}
+
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ }
+ }
+}
+
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ if (Imm & 0x88)
+ return; // Not a shuffle
+
+ unsigned HalfSize = VT.getVectorNumElements()/2;
+
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+ ShuffleMask.push_back(i);
+ }
+}
+
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ // It is not an error for the PSHUFB mask to not be a vector of i8 because the
+ // constant pool uniques constants by their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+ if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ return;
+
+ // This is a straightforward byte vector.
+ if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
+ int NumElements = MaskTy->getVectorNumElements();
+ ShuffleMask.reserve(NumElements);
+
+ for (int i = 0; i < NumElements; ++i) {
+ // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
+ // lane of the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+ }
+ // TODO: Handle funny-looking vectors too.
+}
+
+void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ if (M == (uint64_t)SM_SentinelUndef) {
+ ShuffleMask.push_back(M);
+ continue;
+ }
+ // For AVX vectors with 32 bytes the base of the shuffle is the half of
+ // the vector we're inside.
+ int Base = i < 16 ? 0 : 16;
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (M & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (M & 0xf);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
+
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ int ElementBits = VT.getScalarSizeInBits();
+ int NumElements = VT.getVectorNumElements();
+ for (int i = 0; i < NumElements; ++i) {
+ // If there are more than 8 elements in the vector, then any immediate blend
+ // mask applies to each 128-bit lane. There can never be more than
+ // 8 elements in a 128-bit lane with an immediate blend.
+ int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+ assert(Bit < 8 &&
+ "Immediate blends only operate over 8 elements at a time!");
+ ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+ }
+}
+
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm >> (2*i)) & 3);
+ }
+}
+
+void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
+ assert(MaskTy->getVectorElementType()->isIntegerTy() &&
+ "Expected integer constant mask elements!");
+ int ElementBits = MaskTy->getScalarSizeInBits();
+ int NumElements = MaskTy->getVectorNumElements();
+ assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ "Unexpected number of vector elements.");
+ ShuffleMask.reserve(NumElements);
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ assert((unsigned)NumElements == CDS->getNumElements() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ uint64_t Element = CDS->getElementAsInteger(i);
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ assert((unsigned)NumElements == C->getNumOperands() &&
+ "Constant mask has a different number of elements!");
+
+ for (int i = 0; i < NumElements; ++i) {
+ int Base = (i * ElementBits / 128) * (128 / ElementBits);
+ Constant *COp = CV->getOperand(i);
+ if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ // Only the least significant 2 bits of the integer are used.
+ int Index = Base + (Element & 0x3);
+ ShuffleMask.push_back(Index);
+ }
+ }
+}
+
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+ unsigned NumDstElts = DstVT.getVectorNumElements();
+ unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+ unsigned DstScalarBits = DstVT.getScalarSizeInBits();
+ unsigned Scale = DstScalarBits / SrcScalarBits;
+ assert(SrcScalarBits < DstScalarBits &&
+ "Expected zero extension mask to increase scalar size");
+ assert(SrcVT.getVectorNumElements() >= NumDstElts &&
+ "Too many zero extension lanes");
+
+ for (unsigned i = 0; i != NumDstElts; i++) {
+ Mask.push_back(i);
+ for (unsigned j = 1; j != Scale; j++)
+ Mask.push_back(SM_SentinelZero);
+ }
+}
+
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ShuffleMask.push_back(0);
+ for (unsigned i = 1; i < NumElts; i++)
+ ShuffleMask.push_back(SM_SentinelZero);
+}
+
+void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
+ // First element comes from the first element of second source.
+ // Remaining elements: Load zero extends / Move copies from first source.
+ unsigned NumElts = VT.getVectorNumElements();
+ Mask.push_back(NumElts);
+ for (unsigned i = 1; i < NumElts; i++)
+ Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
+}
+} // llvm namespace